From 350afa8a1101f62ce31bc4ed6f69cf4b90ec4fa2 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 8 Aug 2024 06:29:34 +0000 Subject: x86/split_lock: Move Split and Bus lock code to a dedicated file Bus Lock Detect functionality on AMD platforms works identical to Intel. Move split_lock and bus_lock specific code from intel.c to a dedicated file so that it can be compiled and supported on non-Intel platforms. Also, introduce CONFIG_X86_BUS_LOCK_DETECT, make it dependent on CONFIG_CPU_SUP_INTEL and add compilation dependency of the new bus_lock.c file on CONFIG_X86_BUS_LOCK_DETECT. Signed-off-by: Ravi Bangoria Signed-off-by: Thomas Gleixner Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/all/20240808062937.1149-2-ravi.bangoria@amd.com --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f8d150343d42..d606b2a1de99 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -976,7 +976,7 @@ struct task_struct { #ifdef CONFIG_ARCH_HAS_CPU_PASID unsigned pasid_activated:1; #endif -#ifdef CONFIG_CPU_SUP_INTEL +#ifdef CONFIG_X86_BUS_LOCK_DETECT unsigned reported_split_lock:1; #endif #ifdef CONFIG_TASK_DELAY_ACCT -- cgit v1.2.3 From 57d298bdb46bc240498675a7f887fc71089da2c0 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 25 Apr 2024 08:02:36 +0300 Subject: gpu: host1x: Add MLOCK recovery for rest of engines Add class IDs / MLOCKs for MLOCK recovery for rest of engines present on Tegra234. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20240425050238.2943404-4-cyndis@kapsi.fi --- include/linux/host1x.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 9c8119ed13a4..5a7a81e5f9bd 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -14,12 +14,17 @@ enum host1x_class { HOST1X_CLASS_HOST1X = 0x1, + HOST1X_CLASS_NVJPG1 = 0x7, + HOST1X_CLASS_NVENC = 0x21, + HOST1X_CLASS_NVENC1 = 0x22, HOST1X_CLASS_GR2D = 0x51, HOST1X_CLASS_GR2D_SB = 0x52, HOST1X_CLASS_VIC = 0x5D, HOST1X_CLASS_GR3D = 0x60, + HOST1X_CLASS_NVJPG = 0xC0, HOST1X_CLASS_NVDEC = 0xF0, HOST1X_CLASS_NVDEC1 = 0xF5, + HOST1X_CLASS_OFA = 0xF8, }; struct host1x; -- cgit v1.2.3 From 3d70eb8e60c6d009c988c8e696cb68c77ff59628 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 23 Aug 2024 16:07:24 +0800 Subject: gpu: host1x: Make host1x_context_device_bus_type constant Since commit d492cc2573a0 ("driver core: device.h: make struct bus_type a const *"), the driver core can properly handle constant struct bus_type, move the host1x_context_device_bus_type variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Kunwu Chan Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20240823080724.148423-1-kunwu.chan@linux.dev --- include/linux/host1x_context_bus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/host1x_context_bus.h b/include/linux/host1x_context_bus.h index 72462737a6db..c928cb432680 100644 --- a/include/linux/host1x_context_bus.h +++ b/include/linux/host1x_context_bus.h @@ -9,7 +9,7 @@ #include #ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS -extern struct bus_type host1x_context_device_bus_type; +extern const struct bus_type host1x_context_device_bus_type; #endif #endif -- cgit v1.2.3 From 9c75b16cabc69adbbfdc9d219df87c9173f0da0a Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Mon, 19 Aug 2024 10:02:22 +0200 Subject: drm/panfrost: Add SYSTEM_TIMESTAMP and SYSTEM_TIMESTAMP_FREQUENCY parameters Expose system timestamp and frequency supported by the GPU. Mali uses an external timer as GPU system time. On ARM, this is wired to the generic arch timer so we wire cntfrq_el0 as device frequency. This new uAPI will be used in Mesa to implement timestamp queries and VK_KHR_calibrated_timestamps. v2: - Rewrote to use GPU timestamp register - Add missing include for arch_timer_get_cntfrq - Rework commit message v3: - Move panfrost_cycle_counter_get and panfrost_cycle_counter_put to panfrost_ioctl_query_timestamp - Handle possible overflow in panfrost_timestamp_read Signed-off-by: Mary Guillemard Tested-by: Heiko Stuebner Reviewed-by: Steven Price Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20240819080224.24914-2-mary.guillemard@collabora.com --- include/uapi/drm/panfrost_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 9f231d40a146..52b050e2b660 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -172,6 +172,8 @@ enum drm_panfrost_param { DRM_PANFROST_PARAM_NR_CORE_GROUPS, DRM_PANFROST_PARAM_THREAD_TLS_ALLOC, DRM_PANFROST_PARAM_AFBC_FEATURES, + DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP, + DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP_FREQUENCY, }; struct drm_panfrost_get_param { -- cgit v1.2.3 From 3a8d97611b564b5b25f68c90b543056fc9ae0bec Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Mon, 19 Aug 2024 10:02:23 +0200 Subject: drm/panfrost: Add cycle counter job requirement Extend the uAPI with a new job requirement flag for cycle counters. This requirement is used by userland to indicate that a job requires cycle counters or system timestamp to be propagated. (for use with write value timestamp jobs) We cannot enable cycle counters unconditionally as this would result in an increase of GPU power consumption. As a result, they should be left off unless required by the application. If a job requires cycle counters or system timestamps propagation, we must enable cycle counting before issuing a job and disable it right after the job completes. Since this extends the uAPI and because userland needs a way to advertise features like VK_KHR_shader_clock conditionally, we bumps the driver minor version. v2: - Rework commit message - Squash uAPI changes and implementation in this commit - Simplify changes based on Steven Price comments v3: - Add Steven Price r-b - Fix a codestyle issue Signed-off-by: Mary Guillemard Reviewed-by: Steven Price Tested-by: Heiko Stuebner Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20240819080224.24914-3-mary.guillemard@collabora.com --- include/uapi/drm/panfrost_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 52b050e2b660..568724be6628 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -40,6 +40,7 @@ extern "C" { #define DRM_IOCTL_PANFROST_PERFCNT_DUMP DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_PERFCNT_DUMP, struct drm_panfrost_perfcnt_dump) #define PANFROST_JD_REQ_FS (1 << 0) +#define PANFROST_JD_REQ_CYCLE_COUNT (1 << 1) /** * struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D * engine. -- cgit v1.2.3 From cc3e8a216d6b817c509e1e1a3700055d178e04f8 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 2 Jun 2024 15:04:49 +0300 Subject: drm/imx: add internal bridge handling display-timings DT node i.MX DRM DT bindings allow using either a proper panel / bridge graph to provide information about connected panels, or just a display-timings DT node, describing just the timings and the flags. Add helper bridge driver supporting the latter usecase. It will be used by both LDB and parallel-display drivers. Reviewed-by: Philipp Zabel Tested-by: Chris Healy Tested-by: Philipp Zabel # on imx6q-nitrogen6x Link: https://patchwork.freedesktop.org/patch/msgid/20240602-drm-imx-cleanup-v3-9-e549e2a43100@linaro.org Signed-off-by: Dmitry Baryshkov --- include/drm/bridge/imx.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/drm/bridge/imx.h (limited to 'include') diff --git a/include/drm/bridge/imx.h b/include/drm/bridge/imx.h new file mode 100644 index 000000000000..e14f429a9ca2 --- /dev/null +++ b/include/drm/bridge/imx.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2012 Sascha Hauer, Pengutronix + */ + +#ifndef DRM_IMX_BRIDGE_H +#define DRM_IMX_BRIDGE_H + +struct drm_bridge *devm_imx_drm_legacy_bridge(struct device *dev, + struct device_node *np, + int type); + +#endif -- cgit v1.2.3 From a778028cc575deeb5224cc798de6e03d37331bca Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Fri, 30 Aug 2024 10:03:50 +0200 Subject: drm/panthor: Add DEV_QUERY_TIMESTAMP_INFO dev query Expose timestamp information supported by the GPU with a new device query. Mali uses an external timer as GPU system time. On ARM, this is wired to the generic arch timer so we wire cntfrq_el0 as device frequency. This new uAPI will be used in Mesa to implement timestamp queries and VK_KHR_calibrated_timestamps. Since this extends the uAPI and because userland needs a way to advertise those features conditionally, this also bumps the driver minor version. v2: - Rewrote to use GPU timestamp register - Added timestamp_offset to drm_panthor_timestamp_info - Add missing include for arch_timer_get_cntfrq - Rework commit message v3: - Add panthor_gpu_read_64bit_counter - Change panthor_gpu_read_timestamp to use panthor_gpu_read_64bit_counter v4: - Fix multiple typos in uAPI documentation - Mention behavior when the timestamp frequency is unknown - Use u64 instead of unsigned long long for panthor_gpu_read_timestamp - Apply r-b from Mihail Signed-off-by: Mary Guillemard Reviewed-by: Mihail Atanassov Reviewed-by: Boris Brezillon Signed-off-by: Boris Brezillon Link: https://patchwork.freedesktop.org/patch/msgid/20240830080349.24736-2-mary.guillemard@collabora.com --- include/uapi/drm/panthor_drm.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 926b1deb1116..95e3a984c69a 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -260,6 +260,9 @@ enum drm_panthor_dev_query_type { /** @DRM_PANTHOR_DEV_QUERY_CSIF_INFO: Query command-stream interface information. */ DRM_PANTHOR_DEV_QUERY_CSIF_INFO, + + /** @DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO: Query timestamp information. */ + DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO, }; /** @@ -377,6 +380,25 @@ struct drm_panthor_csif_info { __u32 pad; }; +/** + * struct drm_panthor_timestamp_info - Timestamp information + * + * Structure grouping all queryable information relating to the GPU timestamp. + */ +struct drm_panthor_timestamp_info { + /** + * @timestamp_frequency: The frequency of the timestamp timer or 0 if + * unknown. + */ + __u64 timestamp_frequency; + + /** @current_timestamp: The current timestamp. */ + __u64 current_timestamp; + + /** @timestamp_offset: The offset of the timestamp timer. */ + __u64 timestamp_offset; +}; + /** * struct drm_panthor_dev_query - Arguments passed to DRM_PANTHOR_IOCTL_DEV_QUERY */ -- cgit v1.2.3 From fc9cb46bdca8747aedd86ce304caaddac6df07fd Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Sep 2024 12:51:32 +0300 Subject: drm/i915/pciids: use designated initializers in INTEL_VGA_DEVICE() With IGT no longer using INTEL_VGA_DEVICE(), we can make it kernel specific and use designated initializers. Ditto for INTEL_QUANTA_VGA_DEVICE(). Remove the superfluous comments while at it. Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/ce15f8f2a6b672155f9728c8e6a5f49d33fafd24.1725443418.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/intel/i915_pciids.h | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h index 2bf03ebfcf73..6a78df5687c5 100644 --- a/include/drm/intel/i915_pciids.h +++ b/include/drm/intel/i915_pciids.h @@ -25,27 +25,20 @@ #ifndef _I915_PCIIDS_H #define _I915_PCIIDS_H -/* - * A pci_device_id struct { - * __u32 vendor, device; - * __u32 subvendor, subdevice; - * __u32 class, class_mask; - * kernel_ulong_t driver_data; - * }; - * Don't use C99 here because "class" is reserved and we want to - * give userspace flexibility. - */ -#define INTEL_VGA_DEVICE(id, info) { \ - 0x8086, id, \ - ~0, ~0, \ - 0x030000, 0xff0000, \ - (unsigned long) info } - -#define INTEL_QUANTA_VGA_DEVICE(info) { \ - 0x8086, 0x16a, \ - 0x152d, 0x8990, \ - 0x030000, 0xff0000, \ - (unsigned long) info } +#ifdef __KERNEL__ +#define INTEL_VGA_DEVICE(_id, _info) { \ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, (_id)), \ + .class = PCI_BASE_CLASS_DISPLAY << 16, .class_mask = 0xff << 16, \ + .driver_data = (kernel_ulong_t)(_info), \ +} + +#define INTEL_QUANTA_VGA_DEVICE(_info) { \ + .vendor = PCI_VENDOR_ID_INTEL, .device = 0x16a, \ + .subvendor = 0x152d, .subdevice = 0x8990, \ + .class = PCI_BASE_CLASS_DISPLAY << 16, .class_mask = 0xff << 16, \ + .driver_data = (kernel_ulong_t)(_info), \ +} +#endif #define INTEL_I810_IDS(MACRO__, ...) \ MACRO__(0x7121, ## __VA_ARGS__), /* I810 */ \ -- cgit v1.2.3 From a37c68dd80f9951bb48aa44094fce130197ce3a4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Sep 2024 12:51:33 +0300 Subject: drm/i915/pciids: separate ARL and MTL PCI IDs Avoid including PCI IDs for one platform to the PCI IDs of another. It's more clear to deal with them completely separately at the PCI ID macro level. Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/b70af19ea017a76af4678d0a4ee8332253ee1f3b.1725443418.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/intel/i915_pciids.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h index 6a78df5687c5..cbb12fdbcb7f 100644 --- a/include/drm/intel/i915_pciids.h +++ b/include/drm/intel/i915_pciids.h @@ -764,15 +764,15 @@ INTEL_ATS_M150_IDS(MACRO__, ## __VA_ARGS__), \ INTEL_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) -/* MTL */ +/* ARL */ #define INTEL_ARL_IDS(MACRO__, ...) \ MACRO__(0x7D41, ## __VA_ARGS__), \ MACRO__(0x7D51, ## __VA_ARGS__), \ MACRO__(0x7D67, ## __VA_ARGS__), \ MACRO__(0x7DD1, ## __VA_ARGS__) +/* MTL */ #define INTEL_MTL_IDS(MACRO__, ...) \ - INTEL_ARL_IDS(MACRO__, ## __VA_ARGS__), \ MACRO__(0x7D40, ## __VA_ARGS__), \ MACRO__(0x7D45, ## __VA_ARGS__), \ MACRO__(0x7D55, ## __VA_ARGS__), \ -- cgit v1.2.3 From ae304b054520fec0a5ad5dec103a37abb53fef0e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Sep 2024 12:46:47 +0300 Subject: drm/xe/pciids: add some missing ADL-N PCI IDs Similar to commit 425b463859ed ("drm/i915: Update ADL-N PCI IDs"). Reviewed-by: Sai Teja Pottumuttu Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/47d543393e4026588401a03c4e3ce12ce29780e3.1725443121.git.jani.nikula@intel.com --- include/drm/intel/xe_pciids.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h index 644872a35c35..4aab1bc2327e 100644 --- a/include/drm/intel/xe_pciids.h +++ b/include/drm/intel/xe_pciids.h @@ -97,7 +97,9 @@ #define XE_ADLN_IDS(MACRO__, ...) \ MACRO__(0x46D0, ## __VA_ARGS__), \ MACRO__(0x46D1, ## __VA_ARGS__), \ - MACRO__(0x46D2, ## __VA_ARGS__) + MACRO__(0x46D2, ## __VA_ARGS__), \ + MACRO__(0x46D3, ## __VA_ARGS__), \ + MACRO__(0x46D4, ## __VA_ARGS__) /* RPL-S */ #define XE_RPLS_IDS(MACRO__, ...) \ -- cgit v1.2.3 From d454902a690db47f1880f963514bbf0fc7a129a8 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Sep 2024 12:46:48 +0300 Subject: drm/xe/pciids: separate RPL-U and RPL-P PCI IDs Avoid including PCI IDs for one platform to the PCI IDs of another. It's more clear to deal with them completely separately at the PCI ID macro level. Reviewed-by: Sai Teja Pottumuttu Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/4868d36fbfa8c38ea2d490bca82cf6370b8d65dd.1725443121.git.jani.nikula@intel.com --- include/drm/intel/xe_pciids.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h index 4aab1bc2327e..7a9a7d0a89ca 100644 --- a/include/drm/intel/xe_pciids.h +++ b/include/drm/intel/xe_pciids.h @@ -122,7 +122,6 @@ /* RPL-P */ #define XE_RPLP_IDS(MACRO__, ...) \ - XE_RPLU_IDS(MACRO__, ## __VA_ARGS__), \ MACRO__(0xA720, ## __VA_ARGS__), \ MACRO__(0xA7A0, ## __VA_ARGS__), \ MACRO__(0xA7A8, ## __VA_ARGS__), \ -- cgit v1.2.3 From cdb56a63f7eef34e89b045fc8bcae8d326bbdb19 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Sep 2024 12:46:49 +0300 Subject: drm/xe/pciids: separate ARL and MTL PCI IDs Avoid including PCI IDs for one platform to the PCI IDs of another. It's more clear to deal with them completely separately at the PCI ID macro level. Reviewed-by: Shekhar Chauhan Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/a30cb0da7694a8eccceba66d676ac59aa0e96176.1725443121.git.jani.nikula@intel.com --- include/drm/intel/xe_pciids.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h index 7a9a7d0a89ca..79001afa7d27 100644 --- a/include/drm/intel/xe_pciids.h +++ b/include/drm/intel/xe_pciids.h @@ -176,16 +176,19 @@ XE_ATS_M150_IDS(MACRO__, ## __VA_ARGS__),\ XE_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) -/* MTL / ARL */ +/* ARL */ +#define XE_ARL_IDS(MACRO__, ...) \ + MACRO__(0x7D41, ## __VA_ARGS__), \ + MACRO__(0x7D51, ## __VA_ARGS__), \ + MACRO__(0x7D67, ## __VA_ARGS__), \ + MACRO__(0x7DD1, ## __VA_ARGS__) + +/* MTL */ #define XE_MTL_IDS(MACRO__, ...) \ MACRO__(0x7D40, ## __VA_ARGS__), \ - MACRO__(0x7D41, ## __VA_ARGS__), \ MACRO__(0x7D45, ## __VA_ARGS__), \ - MACRO__(0x7D51, ## __VA_ARGS__), \ MACRO__(0x7D55, ## __VA_ARGS__), \ MACRO__(0x7D60, ## __VA_ARGS__), \ - MACRO__(0x7D67, ## __VA_ARGS__), \ - MACRO__(0x7DD1, ## __VA_ARGS__), \ MACRO__(0x7DD5, ## __VA_ARGS__) #define XE_LNL_IDS(MACRO__, ...) \ -- cgit v1.2.3 From 498ba74654bec380974d87da04361c5edea07181 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 2 Sep 2024 12:53:48 +0200 Subject: drm/gem-vram: Remove support for simple display pipelines There are no more drivers that use GEM VRAM helpers with a simple display pipeline. Remove the respective code. Signed-off-by: Thomas Zimmermann Acked-by: Gerd Hoffmann Link: https://patchwork.freedesktop.org/patch/msgid/20240902105546.792625-11-tzimmermann@suse.de --- include/drm/drm_gem_vram_helper.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gem_vram_helper.h b/include/drm/drm_gem_vram_helper.h index 9a73f786f4ad..00830b49a3ff 100644 --- a/include/drm/drm_gem_vram_helper.h +++ b/include/drm/drm_gem_vram_helper.h @@ -17,7 +17,6 @@ struct drm_mode_create_dumb; struct drm_plane; struct drm_plane_state; -struct drm_simple_display_pipe; struct filp; struct vm_area_struct; @@ -137,18 +136,6 @@ drm_gem_vram_plane_helper_cleanup_fb(struct drm_plane *plane, .prepare_fb = drm_gem_vram_plane_helper_prepare_fb, \ .cleanup_fb = drm_gem_vram_plane_helper_cleanup_fb -/* - * Helpers for struct drm_simple_display_pipe_funcs - */ - -int drm_gem_vram_simple_display_pipe_prepare_fb( - struct drm_simple_display_pipe *pipe, - struct drm_plane_state *new_state); - -void drm_gem_vram_simple_display_pipe_cleanup_fb( - struct drm_simple_display_pipe *pipe, - struct drm_plane_state *old_state); - /** * define DRM_GEM_VRAM_DRIVER - default callback functions for * &struct drm_driver -- cgit v1.2.3 From b2ef808786d93df36585cee42cfb973fc41636eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 26 Aug 2024 14:25:38 +0200 Subject: drm/sched: add optional errno to drm_sched_start() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current implementation of drm_sched_start uses a hardcoded -ECANCELED to dispose of a job when the parent/hw fence is NULL. This results in drm_sched_job_done being called with -ECANCELED for each job with a NULL parent in the pending list, making it difficult to distinguish between recovery methods, whether a queue reset or a full GPU reset was used. To improve this, we first try a soft recovery for timeout jobs and use the error code -ENODATA. If soft recovery fails, we proceed with a queue reset, where the error code remains -ENODATA for the job. Finally, for a full GPU reset, we use error codes -ECANCELED or -ETIME. This patch adds an error code parameter to drm_sched_start, allowing us to differentiate between queue reset and GPU reset failures. This enables user mode and test applications to validate the expected correctness of the requested operation. After a successful queue reset, the only way to continue normal operation is to call drm_sched_job_done with the specific error code -ENODATA. v1: Initial implementation by Jesse utilized amdgpu_device_lock_reset_domain and amdgpu_device_unlock_reset_domain to allow user mode to track the queue reset status and distinguish between queue reset and GPU reset. v2: Christian suggested using the error codes -ENODATA for queue reset and -ECANCELED or -ETIME for GPU reset, returned to amdgpu_cs_wait_ioctl. v3: To meet the requirements, we introduce a new function drm_sched_start_ex with an additional parameter to set dma_fence_set_error, allowing us to handle the specific error codes appropriately and dispose of bad jobs with the selected error code depending on whether it was a queue reset or GPU reset. v4: Alex suggested using a new name, drm_sched_start_with_recovery_error, which more accurately describes the function's purpose. Additionally, it was recommended to add documentation details about the new method. v5: Fixed declaration of new function drm_sched_start_with_recovery_error.(Alex) v6 (chk): rebase on upstream changes, cleanup the commit message, drop the new function again and update all callers, apply the errno also to scheduler fences with hw fences v7 (chk): rebased Signed-off-by: Jesse Zhang Signed-off-by: Vitaly Prosyak Signed-off-by: Christian König Acked-by: Daniel Vetter Reviewed-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20240826122541.85663-1-christian.koenig@amd.com --- include/drm/gpu_scheduler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index fe8edb917360..a8d19b10f9b8 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -579,7 +579,7 @@ bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched); void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched); void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched); void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad); -void drm_sched_start(struct drm_gpu_scheduler *sched); +void drm_sched_start(struct drm_gpu_scheduler *sched, int errno); void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched); void drm_sched_increase_karma(struct drm_sched_job *bad); void drm_sched_reset_karma(struct drm_sched_job *bad); -- cgit v1.2.3 From a401bd1264b400f96a4cf61ed3fc144008e97a4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 26 Aug 2024 14:25:39 +0200 Subject: dma-buf: give examples of error codes to use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dma_fence_set_error() function allows to set an error code on a dma_fence object before it is signaled. Document some of the potential error codes drivers should use and especially what they mean. Signed-off-by: Christian König Acked-by: Daniel Vetter Reviewed-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20240826122541.85663-2-christian.koenig@amd.com --- include/linux/dma-fence.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index e06bad467f55..e7ad819962e3 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -574,6 +574,12 @@ int dma_fence_get_status(struct dma_fence *fence); * rather than success. This must be set before signaling (so that the value * is visible before any waiters on the signal callback are woken). This * helper exists to help catching erroneous setting of #dma_fence.error. + * + * Examples of error codes which drivers should use: + * + * * %-ENODATA This operation produced no data, no other operation affected. + * * %-ECANCELED All operations from the same context have been canceled. + * * %-ETIME Operation caused a timeout and potentially device reset. */ static inline void dma_fence_set_error(struct dma_fence *fence, int error) -- cgit v1.2.3 From 8cf9a01edc216b16b5839eb793ac544d2c97ce97 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 11 Sep 2024 15:42:57 -0400 Subject: fs: Introduce FOP_ASYNC_LOCK Some lock managers (NLM, kNFSD) fastidiously avoid blocking their kernel threads while servicing blocking locks. If a filesystem supports asynchronous lock requests those lock managers can use notifications to quickly inform clients they have acquired a file lock. Historically, only posix_lock_file() was capable of supporting asynchronous locks so the check for support was simply file_operations->lock(), but with recent changes in DLM, both GFS2 and OCFS2 also support asynchronous locks and have started signalling their support with EXPORT_OP_ASYNC_LOCK. We recently noticed that those changes dropped the checks for whether a filesystem simply defaults to posix_lock_file(), so async lock notifications have not been attempted for NLM and NFSv4.1+ for most filesystems. While trying to fix this it has become clear that testing both the export flag combined with testing ->lock() creates quite a layering mess. It seems appropriate to signal support with a fop_flag. Add FOP_ASYNC_LOCK so that filesystems with ->lock() can signal their capability to handle lock requests asynchronously. Add a helper for lock managers to properly test that support. Signed-off-by: Benjamin Coddington Link: https://lore.kernel.org/r/3330d5a324abe2ce9c1dafe89cacdc6db41945d1.1726083391.git.bcodding@redhat.com Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/filelock.h | 5 +++++ include/linux/fs.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/filelock.h b/include/linux/filelock.h index daee999d05f3..58c1120a8253 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -180,6 +180,11 @@ static inline void locks_wake_up(struct file_lock *fl) wake_up(&fl->c.flc_wait); } +static inline bool locks_can_async_lock(const struct file_operations *fops) +{ + return !fops->lock || fops->fop_flags & FOP_ASYNC_LOCK; +} + /* fs/locks.c */ void locks_free_lock_context(struct inode *inode); void locks_free_lock(struct file_lock *fl); diff --git a/include/linux/fs.h b/include/linux/fs.h index 6ca11e241a24..78221ae589d9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2074,6 +2074,8 @@ struct file_operations { #define FOP_DIO_PARALLEL_WRITE ((__force fop_flags_t)(1 << 3)) /* Contains huge pages */ #define FOP_HUGE_PAGES ((__force fop_flags_t)(1 << 4)) +/* Supports asynchronous lock callbacks */ +#define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 5)) /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, -- cgit v1.2.3 From 319e53f155907cf2c6dabc16ec9dce0179bc04d1 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 16 Sep 2024 19:00:08 -0400 Subject: drm/panic: Fix uninitialized spinlock acquisition with CONFIG_DRM_PANIC=n It turns out that if you happen to have a kernel config where CONFIG_DRM_PANIC is disabled and spinlock debugging is enabled, along with KMS being enabled - we'll end up trying to acquire an uninitialized spin_lock with drm_panic_lock() when we try to do a commit: rvkms rvkms.0: [drm:drm_atomic_commit] committing 0000000068d2ade1 INFO: trying to register non-static key. The code is fine but needs lockdep annotation, or maybe you didn't initialize this object before use? turning off the locking correctness validator. CPU: 4 PID: 1347 Comm: modprobe Not tainted 6.10.0-rc1Lyude-Test+ #272 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20240524-3.fc40 05/24/2024 Call Trace: dump_stack_lvl+0x77/0xa0 assign_lock_key+0x114/0x120 register_lock_class+0xa8/0x2c0 __lock_acquire+0x7d/0x2bd0 ? __vmap_pages_range_noflush+0x3a8/0x550 ? drm_atomic_helper_swap_state+0x2ad/0x3a0 lock_acquire+0xec/0x290 ? drm_atomic_helper_swap_state+0x2ad/0x3a0 ? lock_release+0xee/0x310 _raw_spin_lock_irqsave+0x4e/0x70 ? drm_atomic_helper_swap_state+0x2ad/0x3a0 drm_atomic_helper_swap_state+0x2ad/0x3a0 drm_atomic_helper_commit+0xb1/0x270 drm_atomic_commit+0xaf/0xe0 ? __pfx___drm_printfn_info+0x10/0x10 drm_client_modeset_commit_atomic+0x1a1/0x250 drm_client_modeset_commit_locked+0x4b/0x180 drm_client_modeset_commit+0x27/0x50 __drm_fb_helper_restore_fbdev_mode_unlocked+0x76/0x90 drm_fb_helper_set_par+0x38/0x40 fbcon_init+0x3c4/0x690 visual_init+0xc0/0x120 do_bind_con_driver+0x409/0x4c0 do_take_over_console+0x233/0x280 do_fb_registered+0x11f/0x210 fbcon_fb_registered+0x2c/0x60 register_framebuffer+0x248/0x2a0 __drm_fb_helper_initial_config_and_unlock+0x58a/0x720 drm_fbdev_generic_client_hotplug+0x6e/0xb0 drm_client_register+0x76/0xc0 _RNvXs_CsHeezP08sTT_5rvkmsNtB4_5RvkmsNtNtCs1cdwasc6FUb_6kernel8platform6Driver5probe+0xed2/0x1060 [rvkms] ? _RNvMs_NtCs1cdwasc6FUb_6kernel8platformINtB4_7AdapterNtCsHeezP08sTT_5rvkms5RvkmsE14probe_callbackBQ_+0x2b/0x70 [rvkms] ? acpi_dev_pm_attach+0x25/0x110 ? platform_probe+0x6a/0xa0 ? really_probe+0x10b/0x400 ? __driver_probe_device+0x7c/0x140 ? driver_probe_device+0x22/0x1b0 ? __device_attach_driver+0x13a/0x1c0 ? __pfx___device_attach_driver+0x10/0x10 ? bus_for_each_drv+0x114/0x170 ? __device_attach+0xd6/0x1b0 ? bus_probe_device+0x9e/0x120 ? device_add+0x288/0x4b0 ? platform_device_add+0x75/0x230 ? platform_device_register_full+0x141/0x180 ? rust_helper_platform_device_register_simple+0x85/0xb0 ? _RNvMs2_NtCs1cdwasc6FUb_6kernel8platformNtB5_6Device13create_simple+0x1d/0x60 ? _RNvXs0_CsHeezP08sTT_5rvkmsNtB5_5RvkmsNtCs1cdwasc6FUb_6kernel6Module4init+0x11e/0x160 [rvkms] ? 0xffffffffc083f000 ? init_module+0x20/0x1000 [rvkms] ? kernfs_xattr_get+0x3e/0x80 ? do_one_initcall+0x148/0x3f0 ? __lock_acquire+0x5ef/0x2bd0 ? __lock_acquire+0x5ef/0x2bd0 ? __lock_acquire+0x5ef/0x2bd0 ? put_cpu_partial+0x51/0x1d0 ? lock_acquire+0xec/0x290 ? put_cpu_partial+0x51/0x1d0 ? lock_release+0xee/0x310 ? put_cpu_partial+0x51/0x1d0 ? fs_reclaim_acquire+0x69/0xf0 ? lock_acquire+0xec/0x290 ? fs_reclaim_acquire+0x69/0xf0 ? kfree+0x22f/0x340 ? lock_release+0xee/0x310 ? kmalloc_trace_noprof+0x48/0x340 ? do_init_module+0x22/0x240 ? kmalloc_trace_noprof+0x155/0x340 ? do_init_module+0x60/0x240 ? __se_sys_finit_module+0x2e0/0x3f0 ? do_syscall_64+0xa4/0x180 ? syscall_exit_to_user_mode+0x108/0x140 ? do_syscall_64+0xb0/0x180 ? vma_end_read+0xd0/0xe0 ? do_user_addr_fault+0x309/0x640 ? clear_bhb_loop+0x45/0xa0 ? clear_bhb_loop+0x45/0xa0 ? clear_bhb_loop+0x45/0xa0 ? entry_SYSCALL_64_after_hwframe+0x76/0x7e Fix this by stubbing these macros out when this config option isn't enabled, along with fixing the unused variable warning that introduces. Signed-off-by: Lyude Paul Reviewed-by: Daniel Vetter Fixes: e2a1cda3e0c7 ("drm/panic: Add drm panic locking") Cc: # v6.10+ Link: https://patchwork.freedesktop.org/patch/msgid/20240916230103.611490-1-lyude@redhat.com --- include/drm/drm_panic.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/drm/drm_panic.h b/include/drm/drm_panic.h index 54085d5d05c3..f4e1fa9ae607 100644 --- a/include/drm/drm_panic.h +++ b/include/drm/drm_panic.h @@ -64,6 +64,8 @@ struct drm_scanout_buffer { }; +#ifdef CONFIG_DRM_PANIC + /** * drm_panic_trylock - try to enter the panic printing critical section * @dev: struct drm_device @@ -149,4 +151,16 @@ struct drm_scanout_buffer { #define drm_panic_unlock(dev, flags) \ raw_spin_unlock_irqrestore(&(dev)->mode_config.panic_lock, flags) +#else + +static inline bool drm_panic_trylock(struct drm_device *dev, unsigned long flags) +{ + return true; +} + +static inline void drm_panic_lock(struct drm_device *dev, unsigned long flags) {} +static inline void drm_panic_unlock(struct drm_device *dev, unsigned long flags) {} + +#endif + #endif /* __DRM_PANIC_H__ */ -- cgit v1.2.3 From 318580ad7f2828f6269e0b8819e943ddedda3375 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 29 Aug 2024 14:41:09 +0800 Subject: hugetlbfs: support tracepoint Add basic tracepoints for {alloc, evict, free}_inode, setattr and fallocate. These can help users to debug hugetlbfs more conveniently. Signed-off-by: Hongbo Li Link: https://lore.kernel.org/r/20240829064110.67884-2-lihongbo22@huawei.com Reviewed-by: Steven Rostedt (Google) Signed-off-by: Christian Brauner --- include/trace/events/hugetlbfs.h | 156 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) create mode 100644 include/trace/events/hugetlbfs.h (limited to 'include') diff --git a/include/trace/events/hugetlbfs.h b/include/trace/events/hugetlbfs.h new file mode 100644 index 000000000000..8331c904a9ba --- /dev/null +++ b/include/trace/events/hugetlbfs.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hugetlbfs + +#if !defined(_TRACE_HUGETLBFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_HUGETLBFS_H + +#include + +TRACE_EVENT(hugetlbfs_alloc_inode, + + TP_PROTO(struct inode *inode, struct inode *dir, int mode), + + TP_ARGS(inode, dir, mode), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(ino_t, dir) + __field(__u16, mode) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->dir = dir->i_ino; + __entry->mode = mode; + ), + + TP_printk("dev %d,%d ino %lu dir %lu mode 0%o", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, + (unsigned long) __entry->dir, __entry->mode) +); + +DECLARE_EVENT_CLASS(hugetlbfs__inode, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(__u16, mode) + __field(loff_t, size) + __field(unsigned int, nlink) + __field(unsigned int, seals) + __field(blkcnt_t, blocks) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->mode = inode->i_mode; + __entry->size = inode->i_size; + __entry->nlink = inode->i_nlink; + __entry->seals = HUGETLBFS_I(inode)->seals; + __entry->blocks = inode->i_blocks; + ), + + TP_printk("dev %d,%d ino %lu mode 0%o size %lld nlink %u seals %u blocks %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, + __entry->mode, __entry->size, __entry->nlink, __entry->seals, + (unsigned long long)__entry->blocks) +); + +DEFINE_EVENT(hugetlbfs__inode, hugetlbfs_evict_inode, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +DEFINE_EVENT(hugetlbfs__inode, hugetlbfs_free_inode, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +TRACE_EVENT(hugetlbfs_setattr, + + TP_PROTO(struct inode *inode, struct dentry *dentry, + struct iattr *attr), + + TP_ARGS(inode, dentry, attr), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(unsigned int, d_len) + __string(d_name, dentry->d_name.name) + __field(unsigned int, ia_valid) + __field(unsigned int, ia_mode) + __field(loff_t, old_size) + __field(loff_t, ia_size) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->d_len = dentry->d_name.len; + __assign_str(d_name); + __entry->ia_valid = attr->ia_valid; + __entry->ia_mode = attr->ia_mode; + __entry->old_size = inode->i_size; + __entry->ia_size = attr->ia_size; + ), + + TP_printk("dev %d,%d ino %lu name %.*s valid %#x mode 0%o old_size %lld size %lld", + MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long)__entry->ino, + __entry->d_len, __get_str(d_name), __entry->ia_valid, __entry->ia_mode, + __entry->old_size, __entry->ia_size) +); + +TRACE_EVENT(hugetlbfs_fallocate, + + TP_PROTO(struct inode *inode, int mode, + loff_t offset, loff_t len, int ret), + + TP_ARGS(inode, mode, offset, len, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(int, mode) + __field(loff_t, offset) + __field(loff_t, len) + __field(loff_t, size) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->mode = mode; + __entry->offset = offset; + __entry->len = len; + __entry->size = inode->i_size; + __entry->ret = ret; + ), + + TP_printk("dev %d,%d ino %lu mode 0%o offset %lld len %lld size %lld ret %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long)__entry->ino, __entry->mode, + (unsigned long long)__entry->offset, + (unsigned long long)__entry->len, + (unsigned long long)__entry->size, + __entry->ret) +); + +#endif /* _TRACE_HUGETLBFS_H */ + + /* This part must be outside protection */ +#include -- cgit v1.2.3 From 2b0996c7646293c71c1297c00e07e54cee9bec5c Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Fri, 6 Sep 2024 12:53:59 -0700 Subject: wifi: ath9k: remove ath9k_platform_data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completely unused here in favor of Device Tree based setup. The DT code in here should currently match what is available with platform files. Any such lapse can always be added. Signed-off-by: Rosen Penev Acked-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240906195359.6982-4-rosenp@gmail.com --- include/linux/ath9k_platform.h | 51 ------------------------------------------ 1 file changed, 51 deletions(-) delete mode 100644 include/linux/ath9k_platform.h (limited to 'include') diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h deleted file mode 100644 index 76860a461ed2..000000000000 --- a/include/linux/ath9k_platform.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2008 Atheros Communications Inc. - * Copyright (c) 2009 Gabor Juhos - * Copyright (c) 2009 Imre Kaloz - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef _LINUX_ATH9K_PLATFORM_H -#define _LINUX_ATH9K_PLATFORM_H - -#define ATH9K_PLAT_EEP_MAX_WORDS 2048 - -struct ath9k_platform_data { - const char *eeprom_name; - - u16 eeprom_data[ATH9K_PLAT_EEP_MAX_WORDS]; - u8 *macaddr; - - int led_pin; - u32 gpio_mask; - u32 gpio_val; - - u32 bt_active_pin; - u32 bt_priority_pin; - u32 wlan_active_pin; - - bool endian_check; - bool is_clk_25mhz; - bool tx_gain_buffalo; - bool disable_2ghz; - bool disable_5ghz; - bool led_active_high; - - int (*get_mac_revision)(void); - int (*external_reset)(void); - - bool use_eeprom; -}; - -#endif /* _LINUX_ATH9K_PLATFORM_H */ -- cgit v1.2.3 From 5b40191152282e1f25d7b9826bcda41be927b39f Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 6 Sep 2024 15:06:03 +0300 Subject: drm/xe/pciids: Add PVC's PCI device ID macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add PVC PCI IDs to the xe_pciids.h header. They're not yet used in the driver. Cc: Daniele Ceraolo Spurio Cc: Lucas De Marchi Cc: Thomas Hellström Reviewed-by: Lucas De Marchi Acked-by: Simona Vetter Signed-off-by: Rodrigo Vivi Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/6ac1829493a53a3fec889c746648d627a0296892.1725624296.git.jani.nikula@intel.com --- include/drm/intel/xe_pciids.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h index 79001afa7d27..67baa7c2246a 100644 --- a/include/drm/intel/xe_pciids.h +++ b/include/drm/intel/xe_pciids.h @@ -191,6 +191,22 @@ MACRO__(0x7D60, ## __VA_ARGS__), \ MACRO__(0x7DD5, ## __VA_ARGS__) +/* PVC */ +#define XE_PVC_IDS(MACRO__, ...) \ + MACRO__(0x0B69, ## __VA_ARGS__), \ + MACRO__(0x0B6E, ## __VA_ARGS__), \ + MACRO__(0x0BD4, ## __VA_ARGS__), \ + MACRO__(0x0BD5, ## __VA_ARGS__), \ + MACRO__(0x0BD6, ## __VA_ARGS__), \ + MACRO__(0x0BD7, ## __VA_ARGS__), \ + MACRO__(0x0BD8, ## __VA_ARGS__), \ + MACRO__(0x0BD9, ## __VA_ARGS__), \ + MACRO__(0x0BDA, ## __VA_ARGS__), \ + MACRO__(0x0BDB, ## __VA_ARGS__), \ + MACRO__(0x0BE0, ## __VA_ARGS__), \ + MACRO__(0x0BE1, ## __VA_ARGS__), \ + MACRO__(0x0BE5, ## __VA_ARGS__) + #define XE_LNL_IDS(MACRO__, ...) \ MACRO__(0x6420, ## __VA_ARGS__), \ MACRO__(0x64A0, ## __VA_ARGS__), \ -- cgit v1.2.3 From 66b281fd8e599ddd7a00a89dc0dcfb7a13411441 Mon Sep 17 00:00:00 2001 From: Dnyaneshwar Bhadane Date: Tue, 10 Sep 2024 11:53:01 +0530 Subject: drm/i915/pciid: Add new PCI id for ARL Add new PCI id for ARL platform. Signed-off-by: Dnyaneshwar Bhadane Reviewed-by: Nemesa Garg Reviewed-by: Sai Teja Pottumuttu Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240910062301.2006782-1-dnyaneshwar.bhadane@intel.com --- include/drm/intel/i915_pciids.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h index cbb12fdbcb7f..02156c6f79b6 100644 --- a/include/drm/intel/i915_pciids.h +++ b/include/drm/intel/i915_pciids.h @@ -769,7 +769,8 @@ MACRO__(0x7D41, ## __VA_ARGS__), \ MACRO__(0x7D51, ## __VA_ARGS__), \ MACRO__(0x7D67, ## __VA_ARGS__), \ - MACRO__(0x7DD1, ## __VA_ARGS__) + MACRO__(0x7DD1, ## __VA_ARGS__), \ + MACRO__(0xB640, ## __VA_ARGS__) /* MTL */ #define INTEL_MTL_IDS(MACRO__, ...) \ -- cgit v1.2.3 From 35667a0330612bb25a689e4d3a687d47cede1d7a Mon Sep 17 00:00:00 2001 From: Dnyaneshwar Bhadane Date: Thu, 12 Sep 2024 17:29:06 +0530 Subject: drm/xe/pciid: Add new PCI id for ARL Add new PCI id for ARL platform. v2: Fix typo in PCI id (SaiTeja) Signed-off-by: Dnyaneshwar Bhadane Reviewed-by: Sai Teja Pottumuttu Reviewed-by: Tejas Upadhyay Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240912115906.2730577-1-dnyaneshwar.bhadane@intel.com --- include/drm/intel/xe_pciids.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h index 67baa7c2246a..65520a90c17c 100644 --- a/include/drm/intel/xe_pciids.h +++ b/include/drm/intel/xe_pciids.h @@ -181,7 +181,8 @@ MACRO__(0x7D41, ## __VA_ARGS__), \ MACRO__(0x7D51, ## __VA_ARGS__), \ MACRO__(0x7D67, ## __VA_ARGS__), \ - MACRO__(0x7DD1, ## __VA_ARGS__) + MACRO__(0x7DD1, ## __VA_ARGS__), \ + MACRO__(0xB640, ## __VA_ARGS__) /* MTL */ #define XE_MTL_IDS(MACRO__, ...) \ -- cgit v1.2.3 From 1e436f4fff1fd1fcc904ee18139f7e284001dc81 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Tue, 17 Sep 2024 14:47:32 +0000 Subject: drm/scheduler: Improve documentation Function drm_sched_entity_push_job() doesn't have a return value, remove the return value description for it. Correct several other typo errors. v2 (Philipp): - more correction with related comments. Signed-off-by: Shuicheng Lin Reviewed-by: Philipp Stanner Signed-off-by: Simona Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240917144732.2758572-1-shuicheng.lin@intel.com --- include/drm/gpu_scheduler.h | 12 ++++++------ include/linux/dma-resv.h | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index a8d19b10f9b8..f83e1209b11d 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -33,11 +33,11 @@ #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000) /** - * DRM_SCHED_FENCE_DONT_PIPELINE - Prefent dependency pipelining + * DRM_SCHED_FENCE_DONT_PIPELINE - Prevent dependency pipelining * * Setting this flag on a scheduler fence prevents pipelining of jobs depending * on this fence. In other words we always insert a full CPU round trip before - * dependen jobs are pushed to the hw queue. + * dependent jobs are pushed to the hw queue. */ #define DRM_SCHED_FENCE_DONT_PIPELINE DMA_FENCE_FLAG_USER_BITS @@ -71,7 +71,7 @@ enum drm_sched_priority { DRM_SCHED_PRIORITY_COUNT }; -/* Used to chose between FIFO and RR jobs scheduling */ +/* Used to choose between FIFO and RR job-scheduling */ extern int drm_sched_policy; #define DRM_SCHED_POLICY_RR 0 @@ -198,7 +198,7 @@ struct drm_sched_entity { * * Points to the finished fence of the last scheduled job. Only written * by the scheduler thread, can be accessed locklessly from - * drm_sched_job_arm() iff the queue is empty. + * drm_sched_job_arm() if the queue is empty. */ struct dma_fence __rcu *last_scheduled; @@ -247,7 +247,7 @@ struct drm_sched_entity { * @sched: the scheduler to which this rq belongs to. * @entities: list of the entities to be scheduled. * @current_entity: the entity which is to be scheduled. - * @rb_tree_root: root of time based priory queue of entities for FIFO scheduling + * @rb_tree_root: root of time based priority queue of entities for FIFO scheduling * * Run queue is a set of entities scheduling command submissions for * one specific ring. It implements the scheduling policy that selects @@ -321,7 +321,7 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); * @s_fence: contains the fences for the scheduling of job. * @finish_cb: the callback for the finished fence. * @credits: the number of credits this job contributes to the scheduler - * @work: Helper to reschdeule job kill to different context. + * @work: Helper to reschedule job kill to different context. * @id: a unique id assigned to each job scheduled on the scheduler. * @karma: increment on every hang caused by this job. If this exceeds the hang * limit of the scheduler then the job is marked guilty and will not diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 8d0e34dad446..c5ab6fd9ebe8 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -105,10 +105,10 @@ enum dma_resv_usage { * This should be used by submissions which don't want to participate in * any implicit synchronization. * - * The most common case are preemption fences, page table updates, TLB - * flushes as well as explicit synced user submissions. + * The most common cases are preemption fences, page table updates, TLB + * flushes as well as explicitly synced user submissions. * - * Explicit synced user user submissions can be promoted to + * Explicitly synced user submissions can be promoted to * DMA_RESV_USAGE_READ or DMA_RESV_USAGE_WRITE as needed using * dma_buf_import_sync_file() when implicit synchronization should * become necessary after initial adding of the fence. -- cgit v1.2.3 From 0992b2541e1cd9580c2e70fab7a78558de054bae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:08 -0300 Subject: drm/gem: Create a drm_gem_object_init_with_mnt() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some applications, such as applications that uses huge pages, we might want to have a different mountpoint, for which we pass mount flags that better match our usecase. Therefore, create a new function `drm_gem_object_init_with_mnt()` that allow us to define the tmpfs mountpoint where the GEM object will be created. If this parameter is NULL, then we fallback to `shmem_file_setup()`. Signed-off-by: Maíra Canal Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-5-mcanal@igalia.com --- include/drm/drm_gem.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index bae4865b2101..2ebf6e10cc44 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -472,6 +472,9 @@ void drm_gem_object_release(struct drm_gem_object *obj); void drm_gem_object_free(struct kref *kref); int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size); +int drm_gem_object_init_with_mnt(struct drm_device *dev, + struct drm_gem_object *obj, size_t size, + struct vfsmount *gemfs); void drm_gem_private_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size); void drm_gem_private_object_fini(struct drm_gem_object *obj); -- cgit v1.2.3 From be431dfec976e553a08883e26d0d0cc2598a8dfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:10 -0300 Subject: drm/gem: Create shmem GEM object in a given mountpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create a function `drm_gem_shmem_create_with_mnt()`, similar to `drm_gem_shmem_create()`, that has a mountpoint as a argument. This function will create a shmem GEM object in a given tmpfs mountpoint. This function will be useful for drivers that have a special mountpoint with flags enabled. Signed-off-by: Maíra Canal Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-7-mcanal@igalia.com --- include/drm/drm_gem_shmem_helper.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index efbc9f27312b..d22e3fb53631 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -97,6 +97,9 @@ struct drm_gem_shmem_object { container_of(obj, struct drm_gem_shmem_object, base) struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t size); +struct drm_gem_shmem_object *drm_gem_shmem_create_with_mnt(struct drm_device *dev, + size_t size, + struct vfsmount *gemfs); void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem); void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem); -- cgit v1.2.3 From 9f8e1c93a0d459463819d8bd222196b2655c279f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:15 -0300 Subject: drm/v3d: Expose Super Pages capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new V3D parameter to expose the support of Super Pages to userspace. The userspace might want to know this information to apply optimizations that are specific to kernels with Super Pages enabled. Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-12-mcanal@igalia.com --- include/uapi/drm/v3d_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 87fc5bb0a61e..2376c73abca1 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -290,6 +290,7 @@ enum drm_v3d_param { DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT, DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE, DRM_V3D_PARAM_MAX_PERF_COUNTERS, + DRM_V3D_PARAM_SUPPORTS_SUPER_PAGES, }; struct drm_v3d_get_param { -- cgit v1.2.3 From f73716fd4550d588a811f11c370e90c303f0829b Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Mon, 9 Sep 2024 08:48:20 +0200 Subject: drm/panthor: Add PANTHOR_GROUP_PRIORITY_REALTIME group priority This adds a new value to drm_panthor_group_priority exposing the realtime priority to userspace. This is required to implement NV_context_priority_realtime in Mesa. v2: - Add Steven Price r-b v3: - Add Boris Brezillon r-b Signed-off-by: Mary Guillemard Reviewed-by: Steven Price Reviewed-by: Boris Brezillon Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20240909064820.34982-3-mary.guillemard@collabora.com --- include/uapi/drm/panthor_drm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 1fd8473548ac..011a555e4674 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -720,6 +720,13 @@ enum drm_panthor_group_priority { * Requires CAP_SYS_NICE or DRM_MASTER. */ PANTHOR_GROUP_PRIORITY_HIGH, + + /** + * @PANTHOR_GROUP_PRIORITY_REALTIME: Realtime priority group. + * + * Requires CAP_SYS_NICE or DRM_MASTER. + */ + PANTHOR_GROUP_PRIORITY_REALTIME, }; /** -- cgit v1.2.3 From f70000ef23527f6d928d1175c66c5fafa968814b Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Mon, 9 Sep 2024 08:48:21 +0200 Subject: drm/panthor: Add DEV_QUERY_GROUP_PRIORITIES_INFO dev query Expose allowed group priorities with a new device query. This new uAPI will be used in Mesa to properly report what priorities a user can use for EGL_IMG_context_priority. Since this extends the uAPI and because userland needs a way to advertise priorities accordingly, this also bumps the driver minor version. v2: - Remove drm_panthor_group_allow_priority_flags definition - Document that allowed_mask is a bitmask of drm_panthor_group_priority v3: - Use BIT macro in panthor_query_group_priorities_info - Add r-b from Steven Price and Boris Brezillon Signed-off-by: Mary Guillemard Reviewed-by: Steven Price Reviewed-by: Boris Brezillon Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20240909064820.34982-4-mary.guillemard@collabora.com --- include/uapi/drm/panthor_drm.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 011a555e4674..87c9cb555dd1 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -263,6 +263,11 @@ enum drm_panthor_dev_query_type { /** @DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO: Query timestamp information. */ DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO, + + /** + * @DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO: Query allowed group priorities information. + */ + DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO, }; /** @@ -399,6 +404,23 @@ struct drm_panthor_timestamp_info { __u64 timestamp_offset; }; +/** + * struct drm_panthor_group_priorities_info - Group priorities information + * + * Structure grouping all queryable information relating to the allowed group priorities. + */ +struct drm_panthor_group_priorities_info { + /** + * @allowed_mask: Bitmask of the allowed group priorities. + * + * Each bit represents a variant of the enum drm_panthor_group_priority. + */ + __u8 allowed_mask; + + /** @pad: Padding fields, MBZ. */ + __u8 pad[3]; +}; + /** * struct drm_panthor_dev_query - Arguments passed to DRM_PANTHOR_IOCTL_DEV_QUERY */ -- cgit v1.2.3 From b4ad4ef374d66cc8df3188bb1ddb65bce5fc9e50 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 16 Sep 2024 15:33:20 +0200 Subject: gpu: host1x: Set up device DMA parameters In order to store device DMA parameters, the DMA framework depends on the device's dma_parms field to point at a valid memory location. Add backing storage for this in struct host1x_memory_context and point to it. Reported-by: Jonathan Hunter Reviewed-by: Christoph Hellwig Tested-by: Jon Hunter Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20240916133320.368620-1-thierry.reding@gmail.com --- include/linux/host1x.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 5a7a81e5f9bd..9fa9c30a34e6 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -471,6 +471,7 @@ struct host1x_memory_context { refcount_t ref; struct pid *owner; + struct device_dma_parameters dma_parms; struct device dev; u64 dma_mask; u32 stream_id; -- cgit v1.2.3 From eb1f4adf9101573fc2347978a60d71c4f1176cca Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 24 Sep 2024 09:11:59 +0200 Subject: drm/fbdev-helper: Move color-mode lookup into 4CC format helper The color mode as specified on the kernel command line gives the user's preferred color depth and number of bits per pixel. Move the color-mode-to-format conversion from fbdev helpers into a 4CC helper, so that it can be shared among DRM clients. v2: - fix grammar in commit message (Laurent) Signed-off-by: Thomas Zimmermann Reviewed-by: Laurent Pinchart Link: https://patchwork.freedesktop.org/patch/msgid/20240924071734.98201-2-tzimmermann@suse.de --- include/drm/drm_fourcc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_fourcc.h b/include/drm/drm_fourcc.h index ccf91daa4307..c3f4405d6662 100644 --- a/include/drm/drm_fourcc.h +++ b/include/drm/drm_fourcc.h @@ -313,6 +313,7 @@ drm_get_format_info(struct drm_device *dev, uint32_t drm_mode_legacy_fb_format(uint32_t bpp, uint32_t depth); uint32_t drm_driver_legacy_fb_format(struct drm_device *dev, uint32_t bpp, uint32_t depth); +uint32_t drm_driver_color_mode_format(struct drm_device *dev, unsigned int color_mode); unsigned int drm_format_info_block_width(const struct drm_format_info *info, int plane); unsigned int drm_format_info_block_height(const struct drm_format_info *info, -- cgit v1.2.3 From 5d08c44e47b9d41366714552bdd374ac4b595591 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 24 Sep 2024 09:12:01 +0200 Subject: drm/fbdev: Add memory-agnostic fbdev client Add an fbdev client that can work with any memory manager. The client implementation is the same as existing code in fbdev-dma or fbdev-shmem. Provide struct drm_driver.fbdev_probe for the new client to allocate the surface GEM buffer. The new callback replaces fb_probe of struct drm_fb_helper_funcs, which does the same. To use the new client, DRM drivers set fbdev_probe in their struct drm_driver instance and call drm_fbdev_client_setup(). Probing and creating the fbdev surface buffer is now independent from the other operations in struct drm_fb_helper. For the pixel format, the fbdev client either uses a specified format, the value in preferred_depth or 32-bit RGB. v2: - test for struct drm_fb_helper.funcs for NULL (Sui) - respect struct drm_mode_config.preferred_depth for default format Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240924071734.98201-4-tzimmermann@suse.de --- include/drm/drm_drv.h | 18 ++++++++++++++++++ include/drm/drm_fbdev_client.h | 19 +++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 include/drm/drm_fbdev_client.h (limited to 'include') diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index 02ea4e3248fd..36a606af4ba1 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -34,6 +34,8 @@ #include +struct drm_fb_helper; +struct drm_fb_helper_surface_size; struct drm_file; struct drm_gem_object; struct drm_master; @@ -366,6 +368,22 @@ struct drm_driver { struct drm_device *dev, uint32_t handle, uint64_t *offset); + /** + * @fbdev_probe + * + * Allocates and initialize the fb_info structure for fbdev emulation. + * Furthermore it also needs to allocate the DRM framebuffer used to + * back the fbdev. + * + * This callback is mandatory for fbdev support. + * + * Returns: + * + * 0 on success ot a negative error code otherwise. + */ + int (*fbdev_probe)(struct drm_fb_helper *fbdev_helper, + struct drm_fb_helper_surface_size *sizes); + /** * @show_fdinfo: * diff --git a/include/drm/drm_fbdev_client.h b/include/drm/drm_fbdev_client.h new file mode 100644 index 000000000000..e11a5614f127 --- /dev/null +++ b/include/drm/drm_fbdev_client.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef DRM_FBDEV_CLIENT_H +#define DRM_FBDEV_CLIENT_H + +struct drm_device; +struct drm_format_info; + +#ifdef CONFIG_DRM_FBDEV_EMULATION +int drm_fbdev_client_setup(struct drm_device *dev, const struct drm_format_info *format); +#else +static inline int drm_fbdev_client_setup(struct drm_device *dev, + const struct drm_format_info *format) +{ + return 0; +} +#endif + +#endif -- cgit v1.2.3 From d07fdf9225922d3e36ebd13ccab3df62b1ccdab3 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 24 Sep 2024 09:12:02 +0200 Subject: drm: Add client-agnostic setup helper DRM may support multiple in-kernel clients that run as soon as a DRM driver has been registered. To select the client(s) in a single place, introduce drm_client_setup(). Drivers that call the new helper automatically instantiate the kernel's configured default clients. Only fbdev emulation is currently supported. Later versions can add support for DRM-based logging, a boot logo or even a console. Some drivers handle the color mode for clients internally. Provide the helper drm_client_setup_with_color_mode() for them. Using the new interface requires the driver to select DRM_CLIENT_SELECTION in its Kconfig. For now this only enables the client-setup helpers if the fbdev client has been configured by the user. A future patchset will further modularize client support and rework DRM_CLIENT_SELECTION to select the correct dependencies for all its clients. v5: - add CONFIG_DRM_CLIENT_SELECTION und DRM_CLIENT_SETUP v4: - fix docs for drm_client_setup_with_fourcc() (Geert) v3: - fix build error v2: - add drm_client_setup_with_fourcc() (Laurent) - push default-format handling into actual clients Signed-off-by: Thomas Zimmermann Reviewed-by: Laurent Pinchart Link: https://patchwork.freedesktop.org/patch/msgid/20240924071734.98201-5-tzimmermann@suse.de --- include/drm/drm_client_setup.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 include/drm/drm_client_setup.h (limited to 'include') diff --git a/include/drm/drm_client_setup.h b/include/drm/drm_client_setup.h new file mode 100644 index 000000000000..46aab3fb46be --- /dev/null +++ b/include/drm/drm_client_setup.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef DRM_CLIENT_SETUP_H +#define DRM_CLIENT_SETUP_H + +#include + +struct drm_device; +struct drm_format_info; + +#if defined(CONFIG_DRM_CLIENT_SETUP) +void drm_client_setup(struct drm_device *dev, const struct drm_format_info *format); +void drm_client_setup_with_fourcc(struct drm_device *dev, u32 fourcc); +void drm_client_setup_with_color_mode(struct drm_device *dev, unsigned int color_mode); +#else +static inline void drm_client_setup(struct drm_device *dev, + const struct drm_format_info *format) +{ } +static inline void drm_client_setup_with_fourcc(struct drm_device *dev, u32 fourcc) +{ } +static inline void drm_client_setup_with_color_mode(struct drm_device *dev, + unsigned int color_mode) +{ } +#endif + +#endif -- cgit v1.2.3 From 8998eedda2539d2528cfebdc7c17eed0ad35b714 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 24 Sep 2024 09:12:03 +0200 Subject: drm/fbdev-dma: Support struct drm_driver.fbdev_probe Rework fbdev probing to support fbdev_probe in struct drm_driver and reimplement the old fb_probe callback on top of it. Provide an initializer macro for struct drm_driver that sets the callback according to the kernel configuration. This change allows the common fbdev client to run on top of DMA- based DRM drivers. Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240924071734.98201-6-tzimmermann@suse.de --- include/drm/drm_fbdev_dma.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/drm/drm_fbdev_dma.h b/include/drm/drm_fbdev_dma.h index 2da7ee784133..6ae4de46497c 100644 --- a/include/drm/drm_fbdev_dma.h +++ b/include/drm/drm_fbdev_dma.h @@ -4,12 +4,24 @@ #define DRM_FBDEV_DMA_H struct drm_device; +struct drm_fb_helper; +struct drm_fb_helper_surface_size; #ifdef CONFIG_DRM_FBDEV_EMULATION +int drm_fbdev_dma_driver_fbdev_probe(struct drm_fb_helper *fb_helper, + struct drm_fb_helper_surface_size *sizes); + +#define DRM_FBDEV_DMA_DRIVER_OPS \ + .fbdev_probe = drm_fbdev_dma_driver_fbdev_probe + void drm_fbdev_dma_setup(struct drm_device *dev, unsigned int preferred_bpp); #else static inline void drm_fbdev_dma_setup(struct drm_device *dev, unsigned int preferred_bpp) { } + +#define DRM_FBDEV_DMA_DRIVER_OPS \ + .fbdev_probe = NULL + #endif #endif -- cgit v1.2.3 From 731fddf4302ec00871fd5ae252c0aa765d61a9ad Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 24 Sep 2024 09:12:46 +0200 Subject: drm/fbdev-dma: Remove obsolete setup function The old setup function drm_fbdev_dma_setup() is unused. Remove it and its internal callbacks. New drivers should call drm_client_setup() instead. Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240924071734.98201-49-tzimmermann@suse.de --- include/drm/drm_fbdev_dma.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/drm/drm_fbdev_dma.h b/include/drm/drm_fbdev_dma.h index 6ae4de46497c..fb3f2a9aa01a 100644 --- a/include/drm/drm_fbdev_dma.h +++ b/include/drm/drm_fbdev_dma.h @@ -3,7 +3,6 @@ #ifndef DRM_FBDEV_DMA_H #define DRM_FBDEV_DMA_H -struct drm_device; struct drm_fb_helper; struct drm_fb_helper_surface_size; @@ -13,15 +12,9 @@ int drm_fbdev_dma_driver_fbdev_probe(struct drm_fb_helper *fb_helper, #define DRM_FBDEV_DMA_DRIVER_OPS \ .fbdev_probe = drm_fbdev_dma_driver_fbdev_probe - -void drm_fbdev_dma_setup(struct drm_device *dev, unsigned int preferred_bpp); #else -static inline void drm_fbdev_dma_setup(struct drm_device *dev, unsigned int preferred_bpp) -{ } - #define DRM_FBDEV_DMA_DRIVER_OPS \ .fbdev_probe = NULL - #endif #endif -- cgit v1.2.3 From f0f195d1a3aedef126c3ed159712ed57a34daa1c Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 24 Sep 2024 09:12:47 +0200 Subject: drm/fbdev-shmem: Support struct drm_driver.fbdev_probe Rework fbdev probing to support fbdev_probe in struct drm_driver and reimplement the old fb_probe callback on top of it. Provide an initializer macro for struct drm_driver that sets the callback according to the kernel configuration. This change allows the common fbdev client to run on top of SHMEM- based DRM drivers. Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240924071734.98201-50-tzimmermann@suse.de --- include/drm/drm_fbdev_shmem.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/drm/drm_fbdev_shmem.h b/include/drm/drm_fbdev_shmem.h index fb43cadd1950..3a5d1efa9d55 100644 --- a/include/drm/drm_fbdev_shmem.h +++ b/include/drm/drm_fbdev_shmem.h @@ -4,12 +4,23 @@ #define DRM_FBDEV_SHMEM_H struct drm_device; +struct drm_fb_helper; +struct drm_fb_helper_surface_size; #ifdef CONFIG_DRM_FBDEV_EMULATION +int drm_fbdev_shmem_driver_fbdev_probe(struct drm_fb_helper *fb_helper, + struct drm_fb_helper_surface_size *sizes); + +#define DRM_FBDEV_SHMEM_DRIVER_OPS \ + .fbdev_probe = drm_fbdev_shmem_driver_fbdev_probe + void drm_fbdev_shmem_setup(struct drm_device *dev, unsigned int preferred_bpp); #else static inline void drm_fbdev_shmem_setup(struct drm_device *dev, unsigned int preferred_bpp) { } + +#define DRM_FBDEV_SHMEM_DRIVER_OPS \ + .fbdev_probe = NULL #endif #endif -- cgit v1.2.3 From bf0978203a746137ce5074a465f83a6cf12e813f Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 24 Sep 2024 09:13:01 +0200 Subject: drm/fbdev-shmem: Remove obsolete setup function The old setup function drm_fbdev_shmem_setup() is unused. Remove it and its internal callbacks. New drivers should call drm_client_setup() instead. Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240924071734.98201-64-tzimmermann@suse.de --- include/drm/drm_fbdev_shmem.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/drm/drm_fbdev_shmem.h b/include/drm/drm_fbdev_shmem.h index 3a5d1efa9d55..2fc708964d75 100644 --- a/include/drm/drm_fbdev_shmem.h +++ b/include/drm/drm_fbdev_shmem.h @@ -3,7 +3,6 @@ #ifndef DRM_FBDEV_SHMEM_H #define DRM_FBDEV_SHMEM_H -struct drm_device; struct drm_fb_helper; struct drm_fb_helper_surface_size; @@ -13,12 +12,7 @@ int drm_fbdev_shmem_driver_fbdev_probe(struct drm_fb_helper *fb_helper, #define DRM_FBDEV_SHMEM_DRIVER_OPS \ .fbdev_probe = drm_fbdev_shmem_driver_fbdev_probe - -void drm_fbdev_shmem_setup(struct drm_device *dev, unsigned int preferred_bpp); #else -static inline void drm_fbdev_shmem_setup(struct drm_device *dev, unsigned int preferred_bpp) -{ } - #define DRM_FBDEV_SHMEM_DRIVER_OPS \ .fbdev_probe = NULL #endif -- cgit v1.2.3 From c7c1b9e1d52b0a0dbb0ee552efdc3360c0f5363c Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 24 Sep 2024 09:13:02 +0200 Subject: drm/fbdev-ttm: Support struct drm_driver.fbdev_probe Rework fbdev probing to support fbdev_probe in struct drm_driver and reimplement the old fb_probe callback on top of it. Provide an initializer macro for struct drm_driver that sets the callback according to the kernel configuration. This change allows the common fbdev client to run on top of TTM- based DRM drivers. Signed-off-by: Thomas Zimmermann Acked-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240924071734.98201-65-tzimmermann@suse.de --- include/drm/drm_fbdev_ttm.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/drm/drm_fbdev_ttm.h b/include/drm/drm_fbdev_ttm.h index 9e6c3bdf3537..243685d02eb1 100644 --- a/include/drm/drm_fbdev_ttm.h +++ b/include/drm/drm_fbdev_ttm.h @@ -3,11 +3,24 @@ #ifndef DRM_FBDEV_TTM_H #define DRM_FBDEV_TTM_H +#include + struct drm_device; +struct drm_fb_helper; +struct drm_fb_helper_surface_size; #ifdef CONFIG_DRM_FBDEV_EMULATION +int drm_fbdev_ttm_driver_fbdev_probe(struct drm_fb_helper *fb_helper, + struct drm_fb_helper_surface_size *sizes); + +#define DRM_FBDEV_TTM_DRIVER_OPS \ + .fbdev_probe = drm_fbdev_ttm_driver_fbdev_probe + void drm_fbdev_ttm_setup(struct drm_device *dev, unsigned int preferred_bpp); #else +#define DRM_FBDEV_TTM_DRIVER_OPS \ + .fbdev_probe = NULL + static inline void drm_fbdev_ttm_setup(struct drm_device *dev, unsigned int preferred_bpp) { } #endif -- cgit v1.2.3 From 1000634477d8d178179b1ad45d92e925fabe3deb Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 24 Sep 2024 09:13:10 +0200 Subject: drm/fbdev-ttm: Remove obsolete setup function The old setup function drm_fbdev_ttm_setup() is unused. Remove it and its internal callbacks. New drivers should call drm_client_setup() instead. Signed-off-by: Thomas Zimmermann Acked-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240924071734.98201-73-tzimmermann@suse.de --- include/drm/drm_fbdev_ttm.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/drm/drm_fbdev_ttm.h b/include/drm/drm_fbdev_ttm.h index 243685d02eb1..ad4a10bb4c78 100644 --- a/include/drm/drm_fbdev_ttm.h +++ b/include/drm/drm_fbdev_ttm.h @@ -5,7 +5,6 @@ #include -struct drm_device; struct drm_fb_helper; struct drm_fb_helper_surface_size; @@ -15,14 +14,9 @@ int drm_fbdev_ttm_driver_fbdev_probe(struct drm_fb_helper *fb_helper, #define DRM_FBDEV_TTM_DRIVER_OPS \ .fbdev_probe = drm_fbdev_ttm_driver_fbdev_probe - -void drm_fbdev_ttm_setup(struct drm_device *dev, unsigned int preferred_bpp); #else #define DRM_FBDEV_TTM_DRIVER_OPS \ .fbdev_probe = NULL - -static inline void drm_fbdev_ttm_setup(struct drm_device *dev, unsigned int preferred_bpp) -{ } #endif #endif -- cgit v1.2.3 From 1cc2e1faafb3b5a2be25112559bdb495736b5af7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 20 Sep 2024 10:57:57 +0200 Subject: pwm: Add more locking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This ensures that a pwm_chip that has no corresponding driver isn't used and that a driver doesn't go away while a callback is still running. In the presence of device links this isn't necessary yet (so this is no fix) but for pwm character device support this is needed. To not serialize all pwm_apply_state() calls, this introduces a per chip lock. An additional complication is that for atomic chips a mutex cannot be used (as pwm_apply_atomic() must not sleep) and a spinlock cannot be held while calling an operation for a sleeping chip. So depending on the chip being atomic or not a spinlock or a mutex is used. An additional change implemented here is that on driver remove the .free() callback is called for each requested pwm_device. This is the right time because later (e.g. when the consumer calls pwm_put()) the free function is (maybe) not available any more. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/026aa891c8270a11723a1ba7e4256f456f7e1e86.1726819463.git.u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- include/linux/pwm.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 8acd60b53f58..3ea73e075abe 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -275,6 +275,9 @@ struct pwm_ops { * @of_xlate: request a PWM device given a device tree PWM specifier * @atomic: can the driver's ->apply() be called in atomic context * @uses_pwmchip_alloc: signals if pwmchip_allow was used to allocate this chip + * @operational: signals if the chip can be used (or is already deregistered) + * @nonatomic_lock: mutex for nonatomic chips + * @atomic_lock: mutex for atomic chips * @pwms: array of PWM devices allocated by the framework */ struct pwm_chip { @@ -290,6 +293,16 @@ struct pwm_chip { /* only used internally by the PWM framework */ bool uses_pwmchip_alloc; + bool operational; + union { + /* + * depending on the chip being atomic or not either the mutex or + * the spinlock is used. It protects .operational and + * synchronizes the callbacks in .ops + */ + struct mutex nonatomic_lock; + spinlock_t atomic_lock; + }; struct pwm_device pwms[] __counted_by(npwm); }; -- cgit v1.2.3 From 17e40c25158f2505cbcdeda96624afcbab4af368 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 20 Sep 2024 10:57:58 +0200 Subject: pwm: New abstraction for PWM waveforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Up to now the configuration of a PWM setting is described exclusively by a struct pwm_state which contains information about period, duty_cycle, polarity and if the PWM is enabled. (There is another member usage_power which doesn't completely fit into pwm_state, I ignore it here for simplicity.) Instead of a polarity the new abstraction has a member duty_offset_ns that defines when the rising edge happens after the period start. This is more general, as with a pwm_state the rising edge can only happen at the period's start or such that the falling edge is at the end of the period (i.e. duty_offset_ns == 0 or duty_offset_ns == period_length_ns - duty_length_ns). A disabled PWM is modeled by .period_length_ns = 0. In my eyes this is a nice usage of that otherwise unusable setting, as it doesn't define anything about the future which matches the fact that consumers should consider the state of the output as undefined and it's just there to say "No further requirements about the output, you can save some power.". Further I renamed period and duty_cycle to period_length_ns and duty_length_ns. In the past there was confusion from time to time about duty_cycle being measured in nanoseconds because people expected a percentage of period instead. With "length_ns" as suffix the semantic should be more obvious to people unfamiliar with the pwm subsystem. period is renamed to period_length_ns for consistency. The API for consumers doesn't change yet, but lowlevel drivers can implement callbacks that work with pwm_waveforms instead of pwm_states. A new thing about these callbacks is that the calculation of hardware settings needed to implement a certain waveform is separated from actually writing these settings. The motivation for that is that this allows a consumer to query the hardware capabilities without actually modifying the hardware state. The rounding rules that are expected to be implemented in the round_waveform_tohw() are: First pick the biggest possible period not bigger than wf->period_length_ns. For that period pick the biggest possible duty setting not bigger than wf->duty_length_ns. Third pick the biggest possible offset not bigger than wf->duty_offset_ns. If the requested period is too small for the hardware, it's expected that a setting with the minimal period and duty_length_ns = duty_offset_ns = 0 is returned and this fact is signaled by a return value of 1. Signed-off-by: Uwe Kleine-König Tested-by: Trevor Gamblin Link: https://lore.kernel.org/r/df0faa33bf9e7c9e2e5eab8d31bbf61e861bd401.1726819463.git.u.kleine-koenig@baylibre.com [ukleinek: Update pwm_check_rounding() to return bool instead of int.] Signed-off-by: Uwe Kleine-König --- include/linux/pwm.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 3ea73e075abe..d8cfe1c9b19d 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -49,6 +49,31 @@ enum { PWMF_EXPORTED = 1, }; +/** + * struct pwm_waveform - description of a PWM waveform + * @period_length_ns: PWM period + * @duty_length_ns: PWM duty cycle + * @duty_offset_ns: offset of the rising edge from the period's start + * + * This is a representation of a PWM waveform alternative to struct pwm_state + * below. It's more expressive than struct pwm_state as it contains a + * duty_offset_ns and so can represent offsets other than zero (with .polarity = + * PWM_POLARITY_NORMAL) and period - duty_cycle (.polarity = + * PWM_POLARITY_INVERSED). + * + * Note there is no explicit bool for enabled. A "disabled" PWM is represented + * by .period_length_ns = 0. Note further that the behaviour of a "disabled" PWM + * is undefined. Depending on the hardware's capabilities it might drive the + * active or inactive level, go high-z or even continue to toggle. + * + * The unit for all three members is nanoseconds. + */ +struct pwm_waveform { + u64 period_length_ns; + u64 duty_length_ns; + u64 duty_offset_ns; +}; + /* * struct pwm_state - state of a PWM channel * @period: PWM period (in nanoseconds) @@ -259,6 +284,17 @@ struct pwm_ops { void (*free)(struct pwm_chip *chip, struct pwm_device *pwm); int (*capture)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_capture *result, unsigned long timeout); + + size_t sizeof_wfhw; + int (*round_waveform_tohw)(struct pwm_chip *chip, struct pwm_device *pwm, + const struct pwm_waveform *wf, void *wfhw); + int (*round_waveform_fromhw)(struct pwm_chip *chip, struct pwm_device *pwm, + const void *wfhw, struct pwm_waveform *wf); + int (*read_waveform)(struct pwm_chip *chip, struct pwm_device *pwm, + void *wfhw); + int (*write_waveform)(struct pwm_chip *chip, struct pwm_device *pwm, + const void *wfhw); + int (*apply)(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state); int (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, -- cgit v1.2.3 From 6c5126c6406d1c31e91f5b925c621c1c785366be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 20 Sep 2024 10:57:59 +0200 Subject: pwm: Provide new consumer API functions for waveforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide API functions for consumers to work with waveforms. Note that one relevant difference between pwm_get_state() and pwm_get_waveform*() is that the latter yields the actually configured hardware state, while the former yields the last state passed to pwm_apply*() and so doesn't account for hardware specific rounding. Signed-off-by: Uwe Kleine-König Tested-by: Trevor Gamblin Link: https://lore.kernel.org/r/6c97d27682853f603e18e9196043886dd671845d.1726819463.git.u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- include/linux/pwm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index d8cfe1c9b19d..c3d9ddeafa65 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -358,7 +358,11 @@ static inline void pwmchip_set_drvdata(struct pwm_chip *chip, void *data) } #if IS_ENABLED(CONFIG_PWM) -/* PWM user APIs */ + +/* PWM consumer APIs */ +int pwm_round_waveform_might_sleep(struct pwm_device *pwm, struct pwm_waveform *wf); +int pwm_get_waveform_might_sleep(struct pwm_device *pwm, struct pwm_waveform *wf); +int pwm_set_waveform_might_sleep(struct pwm_device *pwm, const struct pwm_waveform *wf, bool exact); int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state); int pwm_apply_atomic(struct pwm_device *pwm, const struct pwm_state *state); int pwm_adjust_config(struct pwm_device *pwm); -- cgit v1.2.3 From 1afd01db1a76cdd1d96696e3790d66c79621784c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 20 Sep 2024 10:58:00 +0200 Subject: pwm: Add tracing for waveform callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds trace events for the recently introduced waveform callbacks. With the introduction of some helper macros consistency among the different events is ensured. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/1d71879b0de3bf01459c7a9d0f040d43eb5ace56.1726819463.git.u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- include/trace/events/pwm.h | 134 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 126 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/pwm.h b/include/trace/events/pwm.h index 8022701c446d..8ba898fd335c 100644 --- a/include/trace/events/pwm.h +++ b/include/trace/events/pwm.h @@ -8,15 +8,135 @@ #include #include +#define TP_PROTO_pwm(args...) \ + TP_PROTO(struct pwm_device *pwm, args) + +#define TP_ARGS_pwm(args...) \ + TP_ARGS(pwm, args) + +#define TP_STRUCT__entry_pwm(args...) \ + TP_STRUCT__entry( \ + __field(unsigned int, chipid) \ + __field(unsigned int, hwpwm) \ + args) + +#define TP_fast_assign_pwm(args...) \ + TP_fast_assign( \ + __entry->chipid = pwm->chip->id; \ + __entry->hwpwm = pwm->hwpwm; \ + args) + +#define TP_printk_pwm(fmt, args...) \ + TP_printk("pwmchip%u.%u: " fmt, __entry->chipid, __entry->hwpwm, args) + +#define __field_pwmwf(wf) \ + __field(u64, wf ## _period_length_ns) \ + __field(u64, wf ## _duty_length_ns) \ + __field(u64, wf ## _duty_offset_ns) \ + +#define fast_assign_pwmwf(wf) \ + __entry->wf ## _period_length_ns = wf->period_length_ns; \ + __entry->wf ## _duty_length_ns = wf->duty_length_ns; \ + __entry->wf ## _duty_offset_ns = wf->duty_offset_ns + +#define printk_pwmwf_format(wf) \ + "%lld/%lld [+%lld]" + +#define printk_pwmwf_formatargs(wf) \ + __entry->wf ## _duty_length_ns, __entry->wf ## _period_length_ns, __entry->wf ## _duty_offset_ns + +TRACE_EVENT(pwm_round_waveform_tohw, + + TP_PROTO_pwm(const struct pwm_waveform *wf, void *wfhw, int err), + + TP_ARGS_pwm(wf, wfhw, err), + + TP_STRUCT__entry_pwm( + __field_pwmwf(wf) + __field(void *, wfhw) + __field(int, err) + ), + + TP_fast_assign_pwm( + fast_assign_pwmwf(wf); + __entry->wfhw = wfhw; + __entry->err = err; + ), + + TP_printk_pwm(printk_pwmwf_format(wf) " > %p err=%d", + printk_pwmwf_formatargs(wf), __entry->wfhw, __entry->err) +); + +TRACE_EVENT(pwm_round_waveform_fromhw, + + TP_PROTO_pwm(const void *wfhw, struct pwm_waveform *wf, int err), + + TP_ARGS_pwm(wfhw, wf, err), + + TP_STRUCT__entry_pwm( + __field(const void *, wfhw) + __field_pwmwf(wf) + __field(int, err) + ), + + TP_fast_assign_pwm( + __entry->wfhw = wfhw; + fast_assign_pwmwf(wf); + __entry->err = err; + ), + + TP_printk_pwm("%p > " printk_pwmwf_format(wf) " err=%d", + __entry->wfhw, printk_pwmwf_formatargs(wf), __entry->err) +); + +TRACE_EVENT(pwm_read_waveform, + + TP_PROTO_pwm(void *wfhw, int err), + + TP_ARGS_pwm(wfhw, err), + + TP_STRUCT__entry_pwm( + __field(void *, wfhw) + __field(int, err) + ), + + TP_fast_assign_pwm( + __entry->wfhw = wfhw; + __entry->err = err; + ), + + TP_printk_pwm("%p err=%d", + __entry->wfhw, __entry->err) +); + +TRACE_EVENT(pwm_write_waveform, + + TP_PROTO_pwm(const void *wfhw, int err), + + TP_ARGS_pwm(wfhw, err), + + TP_STRUCT__entry_pwm( + __field(const void *, wfhw) + __field(int, err) + ), + + TP_fast_assign_pwm( + __entry->wfhw = wfhw; + __entry->err = err; + ), + + TP_printk_pwm("%p err=%d", + __entry->wfhw, __entry->err) +); + + DECLARE_EVENT_CLASS(pwm, TP_PROTO(struct pwm_device *pwm, const struct pwm_state *state, int err), TP_ARGS(pwm, state, err), - TP_STRUCT__entry( - __field(unsigned int, chipid) - __field(unsigned int, hwpwm) + TP_STRUCT__entry_pwm( __field(u64, period) __field(u64, duty_cycle) __field(enum pwm_polarity, polarity) @@ -24,9 +144,7 @@ DECLARE_EVENT_CLASS(pwm, __field(int, err) ), - TP_fast_assign( - __entry->chipid = pwm->chip->id; - __entry->hwpwm = pwm->hwpwm; + TP_fast_assign_pwm( __entry->period = state->period; __entry->duty_cycle = state->duty_cycle; __entry->polarity = state->polarity; @@ -34,8 +152,8 @@ DECLARE_EVENT_CLASS(pwm, __entry->err = err; ), - TP_printk("pwmchip%u.%u: period=%llu duty_cycle=%llu polarity=%d enabled=%d err=%d", - __entry->chipid, __entry->hwpwm, __entry->period, __entry->duty_cycle, + TP_printk_pwm("period=%llu duty_cycle=%llu polarity=%d enabled=%d err=%d", + __entry->period, __entry->duty_cycle, __entry->polarity, __entry->enabled, __entry->err) ); -- cgit v1.2.3 From caf78b0465053c23aa6211b9815dd5433766627d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 24 Sep 2024 12:08:08 +0200 Subject: regcache: Improve documentation of available cache types There is some user confusion about which cache types to choose when which is not helped by the lack of any central documentation providing an overview of what's available. Provide a short overview in the API header to try to help reduce this. Signed-off-by: Mark Brown Link: https://patch.msgid.link/20240924-regcache-document-types-v1-1-e157054e1215@kernel.org Signed-off-by: Mark Brown --- include/linux/regmap.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index f9ccad32fc5c..da07584284ab 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -54,7 +54,14 @@ struct sdw_slave; #define REGMAP_UPSHIFT(s) (-(s)) #define REGMAP_DOWNSHIFT(s) (s) -/* An enum of all the supported cache types */ +/* + * The supported cache types, the default is no cache. Any new caches + * should usually use the maple tree cache unless they specifically + * require that there are never any allocations at runtime and can't + * provide defaults in which case they should use the flat cache. The + * rbtree cache *may* have some performance advantage for very low end + * systems that make heavy use of cache syncs but is mainly legacy. + */ enum regcache_type { REGCACHE_NONE, REGCACHE_RBTREE, -- cgit v1.2.3 From 5441b6975adc26aeaca316b9075e04a98238b1b3 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 25 Sep 2024 17:38:04 +0800 Subject: regulator: Add of_regulator_get_optional() for pure DT regulator lookup The to-be-introduced I2C component prober needs to enable regulator supplies (and toggle GPIO pins) for the various components it intends to probe. To support this, a new "pure DT lookup" method for getting regulator supplies is needed, since the device normally requesting the supply won't get created until after the component is probed to be available. Add a new of_regulator_get_optional() function for this. This mirrors the existing regulator_get_optional() function, but is OF-specific. The underlying code that supports the existing regulator_get*() functions has been reworked in previous patches to support this specific case. Also convert an existing usage of "dev && dev->of_node" to "dev_of_node(dev)". Link: https://lore.kernel.org/all/20231220203537.83479-2-jernej.skrabec@gmail.com/ [1] Signed-off-by: Chen-Yu Tsai Link: https://patch.msgid.link/20240925093807.1026949-2-wenst@chromium.org Reviewed-by: Andy Shevchenko Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index b9ce521910a0..2b22f07e491c 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -168,6 +168,19 @@ int devm_regulator_get_enable_read_voltage(struct device *dev, const char *id); void regulator_put(struct regulator *regulator); void devm_regulator_put(struct regulator *regulator); +#if IS_ENABLED(CONFIG_OF) +struct regulator *__must_check of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id); +#else +static inline struct regulator *__must_check of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id) +{ + return ERR_PTR(-ENODEV); +} +#endif + int regulator_register_supply_alias(struct device *dev, const char *id, struct device *alias_dev, const char *alias_id); @@ -350,6 +363,13 @@ devm_regulator_get_optional(struct device *dev, const char *id) return ERR_PTR(-ENODEV); } +static inline struct regulator *__must_check of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id) +{ + return ERR_PTR(-ENODEV); +} + static inline void regulator_put(struct regulator *regulator) { } -- cgit v1.2.3 From 36ec3f437227470568e5f460997f367f5446a34d Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 25 Sep 2024 17:38:05 +0800 Subject: regulator: Add devres version of of_regulator_get_optional() There are existing uses for a devres version of of_regulator_get_optional() in power domain drivers. On MediaTek platforms, power domains may have regulator supplies tied to them. The driver currently tries to use devm_regulator_get() to not have to manage the lifecycle, but ends up doing it in a very hacky way by replacing the device node of the power domain controller device to the device node of the power domain that is currently being registered, getting the supply, and reverting the device node. Provide a better API so that the hack can be replaced. Signed-off-by: Chen-Yu Tsai Link: https://patch.msgid.link/20240925093807.1026949-3-wenst@chromium.org Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 2b22f07e491c..8c3c372ad735 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -172,6 +172,9 @@ void devm_regulator_put(struct regulator *regulator); struct regulator *__must_check of_regulator_get_optional(struct device *dev, struct device_node *node, const char *id); +struct regulator *__must_check devm_of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id); #else static inline struct regulator *__must_check of_regulator_get_optional(struct device *dev, struct device_node *node, @@ -179,6 +182,13 @@ static inline struct regulator *__must_check of_regulator_get_optional(struct de { return ERR_PTR(-ENODEV); } + +static inline struct regulator *__must_check devm_of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id) +{ + return ERR_PTR(-ENODEV); +} #endif int regulator_register_supply_alias(struct device *dev, const char *id, @@ -370,6 +380,13 @@ static inline struct regulator *__must_check of_regulator_get_optional(struct de return ERR_PTR(-ENODEV); } +static inline struct regulator *__must_check devm_of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id) +{ + return ERR_PTR(-ENODEV); +} + static inline void regulator_put(struct regulator *regulator) { } -- cgit v1.2.3 From 0809a9ccac4a2ffdfd1561bb551aec6099775545 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 2 Sep 2024 20:59:47 +0800 Subject: spi: remove {devm_}spi_alloc_master/slave() All the {devm_}spi_alloc_master/slave() have been replaced, so they can be removed and replaced in doc and comment. No functional changed. Signed-off-by: Yang Yingliang Link: https://patch.msgid.link/20240902125947.1368-8-yangyingliang@huaweicloud.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 4b95663163e0..8497f4747e24 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -824,21 +824,6 @@ void spi_take_timestamp_post(struct spi_controller *ctlr, extern struct spi_controller *__spi_alloc_controller(struct device *host, unsigned int size, bool slave); -static inline struct spi_controller *spi_alloc_master(struct device *host, - unsigned int size) -{ - return __spi_alloc_controller(host, size, false); -} - -static inline struct spi_controller *spi_alloc_slave(struct device *host, - unsigned int size) -{ - if (!IS_ENABLED(CONFIG_SPI_SLAVE)) - return NULL; - - return __spi_alloc_controller(host, size, true); -} - static inline struct spi_controller *spi_alloc_host(struct device *dev, unsigned int size) { @@ -858,21 +843,6 @@ struct spi_controller *__devm_spi_alloc_controller(struct device *dev, unsigned int size, bool slave); -static inline struct spi_controller *devm_spi_alloc_master(struct device *dev, - unsigned int size) -{ - return __devm_spi_alloc_controller(dev, size, false); -} - -static inline struct spi_controller *devm_spi_alloc_slave(struct device *dev, - unsigned int size) -{ - if (!IS_ENABLED(CONFIG_SPI_SLAVE)) - return NULL; - - return __devm_spi_alloc_controller(dev, size, true); -} - static inline struct spi_controller *devm_spi_alloc_host(struct device *dev, unsigned int size) { -- cgit v1.2.3 From 6074e905023d09f64f2c896f475820a5623deb2c Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 16 Sep 2024 13:00:25 +0200 Subject: firmware: sysfb: Add a sysfb_handles_screen_info() helper function That can be used by drivers to check if the Generic System Framebuffers (sysfb) support can handle the data contained in the global screen_info. Drivers might need this information to know if have to setup the system framebuffer, or if they have to delegate this action to sysfb instead. Suggested-by: Thomas Zimmermann Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20240916110040.1688511-2-javierm@redhat.com Signed-off-by: Tzung-Bi Shih --- include/linux/sysfb.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index bef5f06a91de..07cbab516942 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -60,12 +60,19 @@ struct efifb_dmi_info { void sysfb_disable(struct device *dev); +bool sysfb_handles_screen_info(void); + #else /* CONFIG_SYSFB */ static inline void sysfb_disable(struct device *dev) { } +static inline bool sysfb_handles_screen_info(void) +{ + return false; +} + #endif /* CONFIG_SYSFB */ #ifdef CONFIG_EFI -- cgit v1.2.3 From e4ca0e59c39442546866f3dd514a3a5956577daf Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Sep 2024 20:59:04 +0300 Subject: types: Complement the aligned types with signed 64-bit one Some user may want to use aligned signed 64-bit type. Provide it for them. Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20240903180218.3640501-2-andriy.shevchenko@linux.intel.com Signed-off-by: Jonathan Cameron --- include/linux/types.h | 3 ++- include/uapi/linux/types.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/types.h b/include/linux/types.h index 2bc8766ba20c..2d7b9ae8714c 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -115,8 +115,9 @@ typedef u64 u_int64_t; typedef s64 int64_t; #endif -/* this is a special 64bit data type that is 8-byte aligned */ +/* These are the special 64-bit data types that are 8-byte aligned */ #define aligned_u64 __aligned_u64 +#define aligned_s64 __aligned_s64 #define aligned_be64 __aligned_be64 #define aligned_le64 __aligned_le64 diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h index 6375a0684052..48b933938877 100644 --- a/include/uapi/linux/types.h +++ b/include/uapi/linux/types.h @@ -53,6 +53,7 @@ typedef __u32 __bitwise __wsum; * No conversions are necessary between 32-bit user-space and a 64-bit kernel. */ #define __aligned_u64 __u64 __attribute__((aligned(8))) +#define __aligned_s64 __s64 __attribute__((aligned(8))) #define __aligned_be64 __be64 __attribute__((aligned(8))) #define __aligned_le64 __le64 __attribute__((aligned(8))) -- cgit v1.2.3 From faf178607772f28006e403d7bab6c4217d4ee447 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 7 Sep 2024 19:24:46 +0200 Subject: iio: adc: Constify struct iio_map 'struct iio_map' are not modified in these drivers. Constifying this structure moves some data to a read-only section, so increase overall security. In order to do it, the prototype of iio_map_array_register() and devm_iio_map_array_register(), and a few structures that hold a "struct iio_map *" need to be adjusted. On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 21086 760 0 21846 5556 drivers/iio/adc/axp20x_adc.o After: ===== text data bss dec hex filename 21470 360 0 21830 5546 drivers/iio/adc/axp20x_adc.o 33842 1697 384 35923 8c53 drivers/iio/addac/ad74413r.o -- Compile tested only Signed-off-by: Christophe JAILLET Link: https://patch.msgid.link/5729dc3cc3892ecf0d8ea28c5f7307b34e27493e.1725729801.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jonathan Cameron --- include/linux/iio/driver.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iio/driver.h b/include/linux/iio/driver.h index 7a157ed218f6..7f8b55551ed0 100644 --- a/include/linux/iio/driver.h +++ b/include/linux/iio/driver.h @@ -18,7 +18,7 @@ struct iio_map; * @map: array of mappings specifying association of channel with client */ int iio_map_array_register(struct iio_dev *indio_dev, - struct iio_map *map); + const struct iio_map *map); /** * iio_map_array_unregister() - tell the core to remove consumer mappings for @@ -38,6 +38,7 @@ int iio_map_array_unregister(struct iio_dev *indio_dev); * handle de-registration of the IIO map object when the device's refcount goes to * zero. */ -int devm_iio_map_array_register(struct device *dev, struct iio_dev *indio_dev, struct iio_map *maps); +int devm_iio_map_array_register(struct device *dev, struct iio_dev *indio_dev, + const struct iio_map *maps); #endif -- cgit v1.2.3 From 0f702757c68b9790a844e5c07073d3d1c777b13a Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 26 Aug 2024 11:34:11 +0800 Subject: ARM: samsung: Remove obsoleted declaration for s3c_hwmon_set_platdata The s3c_hwmon_set_platdata() have been removed since commit 0d297df03890 ("ARM: s3c: simplify platform code"), and now it is useless, so remove it. Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20240826033411.4022822-1-cuigaosheng1@huawei.com Signed-off-by: Krzysztof Kozlowski --- include/linux/platform_data/hwmon-s3c.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/hwmon-s3c.h b/include/linux/platform_data/hwmon-s3c.h index 1707ad4147df..7d21e0c41037 100644 --- a/include/linux/platform_data/hwmon-s3c.h +++ b/include/linux/platform_data/hwmon-s3c.h @@ -33,14 +33,4 @@ struct s3c_hwmon_pdata { struct s3c_hwmon_chcfg *in[8]; }; -/** - * s3c_hwmon_set_platdata - Set platform data for S3C HWMON device - * @pd: Platform data to register to device. - * - * Register the given platform data for use with the S3C HWMON device. - * The call will copy the platform data, so the board definitions can - * make the structure itself __initdata. - */ -extern void __init s3c_hwmon_set_platdata(struct s3c_hwmon_pdata *pd); - #endif /* __HWMON_S3C_H__ */ -- cgit v1.2.3 From 3a6ad95d97eb62a7b7c804ef7eeb329a1f697d00 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 26 Aug 2024 11:31:18 +0800 Subject: ASoC: samsung: Remove obsoleted declaration for s3c64xx_ac97_setup_gpio The s3c64xx_ac97_setup_gpio() have been removed since commit 0d297df03890 ("ARM: s3c: simplify platform code"), and now it is useless, so remove it. Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20240826033118.4021727-1-cuigaosheng1@huawei.com Signed-off-by: Krzysztof Kozlowski --- include/linux/platform_data/asoc-s3c.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/asoc-s3c.h b/include/linux/platform_data/asoc-s3c.h index f9c00f839e9f..085dd8e8af76 100644 --- a/include/linux/platform_data/asoc-s3c.h +++ b/include/linux/platform_data/asoc-s3c.h @@ -13,8 +13,6 @@ #include -extern void s3c64xx_ac97_setup_gpio(int); - struct samsung_i2s_type { /* If the Primary DAI has 5.1 Channels */ #define QUIRK_PRI_6CHAN (1 << 0) -- cgit v1.2.3 From 2d3e0135cefccbcd8459112a8afe260e7b51ff6d Mon Sep 17 00:00:00 2001 From: Inbaraj E Date: Tue, 17 Sep 2024 15:13:55 +0530 Subject: dt-bindings: clock: samsung: remove define with number of clocks for FSD Number of clocks supported by Linux drivers might vary - sometimes we add new clocks, not exposed previously. Therefore these numbers of clocks should not be in the bindings, as that prevents changing them. Remove it entirely from the bindings, once Linux drivers stopped using them. Signed-off-by: Inbaraj E Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240917094355.37887-3-inbaraj.e@samsung.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/fsd-clk.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/fsd-clk.h b/include/dt-bindings/clock/fsd-clk.h index c8a2af1dd1ad..3f7b64d93558 100644 --- a/include/dt-bindings/clock/fsd-clk.h +++ b/include/dt-bindings/clock/fsd-clk.h @@ -28,7 +28,6 @@ #define DOUT_CMU_IMEM_ACLK 13 #define DOUT_CMU_IMEM_DMACLK 14 #define GAT_CMU_FSYS0_SHARED0DIV4 15 -#define CMU_NR_CLK 16 /* PERIC */ #define PERIC_SCLK_UART0 1 @@ -76,7 +75,6 @@ #define PERIC_EQOS_PHYRXCLK_MUX 43 #define PERIC_EQOS_PHYRXCLK 44 #define PERIC_DOUT_RGMII_CLK 45 -#define PERIC_NR_CLK 46 /* FSYS0 */ #define UFS0_MPHY_REFCLK_IXTAL24 1 @@ -101,7 +99,6 @@ #define FSYS0_EQOS_TOP0_IPCLKPORT_RGMII_CLK_I 20 #define FSYS0_EQOS_TOP0_IPCLKPORT_CLK_RX_I 21 #define FSYS0_DOUT_FSYS0_PERIBUS_GRP 22 -#define FSYS0_NR_CLK 23 /* FSYS1 */ #define PCIE_LINK0_IPCLKPORT_DBI_ACLK 1 @@ -112,7 +109,6 @@ #define PCIE_LINK1_IPCLKPORT_AUX_ACLK 6 #define PCIE_LINK1_IPCLKPORT_MSTR_ACLK 7 #define PCIE_LINK1_IPCLKPORT_SLV_ACLK 8 -#define FSYS1_NR_CLK 9 /* IMEM */ #define IMEM_DMA0_IPCLKPORT_ACLK 1 @@ -126,11 +122,9 @@ #define IMEM_TMU_TOP_IPCLKPORT_I_CLK_TS 9 #define IMEM_TMU_GPU_IPCLKPORT_I_CLK_TS 10 #define IMEM_TMU_GT_IPCLKPORT_I_CLK_TS 11 -#define IMEM_NR_CLK 12 /* MFC */ #define MFC_MFC_IPCLKPORT_ACLK 1 -#define MFC_NR_CLK 2 /* CAM_CSI */ #define CAM_CSI0_0_IPCLKPORT_I_ACLK 1 @@ -145,6 +139,5 @@ #define CAM_CSI2_1_IPCLKPORT_I_ACLK 10 #define CAM_CSI2_2_IPCLKPORT_I_ACLK 11 #define CAM_CSI2_3_IPCLKPORT_I_ACLK 12 -#define CAM_CSI_NR_CLK 13 #endif /*_DT_BINDINGS_CLOCK_FSD_H */ -- cgit v1.2.3 From a370b72ec7165ebe1230d0225cbe66f6526e68ef Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sun, 18 Aug 2024 21:48:13 +0900 Subject: tracing: Add a comment about ftrace_regs definition To clarify what will be expected on ftrace_regs, add a comment to the architecture independent definition of the ftrace_regs. Signed-off-by: Masami Hiramatsu (Google) Acked-by: Mark Rutland Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fd5e84d0ec47..42106b3de396 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -117,6 +117,32 @@ extern int ftrace_enabled; #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS +/** + * ftrace_regs - ftrace partial/optimal register set + * + * ftrace_regs represents a group of registers which is used at the + * function entry and exit. There are three types of registers. + * + * - Registers for passing the parameters to callee, including the stack + * pointer. (e.g. rcx, rdx, rdi, rsi, r8, r9 and rsp on x86_64) + * - Registers for passing the return values to caller. + * (e.g. rax and rdx on x86_64) + * - Registers for hooking the function call and return including the + * frame pointer (the frame pointer is architecture/config dependent) + * (e.g. rip, rbp and rsp for x86_64) + * + * Also, architecture dependent fields can be used for internal process. + * (e.g. orig_ax on x86_64) + * + * On the function entry, those registers will be restored except for + * the stack pointer, so that user can change the function parameters + * and instruction pointer (e.g. live patching.) + * On the function exit, only registers which is used for return values + * are restored. + * + * NOTE: user *must not* access regs directly, only do it via APIs, because + * the member can be changed according to the architecture. + */ struct ftrace_regs { struct pt_regs regs; }; -- cgit v1.2.3 From a312a0f7834e605e7c41570f0e9525d0fc4a70a4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 14 Sep 2024 17:48:06 -0400 Subject: fgraph: Use fgraph data to store subtime for profiler Instead of having the "subtime" for the function profiler in the infrastructure ftrace_ret_stack structure, have it use the fgraph data reserve and retrieve functions. This will keep the limited shadow stack from wasting 8 bytes for something that is seldom used. Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Jiri Olsa Link: https://lore.kernel.org/20240914214826.780323141@goodmis.org Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 42106b3de396..aabd348cad4a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1081,6 +1081,7 @@ struct fgraph_ops { void *fgraph_reserve_data(int idx, int size_bytes); void *fgraph_retrieve_data(int idx, int *size_bytes); +void *fgraph_retrieve_parent_data(int idx, int *size_bytes, int depth); /* * Stack of return addresses for functions @@ -1091,9 +1092,6 @@ struct ftrace_ret_stack { unsigned long ret; unsigned long func; unsigned long long calltime; -#ifdef CONFIG_FUNCTION_PROFILER - unsigned long long subtime; -#endif #ifdef HAVE_FUNCTION_GRAPH_FP_TEST unsigned long fp; #endif -- cgit v1.2.3 From 3c9880f3ab52b52b5b4e1850a70e80dd7329cb4c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 14 Sep 2024 17:48:07 -0400 Subject: ftrace: Use a running sleeptime instead of saving on shadow stack The fgraph "sleep-time" option tells the function graph tracer and the profiler whether to include the time a function "sleeps" (is scheduled off the CPU) in its duration for the function. By default it is true, which means the duration of a function is calculated by the timestamp of when the function was entered to the timestamp of when it exits. If the "sleep-time" option is disabled, it needs to remove the time that the task was not running on the CPU during the function. Currently it is done in a sched_switch tracepoint probe where it moves the "calltime" (time of entry of the function) forward by the sleep time calculated. It updates all the calltime in the shadow stack. This is time consuming for those users of the function graph tracer that does not care about the sleep time. Instead, add a "ftrace_sleeptime" to the task_struct that gets the sleep time added each time the task wakes up. Then have the function entry save the current "ftrace_sleeptime" and on function exit, move the calltime forward by the difference of the current "ftrace_sleeptime" from the saved sleeptime. This removes one dependency of "calltime" needed to be on the shadow stack. It also simplifies the code that removes the sleep time of functions. TODO: Only enable the sched_switch tracepoint when this is needed. Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Jiri Olsa Link: https://lore.kernel.org/20240914214826.938908568@goodmis.org Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index e6ee4258169a..c08f3bdb11a5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1441,6 +1441,7 @@ struct task_struct { /* Timestamp for last schedule: */ unsigned long long ftrace_timestamp; + unsigned long long ftrace_sleeptime; /* * Number of functions that haven't been traced -- cgit v1.2.3 From f1f36e22bee967db5e812a65e24389e54c46f3c2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 14 Sep 2024 17:48:08 -0400 Subject: ftrace: Have calltime be saved in the fgraph storage The calltime field in the shadow stack frame is only used by the function graph tracer and profiler. But now that there's other users of the function graph infrastructure, this adds overhead and wastes space on the shadow stack. Move the calltime to the fgraph data storage, where the function graph and profiler entry functions will save it in its own graph storage and retrieve it in its exit functions. Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Jiri Olsa Link: https://lore.kernel.org/20240914214827.096968730@goodmis.org Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index aabd348cad4a..e684addf6508 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1091,7 +1091,6 @@ void *fgraph_retrieve_parent_data(int idx, int *size_bytes, int depth); struct ftrace_ret_stack { unsigned long ret; unsigned long func; - unsigned long long calltime; #ifdef HAVE_FUNCTION_GRAPH_FP_TEST unsigned long fp; #endif -- cgit v1.2.3 From 26228256b796eb0145bdfb2ae34ec8c4c0ef1319 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 6 Sep 2024 09:52:16 +0200 Subject: backlight: lcd: Test against struct fb_info.lcd_dev Add struct fb_info.lcd_dev for fbdev drivers to store a reference to their lcd device. Update the lcd's fb_notifier_callback() to test for this field. The lcd module can now detect if an lcd device belongs to an fbdev device. This works similar to the bl_dev for backlights and will allow for the removal of the check_fb callback from several fbdev driver's lcd devices. Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Thompson Link: https://lore.kernel.org/r/20240906075439.98476-3-tzimmermann@suse.de Signed-off-by: Lee Jones --- include/linux/fb.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index 267b59ead432..5ba187e08cf7 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -21,6 +21,7 @@ struct fb_info; struct file; struct i2c_adapter; struct inode; +struct lcd_device; struct module; struct notifier_block; struct page; @@ -480,6 +481,13 @@ struct fb_info { struct mutex bl_curve_mutex; u8 bl_curve[FB_BACKLIGHT_LEVELS]; #endif + + /* + * Assigned LCD device; set before framebuffer + * registration, remove after unregister + */ + struct lcd_device *lcd_dev; + #ifdef CONFIG_FB_DEFERRED_IO struct delayed_work deferred_work; unsigned long npagerefs; @@ -754,6 +762,11 @@ static inline struct backlight_device *fb_bl_device(struct fb_info *info) } #endif +static inline struct lcd_device *fb_lcd_device(struct fb_info *info) +{ + return info->lcd_dev; +} + /* fbmon.c */ #define FB_MAXTIMINGS 0 #define FB_VSYNCTIMINGS 1 -- cgit v1.2.3 From 48ffe2074c2864ab64ee2004e7ebf3d6a6730fbf Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 6 Sep 2024 09:52:17 +0200 Subject: backlight: lcd: Add LCD_POWER_ constants for power states Duplicate FB_BLANK_ constants as LCD_POWER_ constants in the lcd header file. Allows lcd drivers to avoid including the fbdev header file and removes a compile-time dependency between the two subsystems. The new LCD_POWER_ constants have the same values as their FB_BLANK_ counterparts. Hence semantics does not change and the lcd drivers can be converted one by one. Each instance of FB_BLANK_UNBLANK becomes LCD_POWER_ON, each of FB_BLANK_POWERDOWN becomes LCD_POWER_OFF, FB_BLANK_NORMAL becomes LCD_POWER_REDUCED and FB_BLANK_VSYNC_SUSPEND becomes LCD_POWER_REDUCED_VSYNC_SUSPEND. Lcd code or drivers do not use FB_BLANK_HSYNC_SUSPEND, so no new constants for this is being added. The tokens LCD_POWER_REDUCED and LCD_POWER_REDUCED_VSYNC_SUSPEND are deprecated and drivers should replace them with LCD_POWER_ON and LCD_POWER_OFF. See also commit a1cacb8a8e70 ("backlight: Add BACKLIGHT_POWER_ constants for power states"), which added similar constants for backlight drivers. v2: - fix typo in commit description Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Thompson Link: https://lore.kernel.org/r/20240906075439.98476-4-tzimmermann@suse.de Signed-off-by: Lee Jones --- include/linux/lcd.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/lcd.h b/include/linux/lcd.h index 68703a51dc53..dfcc54d327f5 100644 --- a/include/linux/lcd.h +++ b/include/linux/lcd.h @@ -14,6 +14,11 @@ #include #include +#define LCD_POWER_ON (0) +#define LCD_POWER_REDUCED (1) // deprecated; don't use in new code +#define LCD_POWER_REDUCED_VSYNC_SUSPEND (2) // deprecated; don't use in new code +#define LCD_POWER_OFF (4) + /* Notes on locking: * * lcd_device->ops_lock is an internal backlight lock protecting the ops -- cgit v1.2.3 From c38a7db56d18b3ec07f3ad52c1e3f1f05c375011 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 6 Sep 2024 09:52:31 +0200 Subject: backlight: platform_lcd: Remove match_fb from struct plat_lcd_data The match_fb callback in struct plat_lcd_data is unused. Remove it. Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Thompson Link: https://lore.kernel.org/r/20240906075439.98476-18-tzimmermann@suse.de Signed-off-by: Lee Jones --- include/video/platform_lcd.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/video/platform_lcd.h b/include/video/platform_lcd.h index 6a95184a28c1..2bdf46519298 100644 --- a/include/video/platform_lcd.h +++ b/include/video/platform_lcd.h @@ -8,11 +8,8 @@ */ struct plat_lcd_data; -struct fb_info; struct plat_lcd_data { int (*probe)(struct plat_lcd_data *); void (*set_power)(struct plat_lcd_data *, unsigned int power); - int (*match_fb)(struct plat_lcd_data *, struct fb_info *); }; - -- cgit v1.2.3 From 43e1120deb3768c86aa3875c7073658e44a30ea5 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 6 Sep 2024 09:52:40 +0200 Subject: backlight: lcd: Replace check_fb with controls_device Rename check_fb in struct lcd_ops to controls_device. The callback is now independent from fbdev's struct fb_info and tests if an lcd device controls a hardware display device. The new naming and semantics follow similar functionality for backlight devices. v2: - fix typos in commit description (Daniel) Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Thompson Link: https://lore.kernel.org/r/20240906075439.98476-27-tzimmermann@suse.de Signed-off-by: Lee Jones --- include/linux/lcd.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/lcd.h b/include/linux/lcd.h index dfcc54d327f5..8399b5ed48f2 100644 --- a/include/linux/lcd.h +++ b/include/linux/lcd.h @@ -35,7 +35,6 @@ */ struct lcd_device; -struct fb_info; struct lcd_properties { /* The maximum value for contrast (read-only) */ @@ -54,9 +53,18 @@ struct lcd_ops { int (*set_contrast)(struct lcd_device *, int contrast); /* Set LCD panel mode (resolutions ...) */ int (*set_mode)(struct lcd_device *, struct fb_videomode *); - /* Check if given framebuffer device is the one LCD is bound to; - return 0 if not, !=0 if it is. If NULL, lcd always matches the fb. */ - int (*check_fb)(struct lcd_device *, struct fb_info *); + + /* + * Check if the LCD controls the given display device. This + * operation is optional and if not implemented it is assumed that + * the display is always the one controlled by the LCD. + * + * RETURNS: + * + * If display_dev is NULL or display_dev matches the device controlled by + * the LCD, return true. Otherwise return false. + */ + bool (*controls_device)(struct lcd_device *lcd, struct device *display_device); }; struct lcd_device { -- cgit v1.2.3 From 02e224d096ef58fe59e96609de6018e133f33512 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 6 Sep 2024 09:52:41 +0200 Subject: backlight: lcd: Remove struct fb_videomode from set_mode callback Implementations of struct lcd_ops.set_mode only require the resolution from struct fb_videomode. Pass the xres and yres fields, but remove the dependency on the fbdev data structure. Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Thompson Link: https://lore.kernel.org/r/20240906075439.98476-28-tzimmermann@suse.de Signed-off-by: Lee Jones --- include/linux/lcd.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lcd.h b/include/linux/lcd.h index 8399b5ed48f2..59a80b396a71 100644 --- a/include/linux/lcd.h +++ b/include/linux/lcd.h @@ -51,8 +51,11 @@ struct lcd_ops { int (*get_contrast)(struct lcd_device *); /* Set LCD panel contrast */ int (*set_contrast)(struct lcd_device *, int contrast); - /* Set LCD panel mode (resolutions ...) */ - int (*set_mode)(struct lcd_device *, struct fb_videomode *); + + /* + * Set LCD panel mode (resolutions ...) + */ + int (*set_mode)(struct lcd_device *lcd, u32 xres, u32 yres); /* * Check if the LCD controls the given display device. This -- cgit v1.2.3 From 0d580d99749e759b62dc8e28f511310e9235da7a Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 6 Sep 2024 09:52:42 +0200 Subject: backlight: lcd: Do not include in lcd header With the exception of fb_notifier_callback(), none of the lcd code uses fbdev; especially not the lcd drivers. Remove the include statement for from the public lcd header. v2: - fix typos in commit description Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Thompson Link: https://lore.kernel.org/r/20240906075439.98476-29-tzimmermann@suse.de Signed-off-by: Lee Jones --- include/linux/lcd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/lcd.h b/include/linux/lcd.h index 59a80b396a71..c3ccdff4519a 100644 --- a/include/linux/lcd.h +++ b/include/linux/lcd.h @@ -12,7 +12,6 @@ #include #include #include -#include #define LCD_POWER_ON (0) #define LCD_POWER_REDUCED (1) // deprecated; don't use in new code -- cgit v1.2.3 From fb4c31587adfa9cd50661a535bdbfcc4da57ee38 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 10 Sep 2024 18:32:51 +0200 Subject: reset: amlogic: add auxiliary reset driver support Add support for the reset controller present in the audio clock controller of the g12 and sm1 SoC families, using the auxiliary bus. This is expected to replace the driver currently present directly within the related clock driver. Signed-off-by: Jerome Brunet Reviewed-by: Philipp Zabel Link: https://lore.kernel.org/r/20240910-meson-rst-aux-v5-9-60be62635d3e@baylibre.com Signed-off-by: Philipp Zabel --- include/soc/amlogic/reset-meson-aux.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/soc/amlogic/reset-meson-aux.h (limited to 'include') diff --git a/include/soc/amlogic/reset-meson-aux.h b/include/soc/amlogic/reset-meson-aux.h new file mode 100644 index 000000000000..d8a15d48c984 --- /dev/null +++ b/include/soc/amlogic/reset-meson-aux.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SOC_RESET_MESON_AUX_H +#define __SOC_RESET_MESON_AUX_H + +#include + +struct device; +struct regmap; + +#if IS_ENABLED(CONFIG_RESET_MESON_AUX) +int devm_meson_rst_aux_register(struct device *dev, + struct regmap *map, + const char *adev_name); +#else +static inline int devm_meson_rst_aux_register(struct device *dev, + struct regmap *map, + const char *adev_name) +{ + return 0; +} +#endif + +#endif /* __SOC_RESET_MESON_AUX_H */ -- cgit v1.2.3 From 44badc908f2c85711cb18e45e13119c10ad3a05f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 24 Sep 2024 09:05:45 +0100 Subject: tcp: Fix spelling mistake "emtpy" -> "empty" There is a spelling mistake in a WARN_ONCE message. Fix it. Signed-off-by: Colin Ian King Reviewed-by: Jason Xing Link: https://patch.msgid.link/20240924080545.1324962-1-colin.i.king@gmail.com Signed-off-by: Paolo Abeni --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index d1948d357dad..739a9fb83d0c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2442,7 +2442,7 @@ static inline s64 tcp_rto_delta_us(const struct sock *sk) return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp; } else { WARN_ONCE(1, - "rtx queue emtpy: " + "rtx queue empty: " "out:%u sacked:%u lost:%u retrans:%u " "tlp_high_seq:%u sk_state:%u ca_state:%u " "advmss:%u mss_cache:%u pmtu:%u\n", -- cgit v1.2.3 From 86b9ce0a8a6cf9397503920cd5412d207a93fae9 Mon Sep 17 00:00:00 2001 From: Sai Krishna Potthuri Date: Fri, 6 Sep 2024 16:31:12 +0530 Subject: firmware: xilinx: Add Pinctrl Get Attribute ID Add Pinctrl Get Attribute ID to the query ids list. Signed-off-by: Sai Krishna Potthuri Link: https://lore.kernel.org/20240906110113.3154327-3-sai.krishna.potthuri@amd.com Signed-off-by: Linus Walleij --- include/linux/firmware/xlnx-zynqmp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index d7d07afc0532..3b4ce4ec5d3f 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -238,6 +238,7 @@ enum pm_query_id { PM_QID_PINCTRL_GET_PIN_GROUPS = 11, PM_QID_CLOCK_GET_NUM_CLOCKS = 12, PM_QID_CLOCK_GET_MAX_DIVISOR = 13, + PM_QID_PINCTRL_GET_ATTRIBUTES = 15, }; enum rpu_oper_mode { -- cgit v1.2.3 From b875bd5b381e114115922944f7a01e31f8b07c2a Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 11 Sep 2024 15:43:00 -0400 Subject: exportfs: Remove EXPORT_OP_ASYNC_LOCK Now that GFS2 and OCFS2 are signalling async ->lock() support with FOP_ASYNC_LOCK and checks for support are converted, we can remove EXPORT_OP_ASYNC_LOCK. Signed-off-by: Benjamin Coddington Link: https://lore.kernel.org/r/0a114db814fec3086f937ae3d44a086f13b8de26.1726083391.git.bcodding@redhat.com Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/exportfs.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 893a1d21dc1c..1ab165c2939f 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -250,19 +250,6 @@ struct export_operations { unsigned long flags; }; -/** - * exportfs_lock_op_is_async() - export op supports async lock operation - * @export_ops: the nfs export operations to check - * - * Returns true if the nfs export_operations structure has - * EXPORT_OP_ASYNC_LOCK in their flags set - */ -static inline bool -exportfs_lock_op_is_async(const struct export_operations *export_ops) -{ - return export_ops->flags & EXPORT_OP_ASYNC_LOCK; -} - extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, int *max_len, struct inode *parent, int flags); -- cgit v1.2.3 From dad35f7d2fc14e446669d4cab100597a6798eae5 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Wed, 25 Sep 2024 18:40:09 +0200 Subject: reset: replace boolean parameters with flags parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce enum reset_control_flags and replace the list of boolean parameters to the internal reset_control_get functions with a single flags parameter, before adding more boolean options. The separate boolean parameters have been shown to be error prone in the past. See for example commit a57f68ddc886 ("reset: Fix devm bulk optional exclusive control getter"). Acked-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20240925-reset-get-deasserted-v2-1-b3601bbd0458@pengutronix.de Signed-off-by: Philipp Zabel --- include/linux/reset.h | 161 +++++++++++++++++++++++++++++++------------------- 1 file changed, 99 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/include/linux/reset.h b/include/linux/reset.h index 514ddf003efc..99296af98f81 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -25,6 +25,33 @@ struct reset_control_bulk_data { struct reset_control *rstc; }; +#define RESET_CONTROL_FLAGS_BIT_SHARED BIT(0) /* not exclusive */ +#define RESET_CONTROL_FLAGS_BIT_OPTIONAL BIT(1) +#define RESET_CONTROL_FLAGS_BIT_ACQUIRED BIT(2) /* iff exclusive, not released */ + +/** + * enum reset_control_flags - Flags that can be passed to the reset_control_get functions + * to determine the type of reset control. + * These values cannot be OR'd. + * + * @RESET_CONTROL_EXCLUSIVE: exclusive, acquired, + * @RESET_CONTROL_EXCLUSIVE_RELEASED: exclusive, released, + * @RESET_CONTROL_SHARED: shared + * @RESET_CONTROL_OPTIONAL_EXCLUSIVE: optional, exclusive, acquired + * @RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED: optional, exclusive, released + * @RESET_CONTROL_OPTIONAL_SHARED: optional, shared + */ +enum reset_control_flags { + RESET_CONTROL_EXCLUSIVE = RESET_CONTROL_FLAGS_BIT_ACQUIRED, + RESET_CONTROL_EXCLUSIVE_RELEASED = 0, + RESET_CONTROL_SHARED = RESET_CONTROL_FLAGS_BIT_SHARED, + RESET_CONTROL_OPTIONAL_EXCLUSIVE = RESET_CONTROL_FLAGS_BIT_OPTIONAL | + RESET_CONTROL_FLAGS_BIT_ACQUIRED, + RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED = RESET_CONTROL_FLAGS_BIT_OPTIONAL, + RESET_CONTROL_OPTIONAL_SHARED = RESET_CONTROL_FLAGS_BIT_OPTIONAL | + RESET_CONTROL_FLAGS_BIT_SHARED, +}; + #ifdef CONFIG_RESET_CONTROLLER int reset_control_reset(struct reset_control *rstc); @@ -42,30 +69,25 @@ int reset_control_bulk_acquire(int num_rstcs, struct reset_control_bulk_data *rs void reset_control_bulk_release(int num_rstcs, struct reset_control_bulk_data *rstcs); struct reset_control *__of_reset_control_get(struct device_node *node, - const char *id, int index, bool shared, - bool optional, bool acquired); + const char *id, int index, enum reset_control_flags flags); struct reset_control *__reset_control_get(struct device *dev, const char *id, - int index, bool shared, - bool optional, bool acquired); + int index, enum reset_control_flags flags); void reset_control_put(struct reset_control *rstc); int __reset_control_bulk_get(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs, - bool shared, bool optional, bool acquired); + enum reset_control_flags flags); void reset_control_bulk_put(int num_rstcs, struct reset_control_bulk_data *rstcs); int __device_reset(struct device *dev, bool optional); struct reset_control *__devm_reset_control_get(struct device *dev, - const char *id, int index, bool shared, - bool optional, bool acquired); + const char *id, int index, enum reset_control_flags flags); int __devm_reset_control_bulk_get(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs, - bool shared, bool optional, bool acquired); + enum reset_control_flags flags); struct reset_control *devm_reset_control_array_get(struct device *dev, - bool shared, bool optional); -struct reset_control *of_reset_control_array_get(struct device_node *np, - bool shared, bool optional, - bool acquired); + enum reset_control_flags flags); +struct reset_control *of_reset_control_array_get(struct device_node *np, enum reset_control_flags); int reset_control_get_count(struct device *dev); @@ -116,17 +138,19 @@ static inline int __device_reset(struct device *dev, bool optional) static inline struct reset_control *__of_reset_control_get( struct device_node *node, - const char *id, int index, bool shared, - bool optional, bool acquired) + const char *id, int index, enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? NULL : ERR_PTR(-ENOTSUPP); } static inline struct reset_control *__reset_control_get( struct device *dev, const char *id, - int index, bool shared, bool optional, - bool acquired) + int index, enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? NULL : ERR_PTR(-ENOTSUPP); } @@ -162,8 +186,10 @@ reset_control_bulk_release(int num_rstcs, struct reset_control_bulk_data *rstcs) static inline int __reset_control_bulk_get(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs, - bool shared, bool optional, bool acquired) + enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? 0 : -EOPNOTSUPP; } @@ -174,30 +200,36 @@ reset_control_bulk_put(int num_rstcs, struct reset_control_bulk_data *rstcs) static inline struct reset_control *__devm_reset_control_get( struct device *dev, const char *id, - int index, bool shared, bool optional, - bool acquired) + int index, enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? NULL : ERR_PTR(-ENOTSUPP); } static inline int __devm_reset_control_bulk_get(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs, - bool shared, bool optional, bool acquired) + enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? 0 : -EOPNOTSUPP; } static inline struct reset_control * -devm_reset_control_array_get(struct device *dev, bool shared, bool optional) +devm_reset_control_array_get(struct device *dev, enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? NULL : ERR_PTR(-ENOTSUPP); } static inline struct reset_control * -of_reset_control_array_get(struct device_node *np, bool shared, bool optional, - bool acquired) +of_reset_control_array_get(struct device_node *np, enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? NULL : ERR_PTR(-ENOTSUPP); } @@ -236,7 +268,7 @@ static inline int device_reset_optional(struct device *dev) static inline struct reset_control * __must_check reset_control_get_exclusive(struct device *dev, const char *id) { - return __reset_control_get(dev, id, 0, false, false, true); + return __reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE); } /** @@ -253,7 +285,7 @@ static inline int __must_check reset_control_bulk_get_exclusive(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, true); + return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_EXCLUSIVE); } /** @@ -274,7 +306,7 @@ static inline struct reset_control * __must_check reset_control_get_exclusive_released(struct device *dev, const char *id) { - return __reset_control_get(dev, id, 0, false, false, false); + return __reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE_RELEASED); } /** @@ -295,7 +327,7 @@ static inline int __must_check reset_control_bulk_get_exclusive_released(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, false); + return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_EXCLUSIVE_RELEASED); } /** @@ -316,7 +348,8 @@ static inline int __must_check reset_control_bulk_get_optional_exclusive_released(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, false); + return __reset_control_bulk_get(dev, num_rstcs, rstcs, + RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED); } /** @@ -344,7 +377,7 @@ reset_control_bulk_get_optional_exclusive_released(struct device *dev, int num_r static inline struct reset_control *reset_control_get_shared( struct device *dev, const char *id) { - return __reset_control_get(dev, id, 0, true, false, false); + return __reset_control_get(dev, id, 0, RESET_CONTROL_SHARED); } /** @@ -361,7 +394,7 @@ static inline int __must_check reset_control_bulk_get_shared(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, false); + return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_SHARED); } /** @@ -378,7 +411,7 @@ reset_control_bulk_get_shared(struct device *dev, int num_rstcs, static inline struct reset_control *reset_control_get_optional_exclusive( struct device *dev, const char *id) { - return __reset_control_get(dev, id, 0, false, true, true); + return __reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } /** @@ -398,7 +431,7 @@ static inline int __must_check reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true); + return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } /** @@ -415,7 +448,7 @@ reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs, static inline struct reset_control *reset_control_get_optional_shared( struct device *dev, const char *id) { - return __reset_control_get(dev, id, 0, true, true, false); + return __reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_SHARED); } /** @@ -435,7 +468,7 @@ static inline int __must_check reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __reset_control_bulk_get(dev, num_rstcs, rstcs, true, true, false); + return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_OPTIONAL_SHARED); } /** @@ -451,7 +484,7 @@ reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs, static inline struct reset_control *of_reset_control_get_exclusive( struct device_node *node, const char *id) { - return __of_reset_control_get(node, id, 0, false, false, true); + return __of_reset_control_get(node, id, 0, RESET_CONTROL_EXCLUSIVE); } /** @@ -471,7 +504,7 @@ static inline struct reset_control *of_reset_control_get_exclusive( static inline struct reset_control *of_reset_control_get_optional_exclusive( struct device_node *node, const char *id) { - return __of_reset_control_get(node, id, 0, false, true, true); + return __of_reset_control_get(node, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } /** @@ -496,7 +529,7 @@ static inline struct reset_control *of_reset_control_get_optional_exclusive( static inline struct reset_control *of_reset_control_get_shared( struct device_node *node, const char *id) { - return __of_reset_control_get(node, id, 0, true, false, false); + return __of_reset_control_get(node, id, 0, RESET_CONTROL_SHARED); } /** @@ -513,7 +546,7 @@ static inline struct reset_control *of_reset_control_get_shared( static inline struct reset_control *of_reset_control_get_exclusive_by_index( struct device_node *node, int index) { - return __of_reset_control_get(node, NULL, index, false, false, true); + return __of_reset_control_get(node, NULL, index, RESET_CONTROL_EXCLUSIVE); } /** @@ -541,7 +574,7 @@ static inline struct reset_control *of_reset_control_get_exclusive_by_index( static inline struct reset_control *of_reset_control_get_shared_by_index( struct device_node *node, int index) { - return __of_reset_control_get(node, NULL, index, true, false, false); + return __of_reset_control_get(node, NULL, index, RESET_CONTROL_SHARED); } /** @@ -560,7 +593,7 @@ static inline struct reset_control * __must_check devm_reset_control_get_exclusive(struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, false, false, true); + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE); } /** @@ -580,7 +613,8 @@ static inline int __must_check devm_reset_control_bulk_get_exclusive(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, true); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, + RESET_CONTROL_EXCLUSIVE); } /** @@ -599,7 +633,7 @@ static inline struct reset_control * __must_check devm_reset_control_get_exclusive_released(struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, false, false, false); + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE_RELEASED); } /** @@ -619,7 +653,8 @@ static inline int __must_check devm_reset_control_bulk_get_exclusive_released(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, false); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, + RESET_CONTROL_EXCLUSIVE_RELEASED); } /** @@ -638,7 +673,7 @@ static inline struct reset_control * __must_check devm_reset_control_get_optional_exclusive_released(struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, false, true, false); + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED); } /** @@ -658,7 +693,8 @@ static inline int __must_check devm_reset_control_bulk_get_optional_exclusive_released(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, false); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, + RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED); } /** @@ -673,7 +709,7 @@ devm_reset_control_bulk_get_optional_exclusive_released(struct device *dev, int static inline struct reset_control *devm_reset_control_get_shared( struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, true, false, false); + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_SHARED); } /** @@ -693,7 +729,7 @@ static inline int __must_check devm_reset_control_bulk_get_shared(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, false); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_SHARED); } /** @@ -711,7 +747,7 @@ devm_reset_control_bulk_get_shared(struct device *dev, int num_rstcs, static inline struct reset_control *devm_reset_control_get_optional_exclusive( struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, false, true, true); + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } /** @@ -731,7 +767,8 @@ static inline int __must_check devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, + RESET_CONTROL_OPTIONAL_EXCLUSIVE); } /** @@ -749,7 +786,7 @@ devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs static inline struct reset_control *devm_reset_control_get_optional_shared( struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, true, true, false); + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_SHARED); } /** @@ -769,7 +806,7 @@ static inline int __must_check devm_reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, true, false); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_OPTIONAL_SHARED); } /** @@ -787,7 +824,7 @@ devm_reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs, static inline struct reset_control * devm_reset_control_get_exclusive_by_index(struct device *dev, int index) { - return __devm_reset_control_get(dev, NULL, index, false, false, true); + return __devm_reset_control_get(dev, NULL, index, RESET_CONTROL_EXCLUSIVE); } /** @@ -803,7 +840,7 @@ devm_reset_control_get_exclusive_by_index(struct device *dev, int index) static inline struct reset_control * devm_reset_control_get_shared_by_index(struct device *dev, int index) { - return __devm_reset_control_get(dev, NULL, index, true, false, false); + return __devm_reset_control_get(dev, NULL, index, RESET_CONTROL_SHARED); } /* @@ -851,54 +888,54 @@ static inline struct reset_control *devm_reset_control_get_by_index( static inline struct reset_control * devm_reset_control_array_get_exclusive(struct device *dev) { - return devm_reset_control_array_get(dev, false, false); + return devm_reset_control_array_get(dev, RESET_CONTROL_EXCLUSIVE); } static inline struct reset_control * devm_reset_control_array_get_shared(struct device *dev) { - return devm_reset_control_array_get(dev, true, false); + return devm_reset_control_array_get(dev, RESET_CONTROL_SHARED); } static inline struct reset_control * devm_reset_control_array_get_optional_exclusive(struct device *dev) { - return devm_reset_control_array_get(dev, false, true); + return devm_reset_control_array_get(dev, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } static inline struct reset_control * devm_reset_control_array_get_optional_shared(struct device *dev) { - return devm_reset_control_array_get(dev, true, true); + return devm_reset_control_array_get(dev, RESET_CONTROL_OPTIONAL_SHARED); } static inline struct reset_control * of_reset_control_array_get_exclusive(struct device_node *node) { - return of_reset_control_array_get(node, false, false, true); + return of_reset_control_array_get(node, RESET_CONTROL_EXCLUSIVE); } static inline struct reset_control * of_reset_control_array_get_exclusive_released(struct device_node *node) { - return of_reset_control_array_get(node, false, false, false); + return of_reset_control_array_get(node, RESET_CONTROL_EXCLUSIVE_RELEASED); } static inline struct reset_control * of_reset_control_array_get_shared(struct device_node *node) { - return of_reset_control_array_get(node, true, false, true); + return of_reset_control_array_get(node, RESET_CONTROL_SHARED); } static inline struct reset_control * of_reset_control_array_get_optional_exclusive(struct device_node *node) { - return of_reset_control_array_get(node, false, true, true); + return of_reset_control_array_get(node, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } static inline struct reset_control * of_reset_control_array_get_optional_shared(struct device_node *node) { - return of_reset_control_array_get(node, true, true, true); + return of_reset_control_array_get(node, RESET_CONTROL_OPTIONAL_SHARED); } #endif -- cgit v1.2.3 From d872bed85036f5e60c66b0dd0994346b4ea6470c Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Wed, 25 Sep 2024 18:40:10 +0200 Subject: reset: Add devres helpers to request pre-deasserted reset controls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add devres helpers - devm_reset_control_bulk_get_exclusive_deasserted - devm_reset_control_bulk_get_optional_exclusive_deasserted - devm_reset_control_bulk_get_optional_shared_deasserted - devm_reset_control_bulk_get_shared_deasserted - devm_reset_control_get_exclusive_deasserted - devm_reset_control_get_optional_exclusive_deasserted - devm_reset_control_get_optional_shared_deasserted - devm_reset_control_get_shared_deasserted to request and immediately deassert reset controls. During cleanup, reset_control_assert() will be called automatically on the returned reset controls. Acked-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20240925-reset-get-deasserted-v2-2-b3601bbd0458@pengutronix.de Signed-off-by: Philipp Zabel --- include/linux/reset.h | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) (limited to 'include') diff --git a/include/linux/reset.h b/include/linux/reset.h index 99296af98f81..2986ced69a02 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -28,6 +28,7 @@ struct reset_control_bulk_data { #define RESET_CONTROL_FLAGS_BIT_SHARED BIT(0) /* not exclusive */ #define RESET_CONTROL_FLAGS_BIT_OPTIONAL BIT(1) #define RESET_CONTROL_FLAGS_BIT_ACQUIRED BIT(2) /* iff exclusive, not released */ +#define RESET_CONTROL_FLAGS_BIT_DEASSERTED BIT(3) /** * enum reset_control_flags - Flags that can be passed to the reset_control_get functions @@ -35,21 +36,35 @@ struct reset_control_bulk_data { * These values cannot be OR'd. * * @RESET_CONTROL_EXCLUSIVE: exclusive, acquired, + * @RESET_CONTROL_EXCLUSIVE_DEASSERTED: exclusive, acquired, deasserted * @RESET_CONTROL_EXCLUSIVE_RELEASED: exclusive, released, * @RESET_CONTROL_SHARED: shared + * @RESET_CONTROL_SHARED_DEASSERTED: shared, deasserted * @RESET_CONTROL_OPTIONAL_EXCLUSIVE: optional, exclusive, acquired + * @RESET_CONTROL_OPTIONAL_EXCLUSIVE_DEASSERTED: optional, exclusive, acquired, deasserted * @RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED: optional, exclusive, released * @RESET_CONTROL_OPTIONAL_SHARED: optional, shared + * @RESET_CONTROL_OPTIONAL_SHARED_DEASSERTED: optional, shared, deasserted */ enum reset_control_flags { RESET_CONTROL_EXCLUSIVE = RESET_CONTROL_FLAGS_BIT_ACQUIRED, + RESET_CONTROL_EXCLUSIVE_DEASSERTED = RESET_CONTROL_FLAGS_BIT_ACQUIRED | + RESET_CONTROL_FLAGS_BIT_DEASSERTED, RESET_CONTROL_EXCLUSIVE_RELEASED = 0, RESET_CONTROL_SHARED = RESET_CONTROL_FLAGS_BIT_SHARED, + RESET_CONTROL_SHARED_DEASSERTED = RESET_CONTROL_FLAGS_BIT_SHARED | + RESET_CONTROL_FLAGS_BIT_DEASSERTED, RESET_CONTROL_OPTIONAL_EXCLUSIVE = RESET_CONTROL_FLAGS_BIT_OPTIONAL | RESET_CONTROL_FLAGS_BIT_ACQUIRED, + RESET_CONTROL_OPTIONAL_EXCLUSIVE_DEASSERTED = RESET_CONTROL_FLAGS_BIT_OPTIONAL | + RESET_CONTROL_FLAGS_BIT_ACQUIRED | + RESET_CONTROL_FLAGS_BIT_DEASSERTED, RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED = RESET_CONTROL_FLAGS_BIT_OPTIONAL, RESET_CONTROL_OPTIONAL_SHARED = RESET_CONTROL_FLAGS_BIT_OPTIONAL | RESET_CONTROL_FLAGS_BIT_SHARED, + RESET_CONTROL_OPTIONAL_SHARED_DEASSERTED = RESET_CONTROL_FLAGS_BIT_OPTIONAL | + RESET_CONTROL_FLAGS_BIT_SHARED | + RESET_CONTROL_FLAGS_BIT_DEASSERTED, }; #ifdef CONFIG_RESET_CONTROLLER @@ -596,6 +611,25 @@ __must_check devm_reset_control_get_exclusive(struct device *dev, return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE); } +/** + * devm_reset_control_get_exclusive_deasserted - resource managed + * reset_control_get_exclusive() + + * reset_control_deassert() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get_exclusive() + reset_control_deassert(). For reset + * controllers returned from this function, reset_control_assert() + + * reset_control_put() is called automatically on driver detach. + * + * See reset_control_get_exclusive() for more information. + */ +static inline struct reset_control * __must_check +devm_reset_control_get_exclusive_deasserted(struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE_DEASSERTED); +} + /** * devm_reset_control_bulk_get_exclusive - resource managed * reset_control_bulk_get_exclusive() @@ -712,6 +746,25 @@ static inline struct reset_control *devm_reset_control_get_shared( return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_SHARED); } +/** + * devm_reset_control_get_shared_deasserted - resource managed + * reset_control_get_shared() + + * reset_control_deassert() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get_shared() + reset_control_deassert(). For reset + * controllers returned from this function, reset_control_assert() + + * reset_control_put() is called automatically on driver detach. + * + * See devm_reset_control_get_shared() for more information. + */ +static inline struct reset_control * __must_check +devm_reset_control_get_shared_deasserted(struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_SHARED_DEASSERTED); +} + /** * devm_reset_control_bulk_get_shared - resource managed * reset_control_bulk_get_shared() @@ -732,6 +785,28 @@ devm_reset_control_bulk_get_shared(struct device *dev, int num_rstcs, return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_SHARED); } +/** + * devm_reset_control_bulk_get_shared_deasserted - resource managed + * reset_control_bulk_get_shared() + + * reset_control_bulk_deassert() + * @dev: device to be reset by the controller + * @num_rstcs: number of entries in rstcs array + * @rstcs: array of struct reset_control_bulk_data with reset line names set + * + * Managed reset_control_bulk_get_shared() + reset_control_bulk_deassert(). For + * reset controllers returned from this function, reset_control_bulk_assert() + + * reset_control_bulk_put() are called automatically on driver detach. + * + * See devm_reset_control_bulk_get_shared() for more information. + */ +static inline int __must_check +devm_reset_control_bulk_get_shared_deasserted(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs) +{ + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, + RESET_CONTROL_SHARED_DEASSERTED); +} + /** * devm_reset_control_get_optional_exclusive - resource managed * reset_control_get_optional_exclusive() @@ -750,6 +825,25 @@ static inline struct reset_control *devm_reset_control_get_optional_exclusive( return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } +/** + * devm_reset_control_get_optional_exclusive_deasserted - resource managed + * reset_control_get_optional_exclusive() + + * reset_control_deassert() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get_optional_exclusive() + reset_control_deassert(). + * For reset controllers returned from this function, reset_control_assert() + + * reset_control_put() is called automatically on driver detach. + * + * See devm_reset_control_get_optional_exclusive() for more information. + */ +static inline struct reset_control * +devm_reset_control_get_optional_exclusive_deasserted(struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE_DEASSERTED); +} + /** * devm_reset_control_bulk_get_optional_exclusive - resource managed * reset_control_bulk_get_optional_exclusive() @@ -789,6 +883,25 @@ static inline struct reset_control *devm_reset_control_get_optional_shared( return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_SHARED); } +/** + * devm_reset_control_get_optional_shared_deasserted - resource managed + * reset_control_get_optional_shared() + + * reset_control_deassert() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get_optional_shared() + reset_control_deassert(). For + * reset controllers returned from this function, reset_control_assert() + + * reset_control_put() is called automatically on driver detach. + * + * See devm_reset_control_get_optional_shared() for more information. + */ +static inline struct reset_control * +devm_reset_control_get_optional_shared_deasserted(struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_SHARED_DEASSERTED); +} + /** * devm_reset_control_bulk_get_optional_shared - resource managed * reset_control_bulk_get_optional_shared() -- cgit v1.2.3 From dab9cd4b8e7f5fce4e7a0424991ec4714a780f3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 1 Oct 2024 10:51:39 +0200 Subject: pwm: Add kernel doc for members added to pwm_ops recently MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The callbacks for lowlevel pwm drivers were expanded to handle the new waveform abstraction. When doing that I missed to expand the kernel doc description. This is catched up here. Reported-by: Stephen Rothwell Link: https://lore.kernel.org/linux-next/20241001135207.125ca7af@canb.auug.org.au Fixes: 17e40c25158f ("pwm: New abstraction for PWM waveforms") Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20241001085138.1025818-2-u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- include/linux/pwm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index c3d9ddeafa65..f1cb1e5b0a36 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -276,6 +276,11 @@ struct pwm_capture { * @request: optional hook for requesting a PWM * @free: optional hook for freeing a PWM * @capture: capture and report PWM signal + * @sizeof_wfhw: size (in bytes) of driver specific waveform presentation + * @round_waveform_tohw: convert a struct pwm_waveform to driver specific presentation + * @round_waveform_fromhw: convert a driver specific waveform presentation to struct pwm_waveform + * @read_waveform: read driver specific waveform presentation from hardware + * @write_waveform: write driver specific waveform presentation to hardware * @apply: atomically apply a new PWM config * @get_state: get the current PWM state. */ -- cgit v1.2.3 From 030ace430afcf847f537227afceb22dfe8fb8fc8 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 26 Sep 2024 22:19:52 +0800 Subject: spi: spi-mem: Allow specifying the byte order in Octal DTR mode There are NOR flashes (Macronix) that swap the bytes on a 16-bit boundary when configured in Octal DTR mode. The byte order of 16-bit words is swapped when read or written in Octal Double Transfer Rate (DTR) mode compared to Single Transfer Rate (STR) modes. If one writes D0 D1 D2 D3 bytes using 1-1-1 mode, and uses 8D-8D-8D SPI mode for reading, it will read back D1 D0 D3 D2. Swapping the bytes may introduce some endianness problems. It can affect the boot sequence if the entire boot sequence is not handled in either 8D-8D-8D mode or 1-1-1 mode. Therefore, it is necessary to swap the bytes back to ensure the same byte order as in STR modes. Fortunately there are controllers that could swap the bytes back at runtime, addressing the flash's endianness requirements. Provide a way for the upper layers to specify the byte order in Octal DTR mode. Merge Tudor's patch and add modifications for suiting newer version of Linux kernel. Suggested-by: Michael Walle Signed-off-by: JaimeLiao Signed-off-by: AlvinZhou Acked-by: Mark Brown Link: https://lore.kernel.org/r/20240926141956.2386374-3-alvinzhou.tw@gmail.com Signed-off-by: Tudor Ambarus --- include/linux/spi/spi-mem.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index f866d5c8ed32..c46d2b8029be 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -90,6 +90,8 @@ enum spi_mem_data_dir { * @data.buswidth: number of IO lanes used to send/receive the data * @data.dtr: whether the data should be sent in DTR mode or not * @data.ecc: whether error correction is required or not + * @data.swap16: whether the byte order of 16-bit words is swapped when read + * or written in Octal DTR mode compared to STR mode. * @data.dir: direction of the transfer * @data.nbytes: number of data bytes to send/receive. Can be zero if the * operation does not involve transferring data @@ -124,7 +126,8 @@ struct spi_mem_op { u8 buswidth; u8 dtr : 1; u8 ecc : 1; - u8 __pad : 6; + u8 swap16 : 1; + u8 __pad : 5; enum spi_mem_data_dir dir; unsigned int nbytes; union { @@ -297,10 +300,13 @@ struct spi_controller_mem_ops { * struct spi_controller_mem_caps - SPI memory controller capabilities * @dtr: Supports DTR operations * @ecc: Supports operations with error correction + * @swap16: Supports swapping bytes on a 16 bit boundary when configured in + * Octal DTR */ struct spi_controller_mem_caps { bool dtr; bool ecc; + bool swap16; }; #define spi_mem_controller_is_capable(ctlr, cap) \ -- cgit v1.2.3 From 848f2bbb363d4cdb4202db328a103fd3c34e21a2 Mon Sep 17 00:00:00 2001 From: Ronak Jain Date: Tue, 30 Jul 2024 01:43:42 -0700 Subject: firmware: xilinx: Add missing debug firmware interfaces Add missing PM EEMI APIs interface in debug firmware driver. The debugfs firmware driver interface is intended for testing and debugging the EEMI APIs only. This interface does not contain any checking regarding improper usage, and the number, type and valid ranges of the arguments. This interface must be used with a lot of care. In fact, accessing this interface during normal PM operation will very likely cause unexpected problems. The debugfs interface shouldn't be used in the production system and hence it is disabled by default in defconfig. Signed-off-by: Ronak Jain Signed-off-by: Radhey Shyam Pandey Signed-off-by: Michal Simek Link: https://lore.kernel.org/r/20240730084342.1683231-1-ronak.jain@amd.com --- include/linux/firmware/xlnx-zynqmp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index d7d07afc0532..563382cf16f2 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -218,9 +218,13 @@ enum pm_ioctl_id { /* Runtime feature configuration */ IOCTL_SET_FEATURE_CONFIG = 26, IOCTL_GET_FEATURE_CONFIG = 27, + /* IOCTL for Secure Read/Write Interface */ + IOCTL_READ_REG = 28, /* Dynamic SD/GEM configuration */ IOCTL_SET_SD_CONFIG = 30, IOCTL_SET_GEM_CONFIG = 31, + /* IOCTL to get default/current QoS */ + IOCTL_GET_QOS = 34, }; enum pm_query_id { -- cgit v1.2.3 From f33d6099edf78e3c97900c0173fedbfecc025a9e Mon Sep 17 00:00:00 2001 From: Ronak Jain Date: Fri, 30 Aug 2024 03:00:42 -0700 Subject: firmware: xilinx: use u32 for reset ID in reset APIs Refactors the reset handling mechanisms by replacing the reset ID's enum type with a u32. This update improves flexibility, allowing the reset ID to accommodate a broader range of values, including those that may not fit into predefined enum values. The use of u32 for reset ID enhances extensibility, especially for hardware platforms or features where more granular control of reset operations is required. By shifting to a general integer type, this change reduces constraints and simplifies integration with other system components that rely on non-enum-based reset IDs. Signed-off-by: Ronak Jain Link: https://lore.kernel.org/r/20240830100042.3163511-1-ronak.jain@amd.com Signed-off-by: Michal Simek --- include/linux/firmware/xlnx-zynqmp.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 563382cf16f2..5b938fc2adad 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -557,9 +557,9 @@ int zynqmp_pm_get_pll_frac_data(u32 clk_id, u32 *data); int zynqmp_pm_set_sd_tapdelay(u32 node_id, u32 type, u32 value); int zynqmp_pm_sd_dll_reset(u32 node_id, u32 type); int zynqmp_pm_ospi_mux_select(u32 dev_id, u32 select); -int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset, +int zynqmp_pm_reset_assert(const u32 reset, const enum zynqmp_pm_reset_action assert_flag); -int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, u32 *status); +int zynqmp_pm_reset_get_status(const u32 reset, u32 *status); unsigned int zynqmp_pm_bootmode_read(u32 *ps_mode); int zynqmp_pm_bootmode_write(u32 ps_mode); int zynqmp_pm_init_finalize(void); @@ -702,14 +702,13 @@ static inline int zynqmp_pm_ospi_mux_select(u32 dev_id, u32 select) return -ENODEV; } -static inline int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset, +static inline int zynqmp_pm_reset_assert(const u32 reset, const enum zynqmp_pm_reset_action assert_flag) { return -ENODEV; } -static inline int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, - u32 *status) +static inline int zynqmp_pm_reset_get_status(const u32 reset, u32 *status) { return -ENODEV; } -- cgit v1.2.3 From 92fb71333d5737d0296fb968a653dfda4b225175 Mon Sep 17 00:00:00 2001 From: Ronak Jain Date: Thu, 19 Sep 2024 22:55:01 -0700 Subject: firmware: xilinx: add support for new SMC call format Added zynqmp_pm_invoke_fw_fn() to use new SMC format in which lower 12 bits of SMC id are fixed and firmware header is moved to subsequent SMC arguments. The new SMC format supports full request and response buffers. Added zynqmp_pm_get_sip_svc_version() to get SiP SVC version number to check if TF-A is newer or older and use the SMC format accordingly to handle backward compatibility. Used new SMC format for PM_QUERY_DATA API as more response values are required in it. Signed-off-by: Ronak Jain Signed-off-by: Jay Buddhabhatti Link: https://lore.kernel.org/r/20240920055501.2658642-1-ronak.jain@amd.com Signed-off-by: Michal Simek --- include/linux/firmware/xlnx-zynqmp.h | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 5b938fc2adad..76d85ad82ec0 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -3,7 +3,7 @@ * Xilinx Zynq MPSoC Firmware layer * * Copyright (C) 2014-2021 Xilinx - * Copyright (C) 2022 - 2023, Advanced Micro Devices, Inc. + * Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. * * Michal Simek * Davorin Mista @@ -32,6 +32,19 @@ /* SMC SIP service Call Function Identifier Prefix */ #define PM_SIP_SVC 0xC2000000 +/* SMC function ID to get SiP SVC version */ +#define GET_SIP_SVC_VERSION (0x8200ff03U) + +/* SiP Service Calls version numbers */ +#define SIP_SVC_VERSION_MAJOR (0U) +#define SIP_SVC_VERSION_MINOR (2U) + +#define SIP_SVC_PASSTHROUGH_VERSION ((SIP_SVC_VERSION_MAJOR << 16) | \ + SIP_SVC_VERSION_MINOR) + +/* Fixed ID for FW specific APIs */ +#define PASS_THROUGH_FW_CMD_ID GENMASK(11, 0) + /* PM API versions */ #define PM_API_VERSION_1 1 #define PM_API_VERSION_2 2 @@ -51,6 +64,7 @@ #define API_ID_MASK GENMASK(7, 0) #define MODULE_ID_MASK GENMASK(11, 8) +#define PLM_MODULE_ID_MASK GENMASK(15, 8) /* Firmware feature check version mask */ #define FIRMWARE_VERSION_MASK 0xFFFFU @@ -62,7 +76,13 @@ #define GET_CALLBACK_DATA 0xa01 /* Number of 32bits values in payload */ -#define PAYLOAD_ARG_CNT 4U +#define PAYLOAD_ARG_CNT 7U + +/* Number of 64bits arguments for SMC call */ +#define SMC_ARG_CNT_64 8U + +/* Number of 32bits arguments for SMC call */ +#define SMC_ARG_CNT_32 13U /* Number of arguments for a callback */ #define CB_ARG_CNT 4 @@ -130,6 +150,7 @@ enum pm_module_id { PM_MODULE_ID = 0x0, + XPM_MODULE_ID = 0x2, XSEM_MODULE_ID = 0x3, TF_A_MODULE_ID = 0xa, }; @@ -537,6 +558,7 @@ struct zynqmp_pm_query_data { }; int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 *ret_payload, u32 num_args, ...); +int zynqmp_pm_invoke_fw_fn(u32 pm_api_id, u32 *ret_payload, u32 num_args, ...); #if IS_REACHABLE(CONFIG_ZYNQMP_FIRMWARE) int zynqmp_pm_get_api_version(u32 *version); -- cgit v1.2.3 From a849a0273d0f73a252d14d31c5003a8059ea51fc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 11 Sep 2024 15:17:37 +0200 Subject: ntp: Remove unused tick_nsec tick_nsec is only updated in the NTP core, but there are no users. Remove it. Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20240911-devel-anna-maria-b4-timers-ptp-ntp-v1-1-2d52f4e13476@linutronix.de --- include/linux/timex.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/timex.h b/include/linux/timex.h index 3871b06bd302..7f7a12fd8200 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -145,7 +145,6 @@ unsigned long random_get_entropy_fallback(void); * estimated error = NTP dispersion. */ extern unsigned long tick_usec; /* USER_HZ period (usec) */ -extern unsigned long tick_nsec; /* SHIFTED_HZ period (nsec) */ /* Required to safely shift negative values */ #define shift_right(x, s) ({ \ -- cgit v1.2.3 From 66606a93849bfe3cbe9f0b801b40f60b87c54e11 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 11 Sep 2024 15:17:38 +0200 Subject: ntp: Make tick_usec static There are no users of tick_usec outside of the NTP core code. Therefore make tick_usec static. Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20240911-devel-anna-maria-b4-timers-ptp-ntp-v1-2-2d52f4e13476@linutronix.de --- include/linux/timex.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/timex.h b/include/linux/timex.h index 7f7a12fd8200..4ee32eff3f22 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -139,13 +139,6 @@ unsigned long random_get_entropy_fallback(void); #define MAXSEC 2048 /* max interval between updates (s) */ #define NTP_PHASE_LIMIT ((MAXPHASE / NSEC_PER_USEC) << 5) /* beyond max. dispersion */ -/* - * kernel variables - * Note: maximum error = NTP sync distance = dispersion + delay / 2; - * estimated error = NTP dispersion. - */ -extern unsigned long tick_usec; /* USER_HZ period (usec) */ - /* Required to safely shift negative values */ #define shift_right(x, s) ({ \ __typeof__(x) __x = (x); \ -- cgit v1.2.3 From 8102c4daf44ab86c2d2226a8136bec905d6e2bd1 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Wed, 11 Sep 2024 10:30:20 +0100 Subject: timekeeping: Add the boot clock to system time snapshot For tracing purpose, the boot clock is interesting as it doesn't stop on suspend. Export it as part of the time snapshot. This will later allow the hypervisor to add boot clock timestamps to its events. Signed-off-by: Vincent Donnefort Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20240911093029.3279154-5-vdonnefort@google.com --- include/linux/timekeeping.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index fc12a9ba2c88..e85c27347e44 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -275,6 +275,7 @@ struct ktime_timestamps { * counter value * @cycles: Clocksource counter value to produce the system times * @real: Realtime system time + * @boot: Boot time * @raw: Monotonic raw system time * @cs_id: Clocksource ID * @clock_was_set_seq: The sequence number of clock-was-set events @@ -283,6 +284,7 @@ struct ktime_timestamps { struct system_time_snapshot { u64 cycles; ktime_t real; + ktime_t boot; ktime_t raw; enum clocksource_ids cs_id; unsigned int clock_was_set_seq; -- cgit v1.2.3 From f69767a1ada3ac74be2e1ac0795a05e1d1384eff Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 2 Oct 2024 11:59:50 -0500 Subject: PCI: Add TLP Processing Hints (TPH) support Add support for PCIe TLP Processing Hints (TPH) support (see PCIe r6.2, sec 6.17). Add TPH register definitions in pci_regs.h, including the TPH Requester capability register, TPH Requester control register, TPH Completer capability, and the ST fields of MSI-X entry. Introduce pcie_enable_tph() and pcie_disable_tph(), enabling drivers to toggle TPH support and configure specific ST mode as needed. Also add a new kernel parameter, "pci=notph", allowing users to disable TPH support across the entire system. Link: https://lore.kernel.org/r/20241002165954.128085-2-wei.huang2@amd.com Co-developed-by: Jing Liu Co-developed-by: Paul Luse Co-developed-by: Eric Van Tassell Signed-off-by: Jing Liu Signed-off-by: Paul Luse Signed-off-by: Eric Van Tassell Signed-off-by: Wei Huang Signed-off-by: Bjorn Helgaas Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur Reviewed-by: Andy Gospodarek Reviewed-by: Jonathan Cameron Reviewed-by: Lukas Wunner --- include/linux/pci-tph.h | 21 +++++++++++++++++++++ include/linux/pci.h | 7 +++++++ include/uapi/linux/pci_regs.h | 37 +++++++++++++++++++++++++++++-------- 3 files changed, 57 insertions(+), 8 deletions(-) create mode 100644 include/linux/pci-tph.h (limited to 'include') diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h new file mode 100644 index 000000000000..58654a334ffb --- /dev/null +++ b/include/linux/pci-tph.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * TPH (TLP Processing Hints) + * + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * Eric Van Tassell + * Wei Huang + */ +#ifndef LINUX_PCI_TPH_H +#define LINUX_PCI_TPH_H + +#ifdef CONFIG_PCIE_TPH +void pcie_disable_tph(struct pci_dev *pdev); +int pcie_enable_tph(struct pci_dev *pdev, int mode); +#else +static inline void pcie_disable_tph(struct pci_dev *pdev) { } +static inline int pcie_enable_tph(struct pci_dev *pdev, int mode) +{ return -EINVAL; } +#endif + +#endif /* LINUX_PCI_TPH_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..8351d76b6e12 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -434,6 +434,7 @@ struct pci_dev { unsigned int ats_enabled:1; /* Address Translation Svc */ unsigned int pasid_enabled:1; /* Process Address Space ID */ unsigned int pri_enabled:1; /* Page Request Interface */ + unsigned int tph_enabled:1; /* TLP Processing Hints */ unsigned int is_managed:1; /* Managed via devres */ unsigned int is_msi_managed:1; /* MSI release via devres installed */ unsigned int needs_freset:1; /* Requires fundamental reset */ @@ -534,6 +535,12 @@ struct pci_dev { /* These methods index pci_reset_fn_methods[] */ u8 reset_methods[PCI_NUM_RESET_METHODS]; /* In priority order */ + +#ifdef CONFIG_PCIE_TPH + u16 tph_cap; /* TPH capability offset */ + u8 tph_mode; /* TPH mode */ + u8 tph_req_type; /* TPH requester type */ +#endif }; static inline struct pci_dev *pci_physfn(struct pci_dev *dev) diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 12323b3334a9..155dea741615 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -340,7 +340,8 @@ #define PCI_MSIX_ENTRY_UPPER_ADDR 0x4 /* Message Upper Address */ #define PCI_MSIX_ENTRY_DATA 0x8 /* Message Data */ #define PCI_MSIX_ENTRY_VECTOR_CTRL 0xc /* Vector Control */ -#define PCI_MSIX_ENTRY_CTRL_MASKBIT 0x00000001 +#define PCI_MSIX_ENTRY_CTRL_MASKBIT 0x00000001 /* Mask Bit */ +#define PCI_MSIX_ENTRY_CTRL_ST 0xffff0000 /* Steering Tag */ /* CompactPCI Hotswap Register */ @@ -659,6 +660,7 @@ #define PCI_EXP_DEVCAP2_ATOMIC_COMP64 0x00000100 /* 64b AtomicOp completion */ #define PCI_EXP_DEVCAP2_ATOMIC_COMP128 0x00000200 /* 128b AtomicOp completion */ #define PCI_EXP_DEVCAP2_LTR 0x00000800 /* Latency tolerance reporting */ +#define PCI_EXP_DEVCAP2_TPH_COMP_MASK 0x00003000 /* TPH completer support */ #define PCI_EXP_DEVCAP2_OBFF_MASK 0x000c0000 /* OBFF support mechanism */ #define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */ #define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */ @@ -1023,15 +1025,34 @@ #define PCI_DPA_CAP_SUBSTATE_MASK 0x1F /* # substates - 1 */ #define PCI_DPA_BASE_SIZEOF 16 /* size with 0 substates */ +/* TPH Completer Support */ +#define PCI_EXP_DEVCAP2_TPH_COMP_NONE 0x0 /* None */ +#define PCI_EXP_DEVCAP2_TPH_COMP_TPH_ONLY 0x1 /* TPH only */ +#define PCI_EXP_DEVCAP2_TPH_COMP_EXT_TPH 0x3 /* TPH and Extended TPH */ + /* TPH Requester */ #define PCI_TPH_CAP 4 /* capability register */ -#define PCI_TPH_CAP_LOC_MASK 0x600 /* location mask */ -#define PCI_TPH_LOC_NONE 0x000 /* no location */ -#define PCI_TPH_LOC_CAP 0x200 /* in capability */ -#define PCI_TPH_LOC_MSIX 0x400 /* in MSI-X */ -#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* ST table mask */ -#define PCI_TPH_CAP_ST_SHIFT 16 /* ST table shift */ -#define PCI_TPH_BASE_SIZEOF 0xc /* size with no ST table */ +#define PCI_TPH_CAP_ST_NS 0x00000001 /* No ST Mode Supported */ +#define PCI_TPH_CAP_ST_IV 0x00000002 /* Interrupt Vector Mode Supported */ +#define PCI_TPH_CAP_ST_DS 0x00000004 /* Device Specific Mode Supported */ +#define PCI_TPH_CAP_EXT_TPH 0x00000100 /* Ext TPH Requester Supported */ +#define PCI_TPH_CAP_LOC_MASK 0x00000600 /* ST Table Location */ +#define PCI_TPH_LOC_NONE 0x00000000 /* Not present */ +#define PCI_TPH_LOC_CAP 0x00000200 /* In capability */ +#define PCI_TPH_LOC_MSIX 0x00000400 /* In MSI-X */ +#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* ST Table Size */ +#define PCI_TPH_CAP_ST_SHIFT 16 /* ST Table Size shift */ +#define PCI_TPH_BASE_SIZEOF 0xc /* Size with no ST table */ + +#define PCI_TPH_CTRL 8 /* control register */ +#define PCI_TPH_CTRL_MODE_SEL_MASK 0x00000007 /* ST Mode Select */ +#define PCI_TPH_ST_NS_MODE 0x0 /* No ST Mode */ +#define PCI_TPH_ST_IV_MODE 0x1 /* Interrupt Vector Mode */ +#define PCI_TPH_ST_DS_MODE 0x2 /* Device Specific Mode */ +#define PCI_TPH_CTRL_REQ_EN_MASK 0x00000300 /* TPH Requester Enable */ +#define PCI_TPH_REQ_DISABLE 0x0 /* No TPH requests allowed */ +#define PCI_TPH_REQ_TPH_ONLY 0x1 /* TPH only requests allowed */ +#define PCI_TPH_REQ_EXT_TPH 0x3 /* Extended TPH requests allowed */ /* Downstream Port Containment */ #define PCI_EXP_DPC_CAP 0x04 /* DPC Capability */ -- cgit v1.2.3 From d2e8a34876ce69b27f450eebfc550ab8e316f752 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 2 Oct 2024 11:59:51 -0500 Subject: PCI/TPH: Add Steering Tag support Add pcie_tph_get_cpu_st() to allow a caller to retrieve Steering Tags for a target memory associated with a specific CPU. The ST tag is retrieved by invoking PCI ACPI "_DSM to Query Cache Locality TPH Features" method (rev=0x7, func=0xF) of the device's Root Port device. Add pcie_tph_set_st_entry() to update the device's Steering Tags. The tags will be written into the device's MSI-X table or the ST table located in the TPH Extended Capability space. Co-developed-by: Eric Van Tassell Link: https://lore.kernel.org/r/20241002165954.128085-3-wei.huang2@amd.com Signed-off-by: Eric Van Tassell Signed-off-by: Wei Huang Signed-off-by: Bjorn Helgaas Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur Reviewed-by: Andy Gospodarek --- include/linux/pci-tph.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h index 58654a334ffb..c3e806c13d64 100644 --- a/include/linux/pci-tph.h +++ b/include/linux/pci-tph.h @@ -9,10 +9,33 @@ #ifndef LINUX_PCI_TPH_H #define LINUX_PCI_TPH_H +/* + * According to the ECN for PCI Firmware Spec, Steering Tag can be different + * depending on the memory type: Volatile Memory or Persistent Memory. When a + * caller query about a target's Steering Tag, it must provide the target's + * tph_mem_type. ECN link: https://members.pcisig.com/wg/PCI-SIG/document/15470. + */ +enum tph_mem_type { + TPH_MEM_TYPE_VM, /* volatile memory */ + TPH_MEM_TYPE_PM /* persistent memory */ +}; + #ifdef CONFIG_PCIE_TPH +int pcie_tph_set_st_entry(struct pci_dev *pdev, + unsigned int index, u16 tag); +int pcie_tph_get_cpu_st(struct pci_dev *dev, + enum tph_mem_type mem_type, + unsigned int cpu_uid, u16 *tag); void pcie_disable_tph(struct pci_dev *pdev); int pcie_enable_tph(struct pci_dev *pdev, int mode); #else +static inline int pcie_tph_set_st_entry(struct pci_dev *pdev, + unsigned int index, u16 tag) +{ return -EINVAL; } +static inline int pcie_tph_get_cpu_st(struct pci_dev *dev, + enum tph_mem_type mem_type, + unsigned int cpu_uid, u16 *tag) +{ return -EINVAL; } static inline void pcie_disable_tph(struct pci_dev *pdev) { } static inline int pcie_enable_tph(struct pci_dev *pdev, int mode) { return -EINVAL; } -- cgit v1.2.3 From 277b339c4ba5c3d51f86892efd7bfbb012318ed8 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 1 Oct 2024 17:04:10 +0100 Subject: net: pcs: xpcs: move PCS reset to .pcs_pre_config() Move the PCS reset to .pcs_pre_config() rather than at creation time, which means we call the reset function with the interface that we're actually going to be using to talk to the downstream device. Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean # sja1105 Signed-off-by: Russell King (Oracle) Tested-by: for them? Link: https://patch.msgid.link/E1svfMA-005ZI3-Va@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/pcs/pcs-xpcs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index b4a4eb6c8866..fd75d0605bb6 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -61,6 +61,7 @@ struct dw_xpcs { struct clk_bulk_data clks[DW_XPCS_NUM_CLKS]; struct phylink_pcs pcs; phy_interface_t interface; + bool need_reset; }; int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); -- cgit v1.2.3 From bedea1539acb808c870ad85b16f8d8f0bef5e0bb Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 1 Oct 2024 17:04:26 +0100 Subject: net: pcs: xpcs: add xpcs_destroy_pcs() and xpcs_create_pcs_mdiodev() Provide xpcs create/destroy functions that return and take a phylink_pcs pointer instead of an xpcs pointer. This will be used by drivers that have been converted to use phylink_pcs pointers internally, rather than dw_xpcs pointers. As xpcs_create_mdiodev() no longer makes use of its interface argument, pass PHY_INTERFACE_MODE_NA into xpcs_create_mdiodev() until it is removed later in the series. Reviewed-by: Vladimir Oltean Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1svfMQ-005ZIL-Bi@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/pcs/pcs-xpcs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index fd75d0605bb6..a4e2243ce647 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -78,4 +78,7 @@ struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode, phy_interface_t interface); void xpcs_destroy(struct dw_xpcs *xpcs); +struct phylink_pcs *xpcs_create_pcs_mdiodev(struct mii_bus *bus, int addr); +void xpcs_destroy_pcs(struct phylink_pcs *pcs); + #endif /* __LINUX_PCS_XPCS_H */ -- cgit v1.2.3 From bf5a61645bb2d51be53da5b26f948045b571147c Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 1 Oct 2024 17:04:51 +0100 Subject: net: pcs: xpcs: drop interface argument from xpcs_create*() The XPCS sub-driver no longer uses the "interface" argument to the xpcs_create_mdiodev() and xpcs_create_fwnode() functions. Remove this now unnecessary argument, updating the stmmac driver appropriately. Reviewed-by: Vladimir Oltean Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1svfMp-005ZIp-UX@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/pcs/pcs-xpcs.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index a4e2243ce647..758daabb76c7 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -72,10 +72,8 @@ int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces); int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable); -struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr, - phy_interface_t interface); -struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode, - phy_interface_t interface); +struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr); +struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode); void xpcs_destroy(struct dw_xpcs *xpcs); struct phylink_pcs *xpcs_create_pcs_mdiodev(struct mii_bus *bus, int addr); -- cgit v1.2.3 From faefc9730d073e88a490e37b00e3e196b823abcb Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 1 Oct 2024 17:04:57 +0100 Subject: net: pcs: xpcs: make xpcs_do_config() and xpcs_link_up() internal As nothing outside pcs-xpcs.c calls neither xpcs_do_config() nor xpcs_link_up(), remove their exports and prototypes. Reviewed-by: Vladimir Oltean Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1svfMv-005ZIv-2M@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/pcs/pcs-xpcs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 758daabb76c7..abda475111d1 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -65,10 +65,6 @@ struct dw_xpcs { }; int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); -void xpcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, - phy_interface_t interface, int speed, int duplex); -int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, - const unsigned long *advertising, unsigned int neg_mode); void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces); int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable); -- cgit v1.2.3 From ced20ea315fe8591093f19574ec32222c1ab71ba Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Tue, 24 Sep 2024 13:48:43 +0530 Subject: soundwire: amd: pass acp pci revision id as resource data Pass ACP pci revision id as resource data and store it in amd SoundWire manager private data structure. This field will be used to differentiate ACP variants. Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20240924081846.1834612-2-Vijendar.Mukunda@amd.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw_amd.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h index 28a4eb77717f..e0abc59d4748 100644 --- a/include/linux/soundwire/sdw_amd.h +++ b/include/linux/soundwire/sdw_amd.h @@ -30,6 +30,7 @@ struct acp_sdw_pdata { u16 instance; + u32 acp_rev; /* mutex to protect acp common register access */ struct mutex *acp_sdw_lock; }; @@ -66,6 +67,7 @@ struct sdw_amd_dai_runtime { * @instance: SoundWire manager instance * @quirks: SoundWire manager quirks * @wake_en_mask: wake enable mask per SoundWire manager + * @acp_rev: acp pci device revision id * @clk_stopped: flag set to true when clock is stopped * @power_mode_mask: flag interprets amd SoundWire manager power mode * @dai_runtime_array: dai runtime array @@ -94,6 +96,7 @@ struct amd_sdw_manager { u32 quirks; u32 wake_en_mask; u32 power_mode_mask; + u32 acp_rev; bool clk_stopped; struct sdw_amd_dai_runtime **dai_runtime_array; @@ -134,6 +137,7 @@ struct sdw_amd_ctx { * struct sdw_amd_res - Soundwire AMD global resource structure, * typically populated by the DSP driver/Legacy driver * + * @acp_rev: acp pci device revision id * @addr: acp pci device resource start address * @reg_range: ACP register range * @link_mask: bit-wise mask listing links selected by the DSP driver/ @@ -146,6 +150,7 @@ struct sdw_amd_ctx { * @acp_lock: mutex protecting acp common registers access */ struct sdw_amd_res { + u32 acp_rev; u32 addr; u32 reg_range; u32 link_mask; -- cgit v1.2.3 From 7b54323dde29452dd06e6acd2701d9b489c9547d Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Tue, 24 Sep 2024 13:48:44 +0530 Subject: soundwire: amd: refactor existing code for acp 6.3 platform Refactor existing code by adding acp pci revision id coditional checks for ACP 6.3 platform. Rename the macros and structure names with ACP63 tag. Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20240924081846.1834612-3-Vijendar.Mukunda@amd.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw_amd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h index e0abc59d4748..c9586f22c5a9 100644 --- a/include/linux/soundwire/sdw_amd.h +++ b/include/linux/soundwire/sdw_amd.h @@ -27,6 +27,7 @@ #define ACP_SDW0 0 #define ACP_SDW1 1 #define AMD_SDW_MAX_MANAGER_COUNT 2 +#define ACP63_PCI_REV_ID 0x63 struct acp_sdw_pdata { u16 instance; -- cgit v1.2.3 From 444d6824a4feca142b0a57095a2f1f1bda98e2ab Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 3 Oct 2024 15:06:38 +0800 Subject: soundwire: optimize sdw_stream_runtime memory layout pahole suggestion: swap position of 'm_rt_count' before: pahole -C sdw_stream_runtime drivers/soundwire/soundwire-bus.ko struct sdw_stream_runtime { const char * name; /* 0 8 */ struct sdw_stream_params params; /* 8 12 */ enum sdw_stream_state state; /* 20 4 */ enum sdw_stream_type type; /* 24 4 */ /* XXX 4 bytes hole, try to pack */ struct list_head master_list; /* 32 16 */ int m_rt_count; /* 48 4 */ /* size: 56, cachelines: 1, members: 6 */ /* sum members: 48, holes: 1, sum holes: 4 */ /* padding: 4 */ /* last cacheline: 56 bytes */ }; after: pahole --reorganize -C sdw_stream_runtime drivers/soundwire/soundwire-bus.ko struct sdw_stream_runtime { const char * name; /* 0 8 */ struct sdw_stream_params params; /* 8 12 */ enum sdw_stream_state state; /* 20 4 */ enum sdw_stream_type type; /* 24 4 */ int m_rt_count; /* 28 4 */ struct list_head master_list; /* 32 16 */ /* size: 48, cachelines: 1, members: 6 */ /* last cacheline: 48 bytes */ }; /* saved 8 bytes! */ Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20241003070650.62787-3-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 5e0dd47a0412..a4fa45132030 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -820,15 +820,15 @@ struct sdw_master_port_ops { struct sdw_msg; /** - * struct sdw_defer - SDW deffered message - * @length: message length + * struct sdw_defer - SDW deferred message * @complete: message completion * @msg: SDW message + * @length: message length */ struct sdw_defer { + struct sdw_msg *msg; int length; struct completion complete; - struct sdw_msg *msg; }; /** @@ -1010,18 +1010,18 @@ struct sdw_stream_params { * @params: Stream parameters * @state: Current state of the stream * @type: Stream type PCM or PDM + * @m_rt_count: Count of Master runtime(s) in this stream * @master_list: List of Master runtime(s) in this stream. * master_list can contain only one m_rt per Master instance * for a stream - * @m_rt_count: Count of Master runtime(s) in this stream */ struct sdw_stream_runtime { const char *name; struct sdw_stream_params params; enum sdw_stream_state state; enum sdw_stream_type type; - struct list_head master_list; int m_rt_count; + struct list_head master_list; }; struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name); -- cgit v1.2.3 From 6cb2c156439430a7f9db2e1f71a7dccf1ca978bf Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 3 Oct 2024 15:06:39 +0800 Subject: soundwire: optimize sdw_master_prop Make pahole happy by moving pointers and u64 first instead of interleaving them. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20241003070650.62787-4-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index a4fa45132030..2caea7345c3e 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -406,13 +406,14 @@ struct sdw_slave_prop { /** * struct sdw_master_prop - Master properties + * @clk_gears: Clock gears supported + * @clk_freq: Clock frequencies supported, in Hz + * @quirks: bitmask identifying optional behavior beyond the scope of the MIPI specification * @revision: MIPI spec version of the implementation * @clk_stop_modes: Bitmap, bit N set when clock-stop-modeN supported * @max_clk_freq: Maximum Bus clock frequency, in Hz * @num_clk_gears: Number of clock gears supported - * @clk_gears: Clock gears supported * @num_clk_freq: Number of clock frequencies supported, in Hz - * @clk_freq: Clock frequencies supported, in Hz * @default_frame_rate: Controller default Frame rate, in Hz * @default_row: Number of rows * @default_col: Number of columns @@ -421,24 +422,23 @@ struct sdw_slave_prop { * command * @mclk_freq: clock reference passed to SoundWire Master, in Hz. * @hw_disabled: if true, the Master is not functional, typically due to pin-mux - * @quirks: bitmask identifying optional behavior beyond the scope of the MIPI specification */ struct sdw_master_prop { + u32 *clk_gears; + u32 *clk_freq; + u64 quirks; u32 revision; u32 clk_stop_modes; u32 max_clk_freq; u32 num_clk_gears; - u32 *clk_gears; u32 num_clk_freq; - u32 *clk_freq; u32 default_frame_rate; u32 default_row; u32 default_col; - bool dynamic_frame; u32 err_threshold; u32 mclk_freq; + bool dynamic_frame; bool hw_disabled; - u64 quirks; }; /* Definitions for Master quirks */ -- cgit v1.2.3 From 0a323dad1c4e04048988cd04c60eaffd6ae61b1a Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 3 Oct 2024 15:06:40 +0800 Subject: soundwire: optimize sdw_bus structure The sdw_bus structure has seen multiple additions over the years. It's one of the most used structures in this subsystem, so there's merit in reshuffling the members a bit with 'pahole' to reduce holes and structures across cache lines. before: struct sdw_bus { struct device * dev; /* 0 8 */ struct sdw_master_device * md; /* 8 8 */ int controller_id; /* 16 4 */ unsigned int link_id; /* 20 4 */ int id; /* 24 4 */ /* XXX 4 bytes hole, try to pack */ struct list_head slaves; /* 32 16 */ long unsigned int assigned[1]; /* 48 8 */ struct mutex bus_lock; /* 56 160 */ /* --- cacheline 3 boundary (192 bytes) was 24 bytes ago --- */ struct lock_class_key bus_lock_key; /* 216 16 */ struct mutex msg_lock; /* 232 160 */ /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */ struct lock_class_key msg_lock_key; /* 392 16 */ int (*compute_params)(struct sdw_bus *); /* 408 8 */ const struct sdw_master_ops * ops; /* 416 8 */ const struct sdw_master_port_ops * port_ops; /* 424 8 */ struct sdw_bus_params params; /* 432 36 */ /* XXX 4 bytes hole, try to pack */ /* --- cacheline 7 boundary (448 bytes) was 24 bytes ago --- */ struct sdw_master_prop prop; /* 472 72 */ /* XXX last struct has 6 bytes of padding */ /* --- cacheline 8 boundary (512 bytes) was 32 bytes ago --- */ void * vendor_specific_prop; /* 544 8 */ struct list_head m_rt_list; /* 552 16 */ struct dentry * debugfs; /* 568 8 */ /* --- cacheline 9 boundary (576 bytes) --- */ struct irq_chip irq_chip; /* 576 264 */ /* --- cacheline 13 boundary (832 bytes) was 8 bytes ago --- */ struct irq_domain * domain; /* 840 8 */ struct sdw_defer defer_msg; /* 848 112 */ /* --- cacheline 15 boundary (960 bytes) --- */ unsigned int clk_stop_timeout; /* 960 4 */ u32 bank_switch_timeout; /* 964 4 */ bool multi_link; /* 968 1 */ /* XXX 3 bytes hole, try to pack */ int hw_sync_min_links; /* 972 4 */ int stream_refcount; /* 976 4 */ /* size: 984, cachelines: 16, members: 27 */ /* sum members: 969, holes: 3, sum holes: 11 */ /* padding: 4 */ /* paddings: 1, sum paddings: 6 */ /* last cacheline: 24 bytes */ }; after: struct sdw_bus { struct device * dev; /* 0 8 */ struct sdw_master_device * md; /* 8 8 */ struct lock_class_key bus_lock_key; /* 16 16 */ struct mutex bus_lock; /* 32 160 */ /* --- cacheline 3 boundary (192 bytes) --- */ struct list_head slaves; /* 192 16 */ struct lock_class_key msg_lock_key; /* 208 16 */ struct mutex msg_lock; /* 224 160 */ /* --- cacheline 6 boundary (384 bytes) --- */ struct list_head m_rt_list; /* 384 16 */ struct sdw_defer defer_msg; /* 400 112 */ /* --- cacheline 8 boundary (512 bytes) --- */ struct sdw_bus_params params; /* 512 36 */ int stream_refcount; /* 548 4 */ const struct sdw_master_ops * ops; /* 552 8 */ const struct sdw_master_port_ops * port_ops; /* 560 8 */ struct sdw_master_prop prop; /* 568 72 */ /* XXX last struct has 6 bytes of padding */ /* --- cacheline 10 boundary (640 bytes) --- */ void * vendor_specific_prop; /* 640 8 */ int hw_sync_min_links; /* 648 4 */ int controller_id; /* 652 4 */ unsigned int link_id; /* 656 4 */ int id; /* 660 4 */ int (*compute_params)(struct sdw_bus *); /* 664 8 */ long unsigned int assigned[1]; /* 672 8 */ unsigned int clk_stop_timeout; /* 680 4 */ u32 bank_switch_timeout; /* 684 4 */ struct irq_chip irq_chip; /* 688 264 */ /* --- cacheline 14 boundary (896 bytes) was 56 bytes ago --- */ struct irq_domain * domain; /* 952 8 */ /* --- cacheline 15 boundary (960 bytes) --- */ struct dentry * debugfs; /* 960 8 */ bool multi_link; /* 968 1 */ /* size: 976, cachelines: 16, members: 27 */ /* padding: 7 */ /* paddings: 1, sum paddings: 6 */ /* last cacheline: 16 bytes */ }; Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20241003070650.62787-5-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 77 ++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 2caea7345c3e..6fcf122c1831 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -871,68 +871,71 @@ struct sdw_master_ops { * struct sdw_bus - SoundWire bus * @dev: Shortcut to &bus->md->dev to avoid changing the entire code. * @md: Master device - * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used. - * @link_id: Link id number, can be 0 to N, unique for each Controller - * @id: bus system-wide unique id - * @slaves: list of Slaves on this bus - * @assigned: Bitmap for Slave device numbers. - * Bit set implies used number, bit clear implies unused number. + * @bus_lock_key: bus lock key associated to @bus_lock * @bus_lock: bus lock + * @slaves: list of Slaves on this bus + * @msg_lock_key: message lock key associated to @msg_lock * @msg_lock: message lock - * @compute_params: points to Bus resource management implementation - * @ops: Master callback ops - * @port_ops: Master port callback ops - * @params: Current bus parameters - * @prop: Master properties - * @vendor_specific_prop: pointer to non-standard properties * @m_rt_list: List of Master instance of all stream(s) running on Bus. This * is used to compute and program bus bandwidth, clock, frame shape, * transport and port parameters - * @debugfs: Bus debugfs - * @domain: IRQ domain * @defer_msg: Defer message - * @clk_stop_timeout: Clock stop timeout computed - * @bank_switch_timeout: Bank switch timeout computed - * @multi_link: Store bus property that indicates if multi links - * are supported. This flag is populated by drivers after reading - * appropriate firmware (ACPI/DT). + * @params: Current bus parameters + * @stream_refcount: number of streams currently using this bus + * @ops: Master callback ops + * @port_ops: Master port callback ops + * @prop: Master properties + * @vendor_specific_prop: pointer to non-standard properties * @hw_sync_min_links: Number of links used by a stream above which * hardware-based synchronization is required. This value is only * meaningful if multi_link is set. If set to 1, hardware-based * synchronization will be used even if a stream only uses a single * SoundWire segment. - * @stream_refcount: number of streams currently using this bus + * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used. + * @link_id: Link id number, can be 0 to N, unique for each Controller + * @id: bus system-wide unique id + * @compute_params: points to Bus resource management implementation + * @assigned: Bitmap for Slave device numbers. + * Bit set implies used number, bit clear implies unused number. + * @clk_stop_timeout: Clock stop timeout computed + * @bank_switch_timeout: Bank switch timeout computed + * @domain: IRQ domain + * @irq_chip: IRQ chip + * @debugfs: Bus debugfs (optional) + * @multi_link: Store bus property that indicates if multi links + * are supported. This flag is populated by drivers after reading + * appropriate firmware (ACPI/DT). */ struct sdw_bus { struct device *dev; struct sdw_master_device *md; - int controller_id; - unsigned int link_id; - int id; - struct list_head slaves; - DECLARE_BITMAP(assigned, SDW_MAX_DEVICES); - struct mutex bus_lock; struct lock_class_key bus_lock_key; - struct mutex msg_lock; + struct mutex bus_lock; + struct list_head slaves; struct lock_class_key msg_lock_key; - int (*compute_params)(struct sdw_bus *bus); + struct mutex msg_lock; + struct list_head m_rt_list; + struct sdw_defer defer_msg; + struct sdw_bus_params params; + int stream_refcount; const struct sdw_master_ops *ops; const struct sdw_master_port_ops *port_ops; - struct sdw_bus_params params; struct sdw_master_prop prop; void *vendor_specific_prop; - struct list_head m_rt_list; + int hw_sync_min_links; + int controller_id; + unsigned int link_id; + int id; + int (*compute_params)(struct sdw_bus *bus); + DECLARE_BITMAP(assigned, SDW_MAX_DEVICES); + unsigned int clk_stop_timeout; + u32 bank_switch_timeout; + struct irq_chip irq_chip; + struct irq_domain *domain; #ifdef CONFIG_DEBUG_FS struct dentry *debugfs; #endif - struct irq_chip irq_chip; - struct irq_domain *domain; - struct sdw_defer defer_msg; - unsigned int clk_stop_timeout; - u32 bank_switch_timeout; bool multi_link; - int hw_sync_min_links; - int stream_refcount; }; int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, -- cgit v1.2.3 From 1c758df5a83ea0c9b5055536336d8a586b5010b0 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 3 Oct 2024 15:06:41 +0800 Subject: soundwire: optimize sdw_slave_prop move pointers first, and move booleans together. before: struct sdw_slave_prop { u32 mipi_revision; /* 0 4 */ bool wake_capable; /* 4 1 */ bool test_mode_capable; /* 5 1 */ bool clk_stop_mode1; /* 6 1 */ bool simple_clk_stop_capable; /* 7 1 */ u32 clk_stop_timeout; /* 8 4 */ u32 ch_prep_timeout; /* 12 4 */ enum sdw_clk_stop_reset_behave reset_behave; /* 16 4 */ bool high_PHY_capable; /* 20 1 */ bool paging_support; /* 21 1 */ bool bank_delay_support; /* 22 1 */ /* XXX 1 byte hole, try to pack */ enum sdw_p15_behave p15_behave; /* 24 4 */ bool lane_control_support; /* 28 1 */ /* XXX 3 bytes hole, try to pack */ u32 master_count; /* 32 4 */ u32 source_ports; /* 36 4 */ u32 sink_ports; /* 40 4 */ /* XXX 4 bytes hole, try to pack */ struct sdw_dp0_prop * dp0_prop; /* 48 8 */ struct sdw_dpn_prop * src_dpn_prop; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct sdw_dpn_prop * sink_dpn_prop; /* 64 8 */ u8 scp_int1_mask; /* 72 1 */ /* XXX 3 bytes hole, try to pack */ u32 quirks; /* 76 4 */ bool clock_reg_supported; /* 80 1 */ bool use_domain_irq; /* 81 1 */ /* size: 88, cachelines: 2, members: 23 */ /* sum members: 71, holes: 4, sum holes: 11 */ /* padding: 6 */ /* last cacheline: 24 bytes */ }; after: truct sdw_slave_prop { struct sdw_dp0_prop * dp0_prop; /* 0 8 */ struct sdw_dpn_prop * src_dpn_prop; /* 8 8 */ struct sdw_dpn_prop * sink_dpn_prop; /* 16 8 */ u32 mipi_revision; /* 24 4 */ bool wake_capable; /* 28 1 */ bool test_mode_capable; /* 29 1 */ bool clk_stop_mode1; /* 30 1 */ bool simple_clk_stop_capable; /* 31 1 */ u32 clk_stop_timeout; /* 32 4 */ u32 ch_prep_timeout; /* 36 4 */ enum sdw_clk_stop_reset_behave reset_behave; /* 40 4 */ bool high_PHY_capable; /* 44 1 */ bool paging_support; /* 45 1 */ bool bank_delay_support; /* 46 1 */ bool lane_control_support; /* 47 1 */ enum sdw_p15_behave p15_behave; /* 48 4 */ u32 master_count; /* 52 4 */ u32 source_ports; /* 56 4 */ u32 sink_ports; /* 60 4 */ /* --- cacheline 1 boundary (64 bytes) --- */ u32 quirks; /* 64 4 */ u8 scp_int1_mask; /* 68 1 */ bool clock_reg_supported; /* 69 1 */ bool use_domain_irq; /* 70 1 */ /* size: 72, cachelines: 2, members: 23 */ /* padding: 1 */ /* last cacheline: 8 bytes */ }; Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20241003070650.62787-6-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 6fcf122c1831..38db81f5bdb9 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -344,6 +344,9 @@ struct sdw_dpn_prop { /** * struct sdw_slave_prop - SoundWire Slave properties + * @dp0_prop: Data Port 0 properties + * @src_dpn_prop: Source Data Port N properties + * @sink_dpn_prop: Sink Data Port N properties * @mipi_revision: Spec version of the implementation * @wake_capable: Wake-up events are supported * @test_mode_capable: If test mode is supported @@ -360,15 +363,12 @@ struct sdw_dpn_prop { * SCP_AddrPage2 * @bank_delay_support: Slave implements bank delay/bridge support registers * SCP_BankDelay and SCP_NextFrame + * @lane_control_support: Slave supports lane control * @p15_behave: Slave behavior when the Master attempts a read to the Port15 * alias - * @lane_control_support: Slave supports lane control * @master_count: Number of Masters present on this Slave * @source_ports: Bitmap identifying source ports * @sink_ports: Bitmap identifying sink ports - * @dp0_prop: Data Port 0 properties - * @src_dpn_prop: Source Data Port N properties - * @sink_dpn_prop: Sink Data Port N properties * @scp_int1_mask: SCP_INT1_MASK desired settings * @quirks: bitmask identifying deltas from the MIPI specification * @clock_reg_supported: the Peripheral implements the clock base and scale @@ -377,6 +377,9 @@ struct sdw_dpn_prop { * @use_domain_irq: call actual IRQ handler on slave, as well as callback */ struct sdw_slave_prop { + struct sdw_dp0_prop *dp0_prop; + struct sdw_dpn_prop *src_dpn_prop; + struct sdw_dpn_prop *sink_dpn_prop; u32 mipi_revision; bool wake_capable; bool test_mode_capable; @@ -388,16 +391,13 @@ struct sdw_slave_prop { bool high_PHY_capable; bool paging_support; bool bank_delay_support; - enum sdw_p15_behave p15_behave; bool lane_control_support; + enum sdw_p15_behave p15_behave; u32 master_count; u32 source_ports; u32 sink_ports; - struct sdw_dp0_prop *dp0_prop; - struct sdw_dpn_prop *src_dpn_prop; - struct sdw_dpn_prop *sink_dpn_prop; - u8 scp_int1_mask; u32 quirks; + u8 scp_int1_mask; bool clock_reg_supported; bool use_domain_irq; }; -- cgit v1.2.3 From 557e28f8b53243097162cf4d3e59bcee9fb9713b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 3 Oct 2024 15:06:42 +0800 Subject: soundwire: optimize sdw_dp0_prop Move pointers and booleans. Before: struct sdw_dp0_prop { u32 max_word; /* 0 4 */ u32 min_word; /* 4 4 */ u32 num_words; /* 8 4 */ /* XXX 4 bytes hole, try to pack */ u32 * words; /* 16 8 */ bool BRA_flow_controlled; /* 24 1 */ bool simple_ch_prep_sm; /* 25 1 */ /* XXX 2 bytes hole, try to pack */ u32 ch_prep_timeout; /* 28 4 */ bool imp_def_interrupts; /* 32 1 */ /* size: 40, cachelines: 1, members: 8 */ /* sum members: 27, holes: 2, sum holes: 6 */ /* padding: 7 */ /* last cacheline: 40 bytes */ }; after: struct sdw_dp0_prop { u32 * words; /* 0 8 */ u32 max_word; /* 8 4 */ u32 min_word; /* 12 4 */ u32 num_words; /* 16 4 */ u32 ch_prep_timeout; /* 20 4 */ bool BRA_flow_controlled; /* 24 1 */ bool simple_ch_prep_sm; /* 25 1 */ bool imp_def_interrupts; /* 26 1 */ /* size: 32, cachelines: 1, members: 8 */ /* padding: 5 */ /* last cacheline: 32 bytes */ }; Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20241003070650.62787-7-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 38db81f5bdb9..c72095137a35 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -226,16 +226,16 @@ enum sdw_clk_stop_mode { /** * struct sdw_dp0_prop - DP0 properties + * @words: wordlengths supported * @max_word: Maximum number of bits in a Payload Channel Sample, 1 to 64 * (inclusive) * @min_word: Minimum number of bits in a Payload Channel Sample, 1 to 64 * (inclusive) * @num_words: number of wordlengths supported - * @words: wordlengths supported + * @ch_prep_timeout: Port-specific timeout value, in milliseconds * @BRA_flow_controlled: Slave implementation results in an OK_NotReady * response * @simple_ch_prep_sm: If channel prepare sequence is required - * @ch_prep_timeout: Port-specific timeout value, in milliseconds * @imp_def_interrupts: If set, each bit corresponds to support for * implementation-defined interrupts * @@ -244,13 +244,13 @@ enum sdw_clk_stop_mode { * support */ struct sdw_dp0_prop { + u32 *words; u32 max_word; u32 min_word; u32 num_words; - u32 *words; + u32 ch_prep_timeout; bool BRA_flow_controlled; bool simple_ch_prep_sm; - u32 ch_prep_timeout; bool imp_def_interrupts; }; -- cgit v1.2.3 From 9942f90bdcc035eb5f01d7343dac99bd805ef3ec Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 3 Oct 2024 15:06:43 +0800 Subject: soundwire: optimize sdw_dpn_prop before: struct sdw_dpn_prop { u32 num; /* 0 4 */ u32 max_word; /* 4 4 */ u32 min_word; /* 8 4 */ u32 num_words; /* 12 4 */ u32 * words; /* 16 8 */ enum sdw_dpn_type type; /* 24 4 */ u32 max_grouping; /* 28 4 */ bool simple_ch_prep_sm; /* 32 1 */ /* XXX 3 bytes hole, try to pack */ u32 ch_prep_timeout; /* 36 4 */ u32 imp_def_interrupts; /* 40 4 */ u32 max_ch; /* 44 4 */ u32 min_ch; /* 48 4 */ u32 num_channels; /* 52 4 */ u32 * channels; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ u32 num_ch_combinations; /* 64 4 */ /* XXX 4 bytes hole, try to pack */ u32 * ch_combinations; /* 72 8 */ u32 modes; /* 80 4 */ u32 max_async_buffer; /* 84 4 */ bool block_pack_mode; /* 88 1 */ bool read_only_wordlength; /* 89 1 */ /* XXX 2 bytes hole, try to pack */ u32 port_encoding; /* 92 4 */ struct sdw_dpn_audio_mode * audio_modes; /* 96 8 */ /* size: 104, cachelines: 2, members: 22 */ /* sum members: 95, holes: 3, sum holes: 9 */ /* last cacheline: 40 bytes */ }; after: struct sdw_dpn_prop { struct sdw_dpn_audio_mode * audio_modes; /* 0 8 */ u32 num; /* 8 4 */ u32 max_word; /* 12 4 */ u32 min_word; /* 16 4 */ u32 num_words; /* 20 4 */ u32 * words; /* 24 8 */ enum sdw_dpn_type type; /* 32 4 */ u32 max_grouping; /* 36 4 */ u32 ch_prep_timeout; /* 40 4 */ u32 imp_def_interrupts; /* 44 4 */ u32 max_ch; /* 48 4 */ u32 min_ch; /* 52 4 */ u32 num_channels; /* 56 4 */ u32 num_ch_combinations; /* 60 4 */ /* --- cacheline 1 boundary (64 bytes) --- */ u32 * channels; /* 64 8 */ u32 * ch_combinations; /* 72 8 */ u32 modes; /* 80 4 */ u32 max_async_buffer; /* 84 4 */ u32 port_encoding; /* 88 4 */ bool block_pack_mode; /* 92 1 */ bool read_only_wordlength; /* 93 1 */ bool simple_ch_prep_sm; /* 94 1 */ /* size: 96, cachelines: 2, members: 22 */ /* padding: 1 */ /* last cacheline: 32 bytes */ }; Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20241003070650.62787-8-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index c72095137a35..cc0afb8af333 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -288,6 +288,7 @@ struct sdw_dpn_audio_mode { /** * struct sdw_dpn_prop - Data Port DPn properties + * @audio_modes: Audio modes supported * @num: port number * @max_word: Maximum number of bits in a Payload Channel Sample, 1 to 64 * (inclusive) @@ -298,26 +299,26 @@ struct sdw_dpn_audio_mode { * @type: Data port type. Full, Simplified or Reduced * @max_grouping: Maximum number of samples that can be grouped together for * a full data port - * @simple_ch_prep_sm: If the port supports simplified channel prepare state - * machine * @ch_prep_timeout: Port-specific timeout value, in milliseconds * @imp_def_interrupts: If set, each bit corresponds to support for * implementation-defined interrupts * @max_ch: Maximum channels supported * @min_ch: Minimum channels supported * @num_channels: Number of discrete channels supported - * @channels: Discrete channels supported * @num_ch_combinations: Number of channel combinations supported + * @channels: Discrete channels supported * @ch_combinations: Channel combinations supported * @modes: SDW mode supported * @max_async_buffer: Number of samples that this port can buffer in * asynchronous modes + * @port_encoding: Payload Channel Sample encoding schemes supported * @block_pack_mode: Type of block port mode supported * @read_only_wordlength: Read Only wordlength field in DPN_BlockCtrl1 register - * @port_encoding: Payload Channel Sample encoding schemes supported - * @audio_modes: Audio modes supported + * @simple_ch_prep_sm: If the port supports simplified channel prepare state + * machine */ struct sdw_dpn_prop { + struct sdw_dpn_audio_mode *audio_modes; u32 num; u32 max_word; u32 min_word; @@ -325,21 +326,20 @@ struct sdw_dpn_prop { u32 *words; enum sdw_dpn_type type; u32 max_grouping; - bool simple_ch_prep_sm; u32 ch_prep_timeout; u32 imp_def_interrupts; u32 max_ch; u32 min_ch; u32 num_channels; - u32 *channels; u32 num_ch_combinations; + u32 *channels; u32 *ch_combinations; u32 modes; u32 max_async_buffer; + u32 port_encoding; bool block_pack_mode; bool read_only_wordlength; - u32 port_encoding; - struct sdw_dpn_audio_mode *audio_modes; + bool simple_ch_prep_sm; }; /** -- cgit v1.2.3 From 1ae4aa59d79399be0591c8d78c44e280406e2c34 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 3 Oct 2024 15:06:44 +0800 Subject: soundwire: mipi-disco: remove DPn audio-modes The concept of DPn audio-modes was never used by anyone, and was removed from the DisCo for SoundWire 2.0 specification. Remove the definitions and TODO. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20241003070650.62787-9-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 34 ---------------------------------- 1 file changed, 34 deletions(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index cc0afb8af333..66feaa79ecfc 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -254,41 +254,8 @@ struct sdw_dp0_prop { bool imp_def_interrupts; }; -/** - * struct sdw_dpn_audio_mode - Audio mode properties for DPn - * @bus_min_freq: Minimum bus frequency, in Hz - * @bus_max_freq: Maximum bus frequency, in Hz - * @bus_num_freq: Number of discrete frequencies supported - * @bus_freq: Discrete bus frequencies, in Hz - * @min_freq: Minimum sampling frequency, in Hz - * @max_freq: Maximum sampling bus frequency, in Hz - * @num_freq: Number of discrete sampling frequency supported - * @freq: Discrete sampling frequencies, in Hz - * @prep_ch_behave: Specifies the dependencies between Channel Prepare - * sequence and bus clock configuration - * If 0, Channel Prepare can happen at any Bus clock rate - * If 1, Channel Prepare sequence shall happen only after Bus clock is - * changed to a frequency supported by this mode or compatible modes - * described by the next field - * @glitchless: Bitmap describing possible glitchless transitions from this - * Audio Mode to other Audio Modes - */ -struct sdw_dpn_audio_mode { - u32 bus_min_freq; - u32 bus_max_freq; - u32 bus_num_freq; - u32 *bus_freq; - u32 max_freq; - u32 min_freq; - u32 num_freq; - u32 *freq; - u32 prep_ch_behave; - u32 glitchless; -}; - /** * struct sdw_dpn_prop - Data Port DPn properties - * @audio_modes: Audio modes supported * @num: port number * @max_word: Maximum number of bits in a Payload Channel Sample, 1 to 64 * (inclusive) @@ -318,7 +285,6 @@ struct sdw_dpn_audio_mode { * machine */ struct sdw_dpn_prop { - struct sdw_dpn_audio_mode *audio_modes; u32 num; u32 max_word; u32 min_word; -- cgit v1.2.3 From 543bd28a3bfeff31f748ba83348b63313dd37ff9 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 3 Oct 2024 15:06:49 +0800 Subject: soundwire: mipi-disco: add new properties from 2.0 spec The DisCo for SoundWire 2.0 spec adds support for new 'mipi-sdw-sdca-interrupt-register-list' and 'mipi-sdw-commit-register-supported'. This patch only adds the definitions and property reads, but the use of these properties will come at some point in the future when needed. Note a slight conceptual disconnect between the MIPI DisCo definition of a boolean property and the Linux implementation. The latter only checks the presence of the property to set its value to 'true', whereas the MIPI definitions allow for a property with a 'false' value. This patch uses the new introduced mipi_device_property_read_bool() to handle it. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20241003070650.62787-14-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 66feaa79ecfc..952514f044f0 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -335,8 +335,11 @@ struct sdw_dpn_prop { * @master_count: Number of Masters present on this Slave * @source_ports: Bitmap identifying source ports * @sink_ports: Bitmap identifying sink ports - * @scp_int1_mask: SCP_INT1_MASK desired settings * @quirks: bitmask identifying deltas from the MIPI specification + * @sdca_interrupt_register_list: indicates which sets of SDCA interrupt status + * and masks are supported + * @commit_register_supported: is PCP_Commit register supported + * @scp_int1_mask: SCP_INT1_MASK desired settings * @clock_reg_supported: the Peripheral implements the clock base and scale * registers introduced with the SoundWire 1.2 specification. SDCA devices * do not need to set this boolean property as the registers are required. @@ -363,6 +366,8 @@ struct sdw_slave_prop { u32 source_ports; u32 sink_ports; u32 quirks; + u32 sdca_interrupt_register_list; + u8 commit_register_supported; u8 scp_int1_mask; bool clock_reg_supported; bool use_domain_irq; -- cgit v1.2.3 From 71b405b184449fffcb76ea0814104b71dfdb2aee Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 3 Oct 2024 15:06:50 +0800 Subject: soundwire: mipi-disco: add support for DP0/DPn 'lane-list' property The SoundWire specification did not clearly require that ports could use all Lanes. Some SoundWire/SDCA peripheral adopters added restrictions on which lanes can be used by what port, and the DisCo for SoundWire 2.1 specification added a 'lane-list' property to model this hardware limitation. When not specified, the ports can use all Lanes. Otherwise, the 'lane-list' indicates which Lanes can be used, sorted by order of preference (most-preferred-first). This patch only reads the properties, the use of this property will come at a later time with multi-lane support. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20241003070650.62787-15-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 952514f044f0..73f655334fe9 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -238,6 +238,8 @@ enum sdw_clk_stop_mode { * @simple_ch_prep_sm: If channel prepare sequence is required * @imp_def_interrupts: If set, each bit corresponds to support for * implementation-defined interrupts + * @num_lanes: array size of @lane_list + * @lane_list: indicates which Lanes can be used by DP0 * * The wordlengths are specified by Spec as max, min AND number of * discrete values, implementation can define based on the wordlengths they @@ -252,6 +254,8 @@ struct sdw_dp0_prop { bool BRA_flow_controlled; bool simple_ch_prep_sm; bool imp_def_interrupts; + int num_lanes; + u32 *lane_list; }; /** @@ -275,6 +279,8 @@ struct sdw_dp0_prop { * @num_ch_combinations: Number of channel combinations supported * @channels: Discrete channels supported * @ch_combinations: Channel combinations supported + * @lane_list: indicates which Lanes can be used by DPn + * @num_lanes: array size of @lane_list * @modes: SDW mode supported * @max_async_buffer: Number of samples that this port can buffer in * asynchronous modes @@ -300,6 +306,8 @@ struct sdw_dpn_prop { u32 num_ch_combinations; u32 *channels; u32 *ch_combinations; + u32 *lane_list; + int num_lanes; u32 modes; u32 max_async_buffer; u32 port_encoding; -- cgit v1.2.3 From e26a0c5d828b225b88f534e2fcf10bf617f85f23 Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Sun, 29 Sep 2024 20:44:35 -0700 Subject: net: mana: Increase the DEF_RX_BUFFERS_PER_QUEUE to 1024 Through some experiments, we found out that increasing the default RX buffers count from 512 to 1024, gives slightly better throughput and significantly reduces the no_wqe_rx errs on the receiver side. Along with these, other parameters like cpu usage, retrans seg etc also show some improvement with 1024 value. Following are some snippets from the experiments ntttcp tests with 512 Rx buffers --------------------------------------- connections| throughput| no_wqe errs| --------------------------------------- 1 | 40.93Gbps | 123,211 | 16 | 180.15Gbps | 190,120 | 128 | 180.20Gbps | 173,508 | 256 | 180.27Gbps | 189,884 | ntttcp tests with 1024 Rx buffers --------------------------------------- connections| throughput| no_wqe errs| --------------------------------------- 1 | 44.22Gbps | 19,864 | 16 | 180.19Gbps | 4,430 | 128 | 180.21Gbps | 2,560 | 256 | 180.29Gbps | 1,529 | So, increasing the default RX buffers per queue count to 1024 Signed-off-by: Shradha Gupta Reviewed-by: Haiyang Zhang Reviewed-by: Pavan Chebbi Link: https://patch.msgid.link/1727667875-29908-1-git-send-email-shradhagupta@linux.microsoft.com Signed-off-by: Paolo Abeni --- include/net/mana/mana.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index f2a5200d8a0f..9b0faa24b758 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -43,7 +43,7 @@ enum TRI_STATE { * size beyond this value gets rejected by __alloc_page() call. */ #define MAX_RX_BUFFERS_PER_QUEUE 8192 -#define DEF_RX_BUFFERS_PER_QUEUE 512 +#define DEF_RX_BUFFERS_PER_QUEUE 1024 #define MIN_RX_BUFFERS_PER_QUEUE 128 /* This max value for TX buffers is derived as the maximum allocatable -- cgit v1.2.3 From aa3ab3336e6052dba95dac9f5c268eb7d1823002 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 30 Sep 2024 14:33:19 +0200 Subject: scsi: libfcoe: Include instead of Substitute the inclusion of header with to allow the removal of legacy inclusion of from . Signed-off-by: Uros Bizjak Reviewed-by: Bart Van Assche Cc: Hannes Reinecke Cc: James E.J. Bottomley Cc: Martin K. Petersen Signed-off-by: Jason A. Donenfeld --- include/scsi/libfcoe.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h index 3c5899290aed..6616348e59b9 100644 --- a/include/scsi/libfcoe.h +++ b/include/scsi/libfcoe.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3 From 5b3fdc9f2ff10d3cee106ddaa0ee6636c7de381e Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 30 Sep 2024 14:33:29 +0200 Subject: random: Do not include in Files that use prandom infrastructure are now converted to use header instead of . Remove the legacy inclusion of from . This is the "nice cleanup" part, wished in c0842fbc1b18. Signed-off-by: Uros Bizjak Fixes: c0842fbc1b18 ("random32: move the pseudo-random 32-bit definitions to prandom.h") Cc: Theodore Ts'o Cc: Jason A. Donenfeld Cc: Linus Torvalds Signed-off-by: Jason A. Donenfeld --- include/linux/random.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/random.h b/include/linux/random.h index b0a940af4fff..333cecfca93f 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -145,13 +145,6 @@ declare_get_random_var_wait(u64, u32) declare_get_random_var_wait(long, unsigned long) #undef declare_get_random_var -/* - * This is designed to be standalone for just prandom - * users, but for now we include it from - * for legacy reasons. - */ -#include - #ifdef CONFIG_SMP int random_prepare_cpu(unsigned int cpu); int random_online_cpu(unsigned int cpu); -- cgit v1.2.3 From d18c13697b4dcbf6a8f06c3d8e564c4f5ad1477c Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 30 Sep 2024 14:33:30 +0200 Subject: prandom: Include in include was removed from in d9f29deb7fe8 ("prandom: Remove unused include") because this inclusion broke arm64 due to a circular dependency on include files. __percpu tag is defined in include/linux/compiler_types.h, so there is currently no direct need for the inclusion of . However, in [1] we would like to repurpose __percpu tag as a named address space qualifier, where __percpu macro uses defines from . The circular dependency was removed in ddd8e37ebaa1 ("random: Do not include in ") and it cleared the path for the inclusion of in . This patch is basically a revert of d9f29deb7fe8 ("prandom: Remove unused include"). [1] https://lore.kernel.org/lkml/20240812115945.484051-4-ubizjak@gmail.com/ Signed-off-by: Uros Bizjak Cc: Theodore Ts'o Cc: Jason A. Donenfeld Cc: Kent Overstreet Signed-off-by: Jason A. Donenfeld --- include/linux/prandom.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/prandom.h b/include/linux/prandom.h index f7f1e5251c67..f2ed5b72b3d6 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -10,6 +10,7 @@ #include #include +#include #include struct rnd_state { -- cgit v1.2.3 From 7788d320ba5ecbfa88d0be8c32ef8f018f2f020f Mon Sep 17 00:00:00 2001 From: Antonino Maniscalco Date: Thu, 3 Oct 2024 18:12:59 +0200 Subject: drm/msm/a6xx: Add a flag to allow preemption to submitqueue_create Some userspace changes are necessary so add a flag for userspace to advertise support for preemption when creating the submitqueue. When this flag is not set preemption will not be allowed in the middle of the submitted IBs therefore mantaining compatibility with older userspace. The flag is rejected if preemption is not supported on the target, this allows userspace to know whether preemption is supported. Tested-by: Rob Clark Tested-by: Neil Armstrong # on SM8650-QRD Tested-by: Neil Armstrong # on SM8550-QRD Tested-by: Neil Armstrong # on SM8450-HDK Signed-off-by: Antonino Maniscalco Patchwork: https://patchwork.freedesktop.org/patch/618028/ Signed-off-by: Rob Clark --- include/uapi/drm/msm_drm.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 2377147b6af0..b916aab80dde 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -347,7 +347,10 @@ struct drm_msm_gem_madvise { * backwards compatibility as a "default" submitqueue */ -#define MSM_SUBMITQUEUE_FLAGS (0) +#define MSM_SUBMITQUEUE_ALLOW_PREEMPT 0x00000001 +#define MSM_SUBMITQUEUE_FLAGS ( \ + MSM_SUBMITQUEUE_ALLOW_PREEMPT | \ + 0) /* * The submitqueue priority should be between 0 and MSM_PARAM_PRIORITIES-1, -- cgit v1.2.3 From 816ad8f1e498fe5e9e992da137316302219f2137 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 2 Oct 2024 14:51:52 -0700 Subject: lib: packing: remove kernel-doc from header file It is not necessary to have the kernel-doc duplicated both in the header and in the implementation. It is better to have it near the implementation of the function, since in C, a function can have N declarations, but only one definition. Signed-off-by: Vladimir Oltean Signed-off-by: Jacob Keller Reviewed-by: Przemek Kitszel Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241002-packing-kunit-tests-and-split-pack-unpack-v2-3-8373e551eae3@intel.com Signed-off-by: Jakub Kicinski --- include/linux/packing.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include') diff --git a/include/linux/packing.h b/include/linux/packing.h index 8d6571feb95d..69baefebcd02 100644 --- a/include/linux/packing.h +++ b/include/linux/packing.h @@ -17,32 +17,6 @@ enum packing_op { UNPACK, }; -/** - * packing - Convert numbers (currently u64) between a packed and an unpacked - * format. Unpacked means laid out in memory in the CPU's native - * understanding of integers, while packed means anything else that - * requires translation. - * - * @pbuf: Pointer to a buffer holding the packed value. - * @uval: Pointer to an u64 holding the unpacked value. - * @startbit: The index (in logical notation, compensated for quirks) where - * the packed value starts within pbuf. Must be larger than, or - * equal to, endbit. - * @endbit: The index (in logical notation, compensated for quirks) where - * the packed value ends within pbuf. Must be smaller than, or equal - * to, startbit. - * @op: If PACK, then uval will be treated as const pointer and copied (packed) - * into pbuf, between startbit and endbit. - * If UNPACK, then pbuf will be treated as const pointer and the logical - * value between startbit and endbit will be copied (unpacked) to uval. - * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and - * QUIRK_MSB_ON_THE_RIGHT. - * - * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming - * correct usage, return code may be discarded. - * If op is PACK, pbuf is modified. - * If op is UNPACK, uval is modified. - */ int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, enum packing_op op, u8 quirks); -- cgit v1.2.3 From 7263f64e16d90ded44ce211381b2def83db32fd9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 2 Oct 2024 14:51:53 -0700 Subject: lib: packing: add pack() and unpack() wrappers over packing() Geert Uytterhoeven described packing() as "really bad API" because of not being able to enforce const correctness. The same function is used both when "pbuf" is input and "uval" is output, as in the other way around. Create 2 wrapper functions where const correctness can be ensured. Do ugly type casts inside, to be able to reuse packing() as currently implemented - which will _not_ modify the input argument. Also, take the opportunity to change the type of startbit and endbit to size_t - an unsigned type - in these new function prototypes. When int, an extra check for negative values is necessary. Hopefully, when packing() goes away completely, that check can be dropped. My concern is that code which does rely on the conditional directionality of packing() is harder to refactor without blowing up in size. So it may take a while to completely eliminate packing(). But let's make alternatives available for those who do not need that. Link: https://lore.kernel.org/netdev/20210223112003.2223332-1-geert+renesas@glider.be/ Signed-off-by: Vladimir Oltean Signed-off-by: Jacob Keller Reviewed-by: Przemek Kitszel Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241002-packing-kunit-tests-and-split-pack-unpack-v2-4-8373e551eae3@intel.com Signed-off-by: Jakub Kicinski --- include/linux/packing.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/packing.h b/include/linux/packing.h index 69baefebcd02..ea25cb93cc70 100644 --- a/include/linux/packing.h +++ b/include/linux/packing.h @@ -20,4 +20,10 @@ enum packing_op { int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, enum packing_op op, u8 quirks); +int pack(void *pbuf, const u64 *uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks); + +int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks); + #endif -- cgit v1.2.3 From 28aec9ca29f05dabb835293acc6e202bf51b8092 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 2 Oct 2024 14:51:54 -0700 Subject: lib: packing: duplicate pack() and unpack() implementations packing() is now used in some hot paths, and it would be good to get rid of some ifs and buts that depend on "op", to speed things up a little bit. With the main implementations now taking size_t endbit, we no longer have to check for negative values. Update the local integer variables to also be size_t to match. Signed-off-by: Vladimir Oltean Signed-off-by: Jacob Keller Reviewed-by: Przemek Kitszel Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241002-packing-kunit-tests-and-split-pack-unpack-v2-5-8373e551eae3@intel.com Signed-off-by: Jakub Kicinski --- include/linux/packing.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/packing.h b/include/linux/packing.h index ea25cb93cc70..5d36dcd06f60 100644 --- a/include/linux/packing.h +++ b/include/linux/packing.h @@ -20,8 +20,8 @@ enum packing_op { int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, enum packing_op op, u8 quirks); -int pack(void *pbuf, const u64 *uval, size_t startbit, size_t endbit, - size_t pbuflen, u8 quirks); +int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen, + u8 quirks); int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, size_t pbuflen, u8 quirks); -- cgit v1.2.3 From 7e863e5db6185b1add0df4cb01b31a4ed1c4b738 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 1 Oct 2024 21:28:43 +0200 Subject: ipv4: Convert ip_route_input() to dscp_t. Pass a dscp_t variable to ip_route_input(), instead of a plain u8, to prevent accidental setting of ECN bits in ->flowi4_tos. Callers of ip_route_input() to consider are: * input_action_end_dx4_finish() and input_action_end_dt4() in net/ipv6/seg6_local.c. These functions set the tos parameter to 0, which is already a valid dscp_t value, so they don't need to be adjusted for the new prototype. * icmp_route_lookup(), which already has a dscp_t variable to pass as parameter. We just need to remove the inet_dscp_to_dsfield() conversion. * br_nf_pre_routing_finish(), ip_options_rcv_srr() and ip4ip6_err(), which get the DSCP directly from IPv4 headers. Define a helper to read the .tos field of struct iphdr as dscp_t, so that these function don't have to do the conversion manually. While there, declare *iph as const in br_nf_pre_routing_finish(), declare its local variables in reverse-christmas-tree order and move the "err = ip_route_input()" assignment out of the conditional to avoid checkpatch warning. Signed-off-by: Guillaume Nault Reviewed-by: David Ahern Link: https://patch.msgid.link/e9d40781d64d3d69f4c79ac8a008b8d67a033e8d.1727807926.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 5 +++++ include/net/route.h | 5 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index d92d3bc3ec0e..bab084df1567 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -424,6 +424,11 @@ int ip_decrease_ttl(struct iphdr *iph) return --iph->ttl; } +static inline dscp_t ip4h_dscp(const struct iphdr *ip4h) +{ + return inet_dsfield_to_dscp(ip4h->tos); +} + static inline int ip_mtu_locked(const struct dst_entry *dst) { const struct rtable *rt = dst_rtable(dst); diff --git a/include/net/route.h b/include/net/route.h index 1789f1e6640b..03dd28cf4bc4 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -208,12 +208,13 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src, const struct sk_buff *hint); static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin) + dscp_t dscp, struct net_device *devin) { int err; rcu_read_lock(); - err = ip_route_input_noref(skb, dst, src, tos, devin); + err = ip_route_input_noref(skb, dst, src, inet_dscp_to_dsfield(dscp), + devin); if (!err) { skb_dst_force(skb); if (!skb_dst(skb)) -- cgit v1.2.3 From 66fb6386d358a04edd5c640e38b4a02b323b89d8 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 1 Oct 2024 21:28:49 +0200 Subject: ipv4: Convert ip_route_input_noref() to dscp_t. Pass a dscp_t variable to ip_route_input_noref(), instead of a plain u8, to prevent accidental setting of ECN bits in ->flowi4_tos. Callers of ip_route_input_noref() to consider are: * arp_process() in net/ipv4/arp.c. This function sets the tos parameter to 0, which is already a valid dscp_t value, so it doesn't need to be adjusted for the new prototype. * ip_route_input(), which already has a dscp_t variable to pass as parameter. We just need to remove the inet_dscp_to_dsfield() conversion. * ipvlan_l3_rcv(), bpf_lwt_input_reroute(), ip_expire(), ip_rcv_finish_core(), xfrm4_rcv_encap_finish() and xfrm4_rcv_encap(), which get the DSCP directly from IPv4 headers and can simply use the ip4h_dscp() helper. While there, declare the IPv4 header pointers as const in ipvlan_l3_rcv() and bpf_lwt_input_reroute(). Also, modify the declaration of ip_route_input_noref() in include/net/route.h so that it matches the prototype of its implementation in net/ipv4/route.c. Signed-off-by: Guillaume Nault Reviewed-by: David Ahern Link: https://patch.msgid.link/a8a747bed452519c4d0cc06af32c7e7795d7b627.1727807926.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/route.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index 03dd28cf4bc4..5e4374d66927 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -201,8 +201,8 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, struct in_device *in_dev, u32 *itag); -int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin); +int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev); int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin, const struct sk_buff *hint); @@ -213,8 +213,7 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, int err; rcu_read_lock(); - err = ip_route_input_noref(skb, dst, src, inet_dscp_to_dsfield(dscp), - devin); + err = ip_route_input_noref(skb, dst, src, dscp, devin); if (!err) { skb_dst_force(skb); if (!skb_dst(skb)) -- cgit v1.2.3 From 4f647a780f3606acbd2116248d51eadb4d865615 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 16 Sep 2024 02:17:10 -0700 Subject: bpf: __bpf_fastcall for bpf_get_smp_processor_id in uapi Since [1] kernel supports __bpf_fastcall attribute for helper function bpf_get_smp_processor_id(). Update uapi definition for this helper in order to have this attribute in the generated bpf_helper_defs.h [1] commit 91b7fbf3936f ("bpf, x86, riscv, arm: no_caller_saved_registers for bpf_get_smp_processor_id()") Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240916091712.2929279-3-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c6cd7c7aeeee..8ab4d8184b9d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1970,6 +1970,8 @@ union bpf_attr { * program. * Return * The SMP id of the processor running the program. + * Attributes + * __bpf_fastcall * * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description -- cgit v1.2.3 From da7d71bcb0637b7aa18934628fdb5a55f2db49a6 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 16 Sep 2024 02:17:11 -0700 Subject: bpf: Use KF_FASTCALL to mark kfuncs supporting fastcall contract In order to allow pahole add btf_decl_tag("bpf_fastcall") for kfuncs supporting bpf_fastcall, mark such functions with KF_FASTCALL in id_set8 objects. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240916091712.2929279-4-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index b8a583194c4a..631060e3ad14 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -75,6 +75,7 @@ #define KF_ITER_NEXT (1 << 9) /* kfunc implements BPF iter next method */ #define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */ #define KF_RCU_PROTECTED (1 << 11) /* kfunc should be protected by rcu cs when they are invoked */ +#define KF_FASTCALL (1 << 12) /* kfunc supports bpf_fastcall protocol */ /* * Tag marking a kernel function as a kfunc. This is meant to minimize the -- cgit v1.2.3 From 22fbabe82cea7af4127f089b7f3553cd75571d9a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 12 Sep 2024 15:30:02 -0700 Subject: scsi: ufs: core: Improve the struct ufs_hba documentation Make the role of the structure members related to UIC command processing more clear. Reviewed-by: Peter Wang Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240912223019.3510966-2-bvanassche@acm.org Reviewed-by: Bao D. Nguyen Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 3f68ae3e4330..a95282b9f743 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -877,9 +877,10 @@ enum ufshcd_mcq_opr { * @tmf_tag_set: TMF tag set. * @tmf_queue: Used to allocate TMF tags. * @tmf_rqs: array with pointers to TMF requests while these are in progress. - * @active_uic_cmd: handle of active UIC command - * @uic_cmd_mutex: mutex for UIC command - * @uic_async_done: completion used during UIC processing + * @active_uic_cmd: pointer to active UIC command. + * @uic_cmd_mutex: mutex used for serializing UIC command processing. + * @uic_async_done: completion used to wait for power mode or hibernation state + * changes. * @ufshcd_state: UFSHCD state * @eh_flags: Error handling flags * @intr_mask: Interrupt Mask Bits -- cgit v1.2.3 From a04d5f82fa3893aa06386db93883b151b93b11ed Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Oct 2024 05:01:05 +0100 Subject: mm: Remove PageMappedToDisk All callers have now been converted to the folio APIs, so remove the page API for this flag. Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20241002040111.1023018-4-willy@infradead.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 1b3a76710487..35d08c30d4a6 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -554,7 +554,7 @@ FOLIO_FLAG(owner_2, FOLIO_HEAD_PAGE) */ TESTPAGEFLAG(Writeback, writeback, PF_NO_TAIL) TESTSCFLAG(Writeback, writeback, PF_NO_TAIL) -PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL) +FOLIO_FLAG(mappedtodisk, FOLIO_HEAD_PAGE) /* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL) -- cgit v1.2.3 From fd15ba4cb00a43fb4df42e1f95f94857ad122eea Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Oct 2024 05:01:07 +0100 Subject: ceph: Remove call to PagePrivate2() Use the folio that we already have to call folio_test_private_2() instead. This is the last call to PagePrivate2(), so replace its PAGEFLAG() definition with FOLIO_FLAG(). Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20241002040111.1023018-6-willy@infradead.org Signed-off-by: Christian Brauner --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 35d08c30d4a6..4c2dfe289046 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -543,7 +543,7 @@ FOLIO_FLAG(swapbacked, FOLIO_HEAD_PAGE) * - PG_private and PG_private_2 cause release_folio() and co to be invoked */ PAGEFLAG(Private, private, PF_ANY) -PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY) +FOLIO_FLAG(private_2, FOLIO_HEAD_PAGE) /* owner_2 can be set on tail pages for anon memory */ FOLIO_FLAG(owner_2, FOLIO_HEAD_PAGE) -- cgit v1.2.3 From 0e45a09a1da0872786885c505467aab8fb29b5b4 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 4 Sep 2024 21:17:06 -0700 Subject: Input: serio - define serio_pause_rx guard to pause and resume serio ports serio_pause_rx() and serio_continue_rx() are usually used together to temporarily stop receiving interrupts/data for a given serio port. Define "serio_pause_rx" guard for this so that the port is always resumed once critical section is over. Example: scoped_guard(serio_pause_rx, elo->serio) { elo->expected_packet = toupper(packet[0]); init_completion(&elo->cmd_done); } Link: https://lore.kernel.org/r/20240905041732.2034348-2-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/serio.h b/include/linux/serio.h index bf2191f25350..69a47674af65 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -6,6 +6,7 @@ #define _SERIO_H +#include #include #include #include @@ -161,4 +162,6 @@ static inline void serio_continue_rx(struct serio *serio) spin_unlock_irq(&serio->lock); } +DEFINE_GUARD(serio_pause_rx, struct serio *, serio_pause_rx(_T), serio_continue_rx(_T)) + #endif -- cgit v1.2.3 From c653ffc283404a6c1c0e65143a833180c7ff799b Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 3 Oct 2024 07:46:51 -0700 Subject: HID: stop exporting hid_snto32() The only user of hid_snto32() is Logitech HID++ driver, which always calls hid_snto32() with valid size (constant, either 12 or 8) and therefore can simply use sign_extend32(). Make the switch and remove hid_snto32(). Move snto32() and s32ton() to avoid introducing forward declaration. Signed-off-by: Dmitry Torokhov Link: https://patch.msgid.link/20241003144656.3786064-2-dmitry.torokhov@gmail.com [bentiss: fix checkpatch warning] Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 121d5b8bc867..f7c3a66647fd 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -974,7 +974,6 @@ const struct hid_device_id *hid_match_device(struct hid_device *hdev, struct hid_driver *hdrv); bool hid_compare_device_paths(struct hid_device *hdev_a, struct hid_device *hdev_b, char separator); -s32 hid_snto32(__u32 value, unsigned n); __u32 hid_field_extract(const struct hid_device *hid, __u8 *report, unsigned offset, unsigned n); -- cgit v1.2.3 From 9ab515b18f8463fbb340fece47cd461809e42a9d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Oct 2024 23:58:41 +0100 Subject: mm: Define VM_HIGH_ARCH_6 The addition of protection keys means that on arm64 we now use all of the currently defined VM_HIGH_ARCH_x bits. In order to allow us to allocate a new flag for GCS pages define VM_HIGH_ARCH_6. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20241001-arm64-gcs-v13-2-222b78d87eee@kernel.org Signed-off-by: Catalin Marinas --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ecf63d2b0582..182bad0c55df 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -329,12 +329,14 @@ extern unsigned int kobjsize(const void *objp); #define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_5 37 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_BIT_6 38 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0) #define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1) #define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2) #define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3) #define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4) #define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5) +#define VM_HIGH_ARCH_6 BIT(VM_HIGH_ARCH_BIT_6) #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ #ifdef CONFIG_ARCH_HAS_PKEYS -- cgit v1.2.3 From 91e102e79740ae43ded050ccac71aa3371db4f33 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Oct 2024 23:58:43 +0100 Subject: prctl: arch-agnostic prctl for shadow stack Three architectures (x86, aarch64, riscv) have announced support for shadow stacks with fairly similar functionality. While x86 is using arch_prctl() to control the functionality neither arm64 nor riscv uses that interface so this patch adds arch-agnostic prctl() support to get and set status of shadow stacks and lock the current configuation to prevent further changes, with support for turning on and off individual subfeatures so applications can limit their exposure to features that they do not need. The features are: - PR_SHADOW_STACK_ENABLE: Tracking and enforcement of shadow stacks, including allocation of a shadow stack if one is not already allocated. - PR_SHADOW_STACK_WRITE: Writes to specific addresses in the shadow stack. - PR_SHADOW_STACK_PUSH: Push additional values onto the shadow stack. These features are expected to be inherited by new threads and cleared on exec(), unknown features should be rejected for enable but accepted for locking (in order to allow for future proofing). This is based on a patch originally written by Deepak Gupta but modified fairly heavily, support for indirect landing pads is removed, additional modes added and the locking interface reworked. The set status prctl() is also reworked to just set flags, if setting/reading the shadow stack pointer is required this could be a separate prctl. Reviewed-by: Thiago Jung Bauermann Reviewed-by: Catalin Marinas Acked-by: Yury Khrustalev Signed-off-by: Mark Brown Reviewed-by: Deepak Gupta Link: https://lore.kernel.org/r/20241001-arm64-gcs-v13-4-222b78d87eee@kernel.org Signed-off-by: Catalin Marinas --- include/linux/mm.h | 4 ++++ include/uapi/linux/prctl.h | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 182bad0c55df..56654306a832 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4221,4 +4221,8 @@ static inline void pgalloc_tag_copy(struct folio *new, struct folio *old) } #endif /* CONFIG_MEM_ALLOC_PROFILING */ +int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status); +int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status); +int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); + #endif /* _LINUX_MM_H */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 35791791a879..557a3d2ac1d4 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -328,4 +328,26 @@ struct prctl_mm_map { # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */ # define PR_PPC_DEXCR_CTRL_MASK 0x1f +/* + * Get the current shadow stack configuration for the current thread, + * this will be the value configured via PR_SET_SHADOW_STACK_STATUS. + */ +#define PR_GET_SHADOW_STACK_STATUS 74 + +/* + * Set the current shadow stack configuration. Enabling the shadow + * stack will cause a shadow stack to be allocated for the thread. + */ +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +/* + * Prevent further changes to the specified shadow stack + * configuration. All bits may be locked via this call, including + * undefined bits. + */ +#define PR_LOCK_SHADOW_STACK_STATUS 76 + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From 3630e82ab6bd2642f0fc03b574783ccf2fb0c955 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Oct 2024 23:58:44 +0100 Subject: mman: Add map_shadow_stack() flags In preparation for adding arm64 GCS support make the map_shadow_stack() SHADOW_STACK_SET_TOKEN flag generic and add _SET_MARKER. The existing flag indicates that a token usable for stack switch should be added to the top of the newly mapped GCS region while the new flag indicates that a top of stack marker suitable for use by unwinders should be added above that. For arm64 the top of stack marker is all bits 0. Reviewed-by: Thiago Jung Bauermann Reviewed-by: Catalin Marinas Acked-by: Yury Khrustalev Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20241001-arm64-gcs-v13-5-222b78d87eee@kernel.org Signed-off-by: Catalin Marinas --- include/uapi/asm-generic/mman.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h index 57e8195d0b53..5e3d61ddbd8c 100644 --- a/include/uapi/asm-generic/mman.h +++ b/include/uapi/asm-generic/mman.h @@ -19,4 +19,8 @@ #define MCL_FUTURE 2 /* lock all future mappings */ #define MCL_ONFAULT 4 /* lock all pages that are faulted in */ +#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */ +#define SHADOW_STACK_SET_MARKER (1ULL << 1) /* Set up a top of stack marker in the shadow stack */ + + #endif /* __ASM_GENERIC_MMAN_H */ -- cgit v1.2.3 From ae80e1629aeaf5be726a9ea94eb7345b1a44b00d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Oct 2024 23:58:53 +0100 Subject: mm: Define VM_SHADOW_STACK for arm64 when we support GCS Use VM_HIGH_ARCH_5 for guarded control stack pages. Reviewed-by: Thiago Jung Bauermann Reviewed-by: Catalin Marinas Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20241001-arm64-gcs-v13-14-222b78d87eee@kernel.org Signed-off-by: Catalin Marinas --- include/linux/mm.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 56654306a832..8852c39c7695 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -367,7 +367,17 @@ extern unsigned int kobjsize(const void *objp); * for more details on the guard size. */ # define VM_SHADOW_STACK VM_HIGH_ARCH_5 -#else +#endif + +#if defined(CONFIG_ARM64_GCS) +/* + * arm64's Guarded Control Stack implements similar functionality and + * has similar constraints to shadow stacks. + */ +# define VM_SHADOW_STACK VM_HIGH_ARCH_6 +#endif + +#ifndef VM_SHADOW_STACK # define VM_SHADOW_STACK VM_NONE #endif -- cgit v1.2.3 From 7ec3b57cb29f8371bf12a725b6e8f75831a03f27 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Oct 2024 23:59:06 +0100 Subject: arm64/ptrace: Expose GCS via ptrace and core files Provide a new register type NT_ARM_GCS reporting the current GCS mode and pointer for EL0. Due to the interactions with allocation and deallocation of Guarded Control Stacks we do not permit any changes to the GCS mode via ptrace, only GCSPR_EL0 may be changed. Reviewed-by: Thiago Jung Bauermann Reviewed-by: Catalin Marinas Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20241001-arm64-gcs-v13-27-222b78d87eee@kernel.org Signed-off-by: Catalin Marinas --- include/uapi/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b9935988da5c..9adc218fb6df 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -443,6 +443,7 @@ typedef struct elf64_shdr { #define NT_ARM_ZT 0x40d /* ARM SME ZT registers */ #define NT_ARM_FPMR 0x40e /* ARM floating point mode register */ #define NT_ARM_POE 0x40f /* ARM POE registers */ +#define NT_ARM_GCS 0x410 /* ARM GCS state */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ -- cgit v1.2.3 From b63c755cb65d43c8aba987c4f6b57c77c6f123f2 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 30 Sep 2024 14:29:53 +0100 Subject: appletalk: Remove deadcode alloc_ltalkdev in net/appletalk/dev.c is dead since commit 00f3696f7555 ("net: appletalk: remove cops support") Removing it (and it's helper) leaves dev.c and if_ltalk.h empty; remove them and the Makefile entry. tun.c was including that if_ltalk.h but actually wanted the uapi version for LTALK_ALEN, fix up the path. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Willem de Bruijn Acked-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/if_ltalk.h | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 include/linux/if_ltalk.h (limited to 'include') diff --git a/include/linux/if_ltalk.h b/include/linux/if_ltalk.h deleted file mode 100644 index 4cc1c0b77870..000000000000 --- a/include/linux/if_ltalk.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_LTALK_H -#define __LINUX_LTALK_H - -#include - -extern struct net_device *alloc_ltalkdev(int sizeof_priv); -#endif -- cgit v1.2.3 From ec841b8d73cff37f8960e209017efe1eb2fb21f2 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 23 Sep 2024 16:12:01 +0800 Subject: usb: chipidea: add CI_HDRC_HAS_SHORT_PKT_LIMIT flag Currently, the imx deivice controller has below limitations: 1. can't generate short packet interrupt if IOC not set in dTD. So if one request span more than one dTDs and only the last dTD set IOC, the usb request will pending there if no more data comes. 2. the controller can't accurately deliver data to differtent usb requests in some cases due to short packet. For example: one usb request span 3 dTDs, then if the controller received a short packet the next packet will go to 2nd dTD of current request rather than the first dTD of next request. 3. can't build a bus packet use multiple dTDs. For example: controller needs to send one packet of 512 bytes use dTD1 (200 bytes) + dTD2 (312 bytes), actually the host side will see 200 bytes short packet. Based on these limits, add CI_HDRC_HAS_SHORT_PKT_LIMIT flag and use it on imx platforms. Signed-off-by: Xu Yang Acked-by: Peter Chen Link: https://lore.kernel.org/r/20240923081203.2851768-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/chipidea.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index 5a7f96684ea2..ebdfef124b2b 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -65,6 +65,7 @@ struct ci_hdrc_platform_data { #define CI_HDRC_PHY_VBUS_CONTROL BIT(16) #define CI_HDRC_HAS_PORTSC_PEC_MISSED BIT(17) #define CI_HDRC_FORCE_VBUS_ACTIVE_ALWAYS BIT(18) +#define CI_HDRC_HAS_SHORT_PKT_LIMIT BIT(19) enum usb_dr_mode dr_mode; #define CI_HDRC_CONTROLLER_RESET_EVENT 0 #define CI_HDRC_CONTROLLER_STOPPED_EVENT 1 -- cgit v1.2.3 From 8b7fd6a15f8c32760c2026a62dcf55219b4da15b Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 1 Oct 2024 16:30:05 +0200 Subject: HID: bpf: move HID-BPF report descriptor fixup earlier Currently, hid_bpf_rdesc_fixup() is called once the match between the HID device and the driver is done. This can be problematic in case the driver selected by the kernel would change the report descriptor after the fact. To give a chance for hid_bpf_rdesc_fixup() to provide hints on to how to select a dedicated driver or not, move the call to that BPF hook earlier in the .probe() process, when we get the first match. However, this means that we might get called more than once (typically once for hid-generic, and once for hid-vendor-specific). So we store the result of HID-BPF fixup in struct hid_device. Basically, this means that ->bpf_rdesc can replace ->dev_rdesc when it was used in the code. In order to not grow struct hid_device, some fields are re-ordered. This was the output of pahole for the first 128 bytes: struct hid_device { __u8 * dev_rdesc; /* 0 8 */ unsigned int dev_rsize; /* 8 4 */ /* XXX 4 bytes hole, try to pack */ __u8 * rdesc; /* 16 8 */ unsigned int rsize; /* 24 4 */ /* XXX 4 bytes hole, try to pack */ struct hid_collection * collection; /* 32 8 */ unsigned int collection_size; /* 40 4 */ unsigned int maxcollection; /* 44 4 */ unsigned int maxapplication; /* 48 4 */ __u16 bus; /* 52 2 */ __u16 group; /* 54 2 */ __u32 vendor; /* 56 4 */ __u32 product; /* 60 4 */ /* --- cacheline 1 boundary (64 bytes) --- */ __u32 version; /* 64 4 */ enum hid_type type; /* 68 4 */ unsigned int country; /* 72 4 */ /* XXX 4 bytes hole, try to pack */ struct hid_report_enum report_enum[3]; /* 80 6216 */ Basically, we got three holes of 4 bytes. We can reorder things a little and makes those 3 holes a continuous 12 bytes hole, which can be replaced by the new pointer and the new unsigned int we need. In terms of code allocation, when not using HID-BPF, we are back to kernel v6.2 in hid_open_report(). These multiple kmemdup() calls will be fixed in a later commit. Link: https://patch.msgid.link/20241001-hid-bpf-hid-generic-v3-1-2ef1019468df@kernel.org Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 18 ++++++++++-------- include/linux/hid_bpf.h | 11 +++-------- 2 files changed, 13 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 121d5b8bc867..ff58b5ceb62e 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -599,15 +599,17 @@ enum hid_battery_status { struct hid_driver; struct hid_ll_driver; -struct hid_device { /* device report descriptor */ - const __u8 *dev_rdesc; - unsigned dev_rsize; - const __u8 *rdesc; - unsigned rsize; +struct hid_device { + const __u8 *dev_rdesc; /* device report descriptor */ + const __u8 *bpf_rdesc; /* bpf modified report descriptor, if any */ + const __u8 *rdesc; /* currently used report descriptor */ + unsigned int dev_rsize; + unsigned int bpf_rsize; + unsigned int rsize; + unsigned int collection_size; /* Number of allocated hid_collections */ struct hid_collection *collection; /* List of HID collections */ - unsigned collection_size; /* Number of allocated hid_collections */ - unsigned maxcollection; /* Number of parsed collections */ - unsigned maxapplication; /* Number of applications */ + unsigned int maxcollection; /* Number of parsed collections */ + unsigned int maxapplication; /* Number of applications */ __u16 bus; /* BUS ID */ __u16 group; /* Report group */ __u32 vendor; /* Vendor ID */ diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 6a47223e6460..a6876ab29004 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -212,7 +212,7 @@ int hid_bpf_connect_device(struct hid_device *hdev); void hid_bpf_disconnect_device(struct hid_device *hdev); void hid_bpf_destroy_device(struct hid_device *hid); int hid_bpf_device_init(struct hid_device *hid); -u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc, unsigned int *size); +const u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc, unsigned int *size); #else /* CONFIG_HID_BPF */ static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 *size, int interrupt, @@ -228,13 +228,8 @@ static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; } static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {} static inline void hid_bpf_destroy_device(struct hid_device *hid) {} static inline int hid_bpf_device_init(struct hid_device *hid) { return 0; } -/* - * This specialized allocator has to be a macro for its allocations to be - * accounted separately (to have a separate alloc_tag). The typecast is - * intentional to enforce typesafety. - */ -#define call_hid_bpf_rdesc_fixup(_hdev, _rdesc, _size) \ - ((u8 *)kmemdup(_rdesc, *(_size), GFP_KERNEL)) +static inline const u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc, + unsigned int *size) { return rdesc; } #endif /* CONFIG_HID_BPF */ -- cgit v1.2.3 From 645c224ac5f6e0013931c342ea707b398d24d410 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 1 Oct 2024 16:30:12 +0200 Subject: HID: add per device quirk to force bind to hid-generic We already have the possibility to force not binding to hid-generic and rely on a dedicated driver, but we couldn't do the other way around. This is useful for BPF programs where we are fixing the report descriptor and the events, but want to avoid a specialized driver to come after BPF which would unwind everything that is done there. Reviewed-by: Peter Hutterer Link: https://patch.msgid.link/20241001-hid-bpf-hid-generic-v3-8-2ef1019468df@kernel.org Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index ff58b5ceb62e..63330d623335 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -359,6 +359,7 @@ struct hid_item { * | @HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP: * | @HID_QUIRK_HAVE_SPECIAL_DRIVER: * | @HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE: + * | @HID_QUIRK_IGNORE_SPECIAL_DRIVER * | @HID_QUIRK_FULLSPEED_INTERVAL: * | @HID_QUIRK_NO_INIT_REPORTS: * | @HID_QUIRK_NO_IGNORE: @@ -384,6 +385,7 @@ struct hid_item { #define HID_QUIRK_HAVE_SPECIAL_DRIVER BIT(19) #define HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE BIT(20) #define HID_QUIRK_NOINVERT BIT(21) +#define HID_QUIRK_IGNORE_SPECIAL_DRIVER BIT(22) #define HID_QUIRK_FULLSPEED_INTERVAL BIT(28) #define HID_QUIRK_NO_INIT_REPORTS BIT(29) #define HID_QUIRK_NO_IGNORE BIT(30) -- cgit v1.2.3 From 5acd957a986c167d357e617a887630203be29ea4 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 1 Oct 2024 13:37:04 +0300 Subject: net/mlx5: hw counters: Make fc_stats & fc_pool private The mlx5_fc_stats and mlx5_fc_pool structs are only used from fs_counters.c. As such, make them private there. mlx5_fc_pool is not used or referenced at all outside fs_counters. mlx5_fc_stats is referenced from mlx5_core_dev, so instead of having it as a direct member (which requires exporting it from fs_counters), store a pointer to it, allocate it on init and clear it on destroy. One caveat is that a simple container_of to get from a 'work' struct to the outermost mlx5_core_dev struct directly no longer works, so an extra pointer had to be added to mlx5_fc_stats back to the parent dev. Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241001103709.58127-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/mlx5/driver.h | 33 +-------------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e23c692a34c7..fc7e6153b73d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -45,7 +45,6 @@ #include #include #include -#include #include #include #include @@ -474,36 +473,6 @@ struct mlx5_core_sriov { u16 max_ec_vfs; }; -struct mlx5_fc_pool { - struct mlx5_core_dev *dev; - struct mutex pool_lock; /* protects pool lists */ - struct list_head fully_used; - struct list_head partially_used; - struct list_head unused; - int available_fcs; - int used_fcs; - int threshold; -}; - -struct mlx5_fc_stats { - spinlock_t counters_idr_lock; /* protects counters_idr */ - struct idr counters_idr; - struct list_head counters; - struct llist_head addlist; - struct llist_head dellist; - - struct workqueue_struct *wq; - struct delayed_work work; - unsigned long next_query; - unsigned long sampling_interval; /* jiffies */ - u32 *bulk_query_out; - int bulk_query_len; - size_t num_counters; - bool bulk_query_alloc_failed; - unsigned long next_bulk_query_alloc; - struct mlx5_fc_pool fc_pool; -}; - struct mlx5_events; struct mlx5_mpfs; struct mlx5_eswitch; @@ -630,7 +599,7 @@ struct mlx5_priv { struct mlx5_devcom_comp_dev *hca_devcom_comp; struct mlx5_fw_reset *fw_reset; struct mlx5_core_roce roce; - struct mlx5_fc_stats fc_stats; + struct mlx5_fc_stats *fc_stats; struct mlx5_rl_table rl_table; struct mlx5_ft_pool *ft_pool; -- cgit v1.2.3 From d1c9cffe4b01f4d8bc52169139a5fedd48908abc Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 1 Oct 2024 13:37:09 +0300 Subject: net/mlx5: hw counters: Remove mlx5_fc_create_ex It no longer serves any purpose and is identical to mlx5_fc_create upon which it was originally based of. Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241001103709.58127-7-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/mlx5/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index b744e554f014..438db888bde0 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -298,9 +298,6 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler, struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); -/* As mlx5_fc_create() but doesn't queue stats refresh thread. */ -struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging); - void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, -- cgit v1.2.3 From 4aecca4c76808f3736056d18ff510df80424bc9f Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 1 Oct 2024 05:57:14 -0700 Subject: net_tstamp: add SCM_TS_OPT_ID to provide OPT_ID in control message SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX timestamps and packets sent via socket. Unfortunately, there is no way to reliably predict socket timestamp ID value in case of error returned by sendmsg. For UDP sockets it's impossible because of lockless nature of UDP transmit, several threads may send packets in parallel. In case of RAW sockets MSG_MORE option makes things complicated. More details are in the conversation [1]. This patch adds new control message type to give user-space software an opportunity to control the mapping between packets and values by providing ID with each sendmsg for UDP sockets. The documentation is also added in this patch. [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ Reviewed-by: Willem de Bruijn Reviewed-by: Jason Xing Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20241001125716.2832769-2-vadfed@meta.com Signed-off-by: Jakub Kicinski --- include/net/inet_sock.h | 4 +++- include/net/sock.h | 7 +++++++ include/uapi/asm-generic/socket.h | 2 ++ 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 394c3b66065e..f01dd273bea6 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -174,6 +174,7 @@ struct inet_cork { __s16 tos; char priority; __u16 gso_size; + u32 ts_opt_id; u64 transmit_time; u32 mark; }; @@ -241,7 +242,8 @@ struct inet_sock { struct inet_cork_full cork; }; -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ +#define IPCORK_TS_OPT_ID 2 /* ts_opt_id field is valid, overriding sk_tskey */ enum { INET_FLAGS_PKTINFO = 0, diff --git a/include/net/sock.h b/include/net/sock.h index c58ca8dd561b..ccf28c2b70b1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -954,6 +954,12 @@ enum sock_flags { }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) +/* + * The highest bit of sk_tsflags is reserved for kernel-internal + * SOCKCM_FLAG_TS_OPT_ID. There is a check in core/sock.c to control that + * SOF_TIMESTAMPING* values do not reach this reserved area + */ +#define SOCKCM_FLAG_TS_OPT_ID BIT(31) static inline void sock_copy_flags(struct sock *nsk, const struct sock *osk) { @@ -1796,6 +1802,7 @@ struct sockcm_cookie { u64 transmit_time; u32 mark; u32 tsflags; + u32 ts_opt_id; }; static inline void sockcm_init(struct sockcm_cookie *sockc, diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 3b4e3e815602..deacfd6dd197 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -141,6 +141,8 @@ #define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF #define SO_DEVMEM_DONTNEED 80 +#define SCM_TS_OPT_ID 81 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) -- cgit v1.2.3 From 822b5bc6db55f1c3ea51659c423784ac6919ddd4 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 1 Oct 2024 05:57:15 -0700 Subject: net_tstamp: add SCM_TS_OPT_ID for RAW sockets The last type of sockets which supports SOF_TIMESTAMPING_OPT_ID is RAW sockets. To add new option this patch converts all callers (direct and indirect) of _sock_tx_timestamp to provide sockcm_cookie instead of tsflags. And while here fix __sock_tx_timestamp to receive tsflags as __u32 instead of __u16. Reviewed-by: Willem de Bruijn Reviewed-by: Jason Xing Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20241001125716.2832769-3-vadfed@meta.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index ccf28c2b70b1..e282127092ab 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2660,39 +2660,48 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, sock_write_timestamp(sk, 0); } -void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); +void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags); /** * _sock_tx_timestamp - checks whether the outgoing packet is to be time stamped * @sk: socket sending this packet - * @tsflags: timestamping flags to use + * @sockc: pointer to socket cmsg cookie to get timestamping info * @tx_flags: completed with instructions for time stamping * @tskey: filled in with next sk_tskey (not for TCP, which uses seqno) * * Note: callers should take care of initial ``*tx_flags`` value (usually 0) */ -static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags, +static inline void _sock_tx_timestamp(struct sock *sk, + const struct sockcm_cookie *sockc, __u8 *tx_flags, __u32 *tskey) { + __u32 tsflags = sockc->tsflags; + if (unlikely(tsflags)) { __sock_tx_timestamp(tsflags, tx_flags); if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey && - tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) - *tskey = atomic_inc_return(&sk->sk_tskey) - 1; + tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) { + if (tsflags & SOCKCM_FLAG_TS_OPT_ID) + *tskey = sockc->ts_opt_id; + else + *tskey = atomic_inc_return(&sk->sk_tskey) - 1; + } } if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) *tx_flags |= SKBTX_WIFI_STATUS; } -static inline void sock_tx_timestamp(struct sock *sk, __u16 tsflags, +static inline void sock_tx_timestamp(struct sock *sk, + const struct sockcm_cookie *sockc, __u8 *tx_flags) { - _sock_tx_timestamp(sk, tsflags, tx_flags, NULL); + _sock_tx_timestamp(sk, sockc, tx_flags, NULL); } -static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) +static inline void skb_setup_tx_timestamp(struct sk_buff *skb, + const struct sockcm_cookie *sockc) { - _sock_tx_timestamp(skb->sk, tsflags, &skb_shinfo(skb)->tx_flags, + _sock_tx_timestamp(skb->sk, sockc, &skb_shinfo(skb)->tx_flags, &skb_shinfo(skb)->tskey); } -- cgit v1.2.3 From 5a9071a760a61b00260334ad576fe60debafaafc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Oct 2024 17:30:40 +0000 Subject: tcp: annotate data-races around icsk->icsk_pending icsk->icsk_pending can be read locklessly already. Following patch in the series will add another lockless read. Add smp_load_acquire() and smp_store_release() annotations because following patch will add a test in tcp_write_timer(), and READ_ONCE()/WRITE_ONCE() alone would possibly lead to races. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20241002173042.917928-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index c0deaafebfdc..914d19772704 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -197,7 +197,7 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) struct inet_connection_sock *icsk = inet_csk(sk); if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { - icsk->icsk_pending = 0; + smp_store_release(&icsk->icsk_pending, 0); #ifdef INET_CSK_CLEAR_TIMERS sk_stop_timer(sk, &icsk->icsk_retransmit_timer); #endif @@ -229,7 +229,7 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 || what == ICSK_TIME_LOSS_PROBE || what == ICSK_TIME_REO_TIMEOUT) { - icsk->icsk_pending = what; + smp_store_release(&icsk->icsk_pending, what); icsk->icsk_timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); } else if (what == ICSK_TIME_DACK) { -- cgit v1.2.3 From 81df4fa94ee8c0800ed42c47357435602ed105ad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Oct 2024 17:30:42 +0000 Subject: tcp: add a fast path in tcp_delack_timer() delack timer is not stopped from inet_csk_clear_xmit_timer() because we do not define INET_CSK_CLEAR_TIMERS. This is a conscious choice : inet_csk_clear_xmit_timer() is often called from another cpu. Calling del_timer() would cause false sharing and lock contention. This means that very often, tcp_delack_timer() is called at the timer expiration, while there is no ACK to transmit. This can be detected very early, avoiding the socket spinlock. Notes: - test about tp->compressed_ack is racy, but in the unlikely case there is a race, the dedicated compressed_ack_timer hrtimer would close it. - Even if the fast path is not taken, reading icsk->icsk_ack.pending and tp->compressed_ack before acquiring the socket spinlock reduces acquisition time and chances of contention. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20241002173042.917928-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 914d19772704..3c82fad904d4 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -202,7 +202,7 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) sk_stop_timer(sk, &icsk->icsk_retransmit_timer); #endif } else if (what == ICSK_TIME_DACK) { - icsk->icsk_ack.pending = 0; + smp_store_release(&icsk->icsk_ack.pending, 0); icsk->icsk_ack.retry = 0; #ifdef INET_CSK_CLEAR_TIMERS sk_stop_timer(sk, &icsk->icsk_delack_timer); @@ -233,7 +233,8 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, icsk->icsk_timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); } else if (what == ICSK_TIME_DACK) { - icsk->icsk_ack.pending |= ICSK_ACK_TIMER; + smp_store_release(&icsk->icsk_ack.pending, + icsk->icsk_ack.pending | ICSK_ACK_TIMER); icsk->icsk_ack.timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); } else { -- cgit v1.2.3 From f858cc9eed5b05cbe38d7ffd2787c21e3718eb7d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Oct 2024 12:12:18 +0000 Subject: net: add IFLA_MAX_PACING_OFFLOAD_HORIZON device attribute Some network devices have the ability to offload EDT (Earliest Departure Time) which is the model used for TCP pacing and FQ packet scheduler. Some of them implement the timing wheel mechanism described in https://saeed.github.io/files/carousel-sigcomm17.pdf with an associated 'timing wheel horizon'. This patch adds dev->max_pacing_offload_horizon expressing this timing wheel horizon in nsec units. This is a read-only attribute. Unless a driver sets it, dev->max_pacing_offload_horizon is zero. v2: addressed Jakub feedback ( https://lore.kernel.org/netdev/20240930152304.472767-2-edumazet@google.com/T/#mf6294d714c41cc459962154cc2580ce3c9693663 ) v3: added yaml doc (also per Jakub feedback) Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20241003121219.2396589-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 4 ++++ include/uapi/linux/if_link.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4d20c776a4ff..49a7e7db0883 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2009,6 +2009,8 @@ enum netdev_reg_state { * @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem, * where the clock is recovered. * + * @max_pacing_offload_horizon: max EDT offload horizon in nsec. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2399,6 +2401,8 @@ struct net_device { /** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */ struct dim_irq_moder *irq_moder; + u64 max_pacing_offload_horizon; + u8 priv[] ____cacheline_aligned __counted_by(priv_len); } ____cacheline_aligned; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6dc258993b17..506ba9c80e83 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -377,6 +377,7 @@ enum { IFLA_GSO_IPV4_MAX_SIZE, IFLA_GRO_IPV4_MAX_SIZE, IFLA_DPLL_PIN, + IFLA_MAX_PACING_OFFLOAD_HORIZON, __IFLA_MAX }; -- cgit v1.2.3 From f26080d47007df2ee90e65b7d390207ff3a588af Mon Sep 17 00:00:00 2001 From: Jeffrey Ji Date: Thu, 3 Oct 2024 12:12:19 +0000 Subject: net_sched: sch_fq: add the ability to offload pacing Some network devices have the ability to offload EDT (Earliest Departure Time) which is the model used for TCP pacing and FQ packet scheduler. Some of them implement the timing wheel mechanism described in https://saeed.github.io/files/carousel-sigcomm17.pdf with an associated 'timing wheel horizon'. This patchs adds to FQ packet scheduler TCA_FQ_OFFLOAD_HORIZON attribute. Its value is capped by the device max_pacing_offload_horizon, added in the prior patch. It allows FQ to let packets within pacing offload horizon to be delivered to the device, which will handle the needed delay without host involvement. Signed-off-by: Jeffrey Ji Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20241003121219.2396589-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index a3cd0c2dc995..25a9a47001cd 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -836,6 +836,8 @@ enum { TCA_FQ_WEIGHTS, /* Weights for each band */ + TCA_FQ_OFFLOAD_HORIZON, /* dequeue paced packets within this horizon immediately (us units) */ + __TCA_FQ_MAX }; -- cgit v1.2.3 From 65c4c93caaf1a9fca2855942e338530967162d25 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:12 +0200 Subject: crypto: sig - Introduce sig_alg backend Commit 6cb8815f41a9 ("crypto: sig - Add interface for sign/verify") began a transition of asymmetric sign/verify operations from crypto_akcipher to a new crypto_sig frontend. Internally, the crypto_sig frontend still uses akcipher_alg as backend, however: "The link between sig and akcipher is meant to be temporary. The plan is to create a new low-level API for sig and then migrate the signature code over to that from akcipher." https://lore.kernel.org/r/ZrG6w9wsb-iiLZIF@gondor.apana.org.au/ "having a separate alg for sig is definitely where we want to be since there is very little that the two types actually share." https://lore.kernel.org/r/ZrHlpz4qnre0zWJO@gondor.apana.org.au/ Take the next step of that migration and augment the crypto_sig frontend with a sig_alg backend to which all algorithms can be moved. During the migration, there will briefly be signature algorithms that are still based on crypto_akcipher, whilst others are already based on crypto_sig. Allow for that by building a fork into crypto_sig_*() API calls (i.e. crypto_sig_maxsize() and friends) such that one of the two backends is selected based on the transform's cra_type. Signed-off-by: Lukas Wunner Signed-off-by: Herbert Xu --- include/crypto/internal/sig.h | 80 +++++++++++++++++++++++++++++++++++++++++ include/crypto/sig.h | 61 +++++++++++++++++++++++++++++++ include/uapi/linux/cryptouser.h | 5 +++ 3 files changed, 146 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/sig.h b/include/crypto/internal/sig.h index 97cb26ef8115..b16648c1a986 100644 --- a/include/crypto/internal/sig.h +++ b/include/crypto/internal/sig.h @@ -10,8 +10,88 @@ #include #include +struct sig_instance { + void (*free)(struct sig_instance *inst); + union { + struct { + char head[offsetof(struct sig_alg, base)]; + struct crypto_instance base; + }; + struct sig_alg alg; + }; +}; + +struct crypto_sig_spawn { + struct crypto_spawn base; +}; + static inline void *crypto_sig_ctx(struct crypto_sig *tfm) { return crypto_tfm_ctx(&tfm->base); } + +/** + * crypto_register_sig() -- Register public key signature algorithm + * + * Function registers an implementation of a public key signature algorithm + * + * @alg: algorithm definition + * + * Return: zero on success; error code in case of error + */ +int crypto_register_sig(struct sig_alg *alg); + +/** + * crypto_unregister_sig() -- Unregister public key signature algorithm + * + * Function unregisters an implementation of a public key signature algorithm + * + * @alg: algorithm definition + */ +void crypto_unregister_sig(struct sig_alg *alg); + +int sig_register_instance(struct crypto_template *tmpl, + struct sig_instance *inst); + +static inline struct sig_instance *sig_instance(struct crypto_instance *inst) +{ + return container_of(&inst->alg, struct sig_instance, alg.base); +} + +static inline struct sig_instance *sig_alg_instance(struct crypto_sig *tfm) +{ + return sig_instance(crypto_tfm_alg_instance(&tfm->base)); +} + +static inline struct crypto_instance *sig_crypto_instance(struct sig_instance + *inst) +{ + return container_of(&inst->alg.base, struct crypto_instance, alg); +} + +static inline void *sig_instance_ctx(struct sig_instance *inst) +{ + return crypto_instance_ctx(sig_crypto_instance(inst)); +} + +int crypto_grab_sig(struct crypto_sig_spawn *spawn, + struct crypto_instance *inst, + const char *name, u32 type, u32 mask); + +static inline struct crypto_sig *crypto_spawn_sig(struct crypto_sig_spawn + *spawn) +{ + return crypto_spawn_tfm2(&spawn->base); +} + +static inline void crypto_drop_sig(struct crypto_sig_spawn *spawn) +{ + crypto_drop_spawn(&spawn->base); +} + +static inline struct sig_alg *crypto_spawn_sig_alg(struct crypto_sig_spawn + *spawn) +{ + return container_of(spawn->base.alg, struct sig_alg, base); +} #endif diff --git a/include/crypto/sig.h b/include/crypto/sig.h index d25186bb2be3..f0f52a7c5ae7 100644 --- a/include/crypto/sig.h +++ b/include/crypto/sig.h @@ -19,6 +19,52 @@ struct crypto_sig { struct crypto_tfm base; }; +/** + * struct sig_alg - generic public key signature algorithm + * + * @sign: Function performs a sign operation as defined by public key + * algorithm. Optional. + * @verify: Function performs a complete verify operation as defined by + * public key algorithm, returning verification status. Optional. + * @set_pub_key: Function invokes the algorithm specific set public key + * function, which knows how to decode and interpret + * the BER encoded public key and parameters. Mandatory. + * @set_priv_key: Function invokes the algorithm specific set private key + * function, which knows how to decode and interpret + * the BER encoded private key and parameters. Optional. + * @max_size: Function returns key size. Mandatory. + * @init: Initialize the cryptographic transformation object. + * This function is used to initialize the cryptographic + * transformation object. This function is called only once at + * the instantiation time, right after the transformation context + * was allocated. In case the cryptographic hardware has some + * special requirements which need to be handled by software, this + * function shall check for the precise requirement of the + * transformation and put any software fallbacks in place. + * @exit: Deinitialize the cryptographic transformation object. This is a + * counterpart to @init, used to remove various changes set in + * @init. + * + * @base: Common crypto API algorithm data structure + */ +struct sig_alg { + int (*sign)(struct crypto_sig *tfm, + const void *src, unsigned int slen, + void *dst, unsigned int dlen); + int (*verify)(struct crypto_sig *tfm, + const void *src, unsigned int slen, + const void *digest, unsigned int dlen); + int (*set_pub_key)(struct crypto_sig *tfm, + const void *key, unsigned int keylen); + int (*set_priv_key)(struct crypto_sig *tfm, + const void *key, unsigned int keylen); + unsigned int (*max_size)(struct crypto_sig *tfm); + int (*init)(struct crypto_sig *tfm); + void (*exit)(struct crypto_sig *tfm); + + struct crypto_alg base; +}; + /** * DOC: Generic Public Key Signature API * @@ -47,6 +93,21 @@ static inline struct crypto_tfm *crypto_sig_tfm(struct crypto_sig *tfm) return &tfm->base; } +static inline struct crypto_sig *__crypto_sig_tfm(struct crypto_tfm *tfm) +{ + return container_of(tfm, struct crypto_sig, base); +} + +static inline struct sig_alg *__crypto_sig_alg(struct crypto_alg *alg) +{ + return container_of(alg, struct sig_alg, base); +} + +static inline struct sig_alg *crypto_sig_alg(struct crypto_sig *tfm) +{ + return __crypto_sig_alg(crypto_sig_tfm(tfm)->__crt_alg); +} + /** * crypto_free_sig() - free signature tfm handle * diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 20a6c0fc149e..db05e0419972 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -64,6 +64,7 @@ enum crypto_attr_type_t { CRYPTOCFGA_STAT_AKCIPHER, /* No longer supported, do not use. */ CRYPTOCFGA_STAT_KPP, /* No longer supported, do not use. */ CRYPTOCFGA_STAT_ACOMP, /* No longer supported, do not use. */ + CRYPTOCFGA_REPORT_SIG, /* struct crypto_report_sig */ __CRYPTOCFGA_MAX #define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1) @@ -207,6 +208,10 @@ struct crypto_report_acomp { char type[CRYPTO_MAX_NAME]; }; +struct crypto_report_sig { + char type[CRYPTO_MAX_NAME]; +}; + #define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \ sizeof(struct crypto_report_blkcipher)) -- cgit v1.2.3 From 7964b0d4bd1271f82d6b455366a200d320f7dbf8 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:15 +0200 Subject: crypto: rsa-pkcs1pad - Deduplicate set_{pub,priv}_key callbacks pkcs1pad_set_pub_key() and pkcs1pad_set_priv_key() are almost identical. The upcoming migration of sign/verify operations from rsa-pkcs1pad.c into a separate crypto_template will require another copy of the exact same functions. When RSASSA-PSS and RSAES-OAEP are introduced, each will need yet another copy. Deduplicate the functions into a single one which lives in a common header file for reuse by RSASSA-PKCS1-v1_5, RSASSA-PSS and RSAES-OAEP. Signed-off-by: Lukas Wunner Reviewed-by: Stefan Berger Signed-off-by: Herbert Xu --- include/crypto/internal/rsa.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/rsa.h b/include/crypto/internal/rsa.h index e870133f4b77..754f687134df 100644 --- a/include/crypto/internal/rsa.h +++ b/include/crypto/internal/rsa.h @@ -8,6 +8,7 @@ #ifndef _RSA_HELPER_ #define _RSA_HELPER_ #include +#include /** * rsa_key - RSA key structure @@ -53,5 +54,32 @@ int rsa_parse_pub_key(struct rsa_key *rsa_key, const void *key, int rsa_parse_priv_key(struct rsa_key *rsa_key, const void *key, unsigned int key_len); +#define RSA_PUB (true) +#define RSA_PRIV (false) + +static inline int rsa_set_key(struct crypto_akcipher *child, + unsigned int *key_size, bool is_pubkey, + const void *key, unsigned int keylen) +{ + int err; + + *key_size = 0; + + if (is_pubkey) + err = crypto_akcipher_set_pub_key(child, key, keylen); + else + err = crypto_akcipher_set_priv_key(child, key, keylen); + if (err) + return err; + + /* Find out new modulus size from rsa implementation */ + err = crypto_akcipher_maxsize(child); + if (err > PAGE_SIZE) + return -ENOTSUPP; + + *key_size = err; + return 0; +} + extern struct crypto_template rsa_pkcs1pad_tmpl; #endif -- cgit v1.2.3 From 1e562deacecca1f1bec7d23da526904a1e87525e Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:16 +0200 Subject: crypto: rsassa-pkcs1 - Migrate to sig_alg backend A sig_alg backend has just been introduced with the intent of moving all asymmetric sign/verify algorithms to it one by one. Migrate the sign/verify operations from rsa-pkcs1pad.c to a separate rsassa-pkcs1.c which uses the new backend. Consequently there are now two templates which build on the "rsa" akcipher_alg: * The existing "pkcs1pad" template, which is instantiated as an akcipher_instance and retains the encrypt/decrypt operations of RSAES-PKCS1-v1_5 (RFC 8017 sec 7.2). * The new "pkcs1" template, which is instantiated as a sig_instance and contains the sign/verify operations of RSASSA-PKCS1-v1_5 (RFC 8017 sec 8.2). In a separate step, rsa-pkcs1pad.c could optionally be renamed to rsaes-pkcs1.c for clarity. Additional "oaep" and "pss" templates could be added for RSAES-OAEP and RSASSA-PSS. Note that it's currently allowed to allocate a "pkcs1pad(rsa)" transform without specifying a hash algorithm. That makes sense if the transform is only used for encrypt/decrypt and continues to be supported. But for sign/verify, such transforms previously did not insert the Full Hash Prefix into the padding. The resulting message encoding was incompliant with EMSA-PKCS1-v1_5 (RFC 8017 sec 9.2) and therefore nonsensical. From here on in, it is no longer allowed to allocate a transform without specifying a hash algorithm if the transform is used for sign/verify operations. This simplifies the code because the insertion of the Full Hash Prefix is no longer optional, so various "if (digest_info)" clauses can be removed. There has been a previous attempt to forbid transform allocation without specifying a hash algorithm, namely by commit c0d20d22e0ad ("crypto: rsa-pkcs1pad - Require hash to be present"). It had to be rolled back with commit b3a8c8a5ebb5 ("crypto: rsa-pkcs1pad: Allow hash to be optional [ver #2]"), presumably because it broke allocation of a transform which was solely used for encrypt/decrypt, not sign/verify. Avoid such breakage by allowing transform allocation for encrypt/decrypt with and without specifying a hash algorithm (and simply ignoring the hash algorithm in the former case). So again, specifying a hash algorithm is now mandatory for sign/verify, but optional and ignored for encrypt/decrypt. The new sig_alg API uses kernel buffers instead of sglists, which avoids the overhead of copying signature and digest from sglists back into kernel buffers. rsassa-pkcs1.c is thus simplified quite a bit. sig_alg is always synchronous, whereas the underlying "rsa" akcipher_alg may be asynchronous. So await the result of the akcipher_alg, similar to crypto_akcipher_sync_{en,de}crypt(). As part of the migration, rename "rsa_digest_info" to "hash_prefix" to adhere to the spec language in RFC 9580. Otherwise keep the code unmodified wherever possible to ease reviewing and bisecting. Leave several simplification and hardening opportunities to separate commits. rsassa-pkcs1.c uses modern __free() syntax for allocation of buffers which need to be freed by kfree_sensitive(), hence a DEFINE_FREE() clause for kfree_sensitive() is introduced herein as a byproduct. Signed-off-by: Lukas Wunner Signed-off-by: Herbert Xu --- include/crypto/internal/rsa.h | 1 + include/linux/slab.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/rsa.h b/include/crypto/internal/rsa.h index 754f687134df..071a1951b992 100644 --- a/include/crypto/internal/rsa.h +++ b/include/crypto/internal/rsa.h @@ -82,4 +82,5 @@ static inline int rsa_set_key(struct crypto_akcipher *child, } extern struct crypto_template rsa_pkcs1pad_tmpl; +extern struct crypto_template rsassa_pkcs1_tmpl; #endif diff --git a/include/linux/slab.h b/include/linux/slab.h index b35e2db7eb0e..0268ea7abf8b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -448,6 +448,7 @@ void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T)) +DEFINE_FREE(kfree_sensitive, void *, if (_T) kfree_sensitive(_T)) /** * ksize - Report actual allocation size of associated object -- cgit v1.2.3 From 5b553e06b3215fa97d222ebddc2bc964f1824c5b Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:19 +0200 Subject: crypto: virtio - Drop sign/verify operations The virtio crypto driver exposes akcipher sign/verify operations in a user space ABI. This blocks removal of sign/verify from akcipher_alg. Herbert opines: "I would say that this is something that we can break. Breaking it is no different to running virtio on a host that does not support these algorithms. After all, a software implementation must always be present. I deliberately left akcipher out of crypto_user because the API is still in flux. We should not let virtio constrain ourselves." https://lore.kernel.org/all/ZtqoNAgcnXnrYhZZ@gondor.apana.org.au/ "I would remove virtio akcipher support in its entirety. This API was never meant to be exposed outside of the kernel." https://lore.kernel.org/all/Ztqql_gqgZiMW8zz@gondor.apana.org.au/ Drop sign/verify support from virtio crypto. There's no strong reason to also remove encrypt/decrypt support, so keep it. A key selling point of virtio crypto is to allow guest access to crypto accelerators on the host. So far the only akcipher algorithm supported by virtio crypto is RSA. Dropping sign/verify merely means that the PKCS#1 padding is now always generated or verified inside the guest, but the actual signature generation/verification (which is an RSA decrypt/encrypt operation) may still use an accelerator on the host. Generating or verifying the PKCS#1 padding is cheap, so a hardware accelerator won't be of much help there. Which begs the question whether virtio crypto support for sign/verify makes sense at all. It would make sense for the sign operation if the host has a security chip to store asymmetric private keys. But the kernel doesn't even have an asymmetric_key_subtype yet for hardware-based private keys. There's at least one rudimentary driver for such chips (atmel-ecc.c for ATECC508A), but it doesn't implement the sign operation. The kernel would first have to grow support for a hardware asymmetric_key_subtype and at least one driver implementing the sign operation before exposure to guests via virtio makes sense. Signed-off-by: Lukas Wunner Signed-off-by: Herbert Xu --- include/uapi/linux/virtio_crypto.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/virtio_crypto.h b/include/uapi/linux/virtio_crypto.h index 71a54a6849ca..2fccb64c9d6b 100644 --- a/include/uapi/linux/virtio_crypto.h +++ b/include/uapi/linux/virtio_crypto.h @@ -329,6 +329,7 @@ struct virtio_crypto_op_header { VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x00) #define VIRTIO_CRYPTO_AKCIPHER_DECRYPT \ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x01) + /* akcipher sign/verify opcodes are deprecated */ #define VIRTIO_CRYPTO_AKCIPHER_SIGN \ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x02) #define VIRTIO_CRYPTO_AKCIPHER_VERIFY \ -- cgit v1.2.3 From 6b34562f0cfe81f1f207fc7c146c4ff4b31eb625 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:21 +0200 Subject: crypto: akcipher - Drop sign/verify operations A sig_alg backend has just been introduced and all asymmetric sign/verify algorithms have been migrated to it. The sign/verify operations can thus be dropped from akcipher_alg. It is now purely for asymmetric encrypt/decrypt. Move struct crypto_akcipher_sync_data from internal.h to akcipher.c and unexport crypto_akcipher_sync_{prep,post}(): They're no longer used by sig.c but only locally in akcipher.c. In crypto_akcipher_sync_{prep,post}(), drop various NULL pointer checks for data->dst as they were only necessary for the verify operation. In the crypto_sig_*() API calls, remove the forks that were necessary while algorithms were converted from crypto_akcipher to crypto_sig one by one. In struct akcipher_testvec, remove the "params", "param_len" and "algo" elements as they were only needed for the ecrdsa verify operation. Remove corresponding dead code from test_akcipher_one() as well. Signed-off-by: Lukas Wunner Signed-off-by: Herbert Xu --- include/crypto/akcipher.h | 69 +++++--------------------------------- include/crypto/internal/akcipher.h | 4 +-- 2 files changed, 10 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h index 18a10cad07aa..cdf7da74bf2f 100644 --- a/include/crypto/akcipher.h +++ b/include/crypto/akcipher.h @@ -12,24 +12,19 @@ #include /** - * struct akcipher_request - public key request + * struct akcipher_request - public key cipher request * * @base: Common attributes for async crypto requests * @src: Source data - * For verify op this is signature + digest, in that case - * total size of @src is @src_len + @dst_len. - * @dst: Destination data (Should be NULL for verify op) + * @dst: Destination data * @src_len: Size of the input buffer - * For verify op it's size of signature part of @src, this part - * is supposed to be operated by cipher. - * @dst_len: Size of @dst buffer (for all ops except verify). + * @dst_len: Size of @dst buffer * It needs to be at least as big as the expected result * depending on the operation. * After operation it will be updated with the actual size of the * result. * In case of error where the dst sgl size was insufficient, * it will be updated to the size required for the operation. - * For verify op this is size of digest part in @src. * @__ctx: Start of private context data */ struct akcipher_request { @@ -55,15 +50,8 @@ struct crypto_akcipher { }; /** - * struct akcipher_alg - generic public key algorithm + * struct akcipher_alg - generic public key cipher algorithm * - * @sign: Function performs a sign operation as defined by public key - * algorithm. In case of error, where the dst_len was insufficient, - * the req->dst_len will be updated to the size required for the - * operation - * @verify: Function performs a complete verify operation as defined by - * public key algorithm, returning verification status. Requires - * digest value as input parameter. * @encrypt: Function performs an encrypt operation as defined by public key * algorithm. In case of error, where the dst_len was insufficient, * the req->dst_len will be updated to the size required for the @@ -94,8 +82,6 @@ struct crypto_akcipher { * @base: Common crypto API algorithm data structure */ struct akcipher_alg { - int (*sign)(struct akcipher_request *req); - int (*verify)(struct akcipher_request *req); int (*encrypt)(struct akcipher_request *req); int (*decrypt)(struct akcipher_request *req); int (*set_pub_key)(struct crypto_akcipher *tfm, const void *key, @@ -110,9 +96,9 @@ struct akcipher_alg { }; /** - * DOC: Generic Public Key API + * DOC: Generic Public Key Cipher API * - * The Public Key API is used with the algorithms of type + * The Public Key Cipher API is used with the algorithms of type * CRYPTO_ALG_TYPE_AKCIPHER (listed as type "akcipher" in /proc/crypto) */ @@ -243,10 +229,9 @@ static inline void akcipher_request_set_callback(struct akcipher_request *req, * * @req: public key request * @src: ptr to input scatter list - * @dst: ptr to output scatter list or NULL for verify op + * @dst: ptr to output scatter list * @src_len: size of the src input scatter list to be processed - * @dst_len: size of the dst output scatter list or size of signature - * portion in @src for verify op + * @dst_len: size of the dst output scatter list */ static inline void akcipher_request_set_crypt(struct akcipher_request *req, struct scatterlist *src, @@ -347,44 +332,6 @@ int crypto_akcipher_sync_decrypt(struct crypto_akcipher *tfm, const void *src, unsigned int slen, void *dst, unsigned int dlen); -/** - * crypto_akcipher_sign() - Invoke public key sign operation - * - * Function invokes the specific public key sign operation for a given - * public key algorithm - * - * @req: asymmetric key request - * - * Return: zero on success; error code in case of error - */ -static inline int crypto_akcipher_sign(struct akcipher_request *req) -{ - struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - - return crypto_akcipher_alg(tfm)->sign(req); -} - -/** - * crypto_akcipher_verify() - Invoke public key signature verification - * - * Function invokes the specific public key signature verification operation - * for a given public key algorithm. - * - * @req: asymmetric key request - * - * Note: req->dst should be NULL, req->src should point to SG of size - * (req->src_size + req->dst_size), containing signature (of req->src_size - * length) with appended digest (of req->dst_size length). - * - * Return: zero on verification success; error code in case of error. - */ -static inline int crypto_akcipher_verify(struct akcipher_request *req) -{ - struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - - return crypto_akcipher_alg(tfm)->verify(req); -} - /** * crypto_akcipher_set_pub_key() - Invoke set public key operation * diff --git a/include/crypto/internal/akcipher.h b/include/crypto/internal/akcipher.h index a0fba4b2eccf..14ee62bc52b6 100644 --- a/include/crypto/internal/akcipher.h +++ b/include/crypto/internal/akcipher.h @@ -124,7 +124,7 @@ static inline struct akcipher_alg *crypto_spawn_akcipher_alg( /** * crypto_register_akcipher() -- Register public key algorithm * - * Function registers an implementation of a public key verify algorithm + * Function registers an implementation of a public key cipher algorithm * * @alg: algorithm definition * @@ -135,7 +135,7 @@ int crypto_register_akcipher(struct akcipher_alg *alg); /** * crypto_unregister_akcipher() -- Unregister public key algorithm * - * Function unregisters an implementation of a public key verify algorithm + * Function unregisters an implementation of a public key cipher algorithm * * @alg: algorithm definition */ -- cgit v1.2.3 From 5ba296674e468ddd40b6ef5aa845c2d2ee794b84 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:22 +0200 Subject: crypto: sig - Move crypto_sig_*() API calls to include file The crypto_sig_*() API calls lived in sig.c so far because they needed access to struct crypto_sig_type: This was necessary to differentiate between signature algorithms that had already been migrated from crypto_akcipher to crypto_sig and those that hadn't yet. Now that all algorithms have been migrated, the API calls can become static inlines in to mimic what is doing. Signed-off-by: Lukas Wunner Reviewed-by: Stefan Berger Signed-off-by: Herbert Xu --- include/crypto/sig.h | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/crypto/sig.h b/include/crypto/sig.h index f0f52a7c5ae7..bbc902642bf5 100644 --- a/include/crypto/sig.h +++ b/include/crypto/sig.h @@ -130,7 +130,12 @@ static inline void crypto_free_sig(struct crypto_sig *tfm) * * @tfm: signature tfm handle allocated with crypto_alloc_sig() */ -int crypto_sig_maxsize(struct crypto_sig *tfm); +static inline int crypto_sig_maxsize(struct crypto_sig *tfm) +{ + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->max_size(tfm); +} /** * crypto_sig_sign() - Invoke signing operation @@ -145,9 +150,14 @@ int crypto_sig_maxsize(struct crypto_sig *tfm); * * Return: zero on success; error code in case of error */ -int crypto_sig_sign(struct crypto_sig *tfm, - const void *src, unsigned int slen, - void *dst, unsigned int dlen); +static inline int crypto_sig_sign(struct crypto_sig *tfm, + const void *src, unsigned int slen, + void *dst, unsigned int dlen) +{ + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->sign(tfm, src, slen, dst, dlen); +} /** * crypto_sig_verify() - Invoke signature verification @@ -163,9 +173,14 @@ int crypto_sig_sign(struct crypto_sig *tfm, * * Return: zero on verification success; error code in case of error. */ -int crypto_sig_verify(struct crypto_sig *tfm, - const void *src, unsigned int slen, - const void *digest, unsigned int dlen); +static inline int crypto_sig_verify(struct crypto_sig *tfm, + const void *src, unsigned int slen, + const void *digest, unsigned int dlen) +{ + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->verify(tfm, src, slen, digest, dlen); +} /** * crypto_sig_set_pubkey() - Invoke set public key operation @@ -180,8 +195,13 @@ int crypto_sig_verify(struct crypto_sig *tfm, * * Return: zero on success; error code in case of error */ -int crypto_sig_set_pubkey(struct crypto_sig *tfm, - const void *key, unsigned int keylen); +static inline int crypto_sig_set_pubkey(struct crypto_sig *tfm, + const void *key, unsigned int keylen) +{ + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->set_pub_key(tfm, key, keylen); +} /** * crypto_sig_set_privkey() - Invoke set private key operation @@ -196,6 +216,11 @@ int crypto_sig_set_pubkey(struct crypto_sig *tfm, * * Return: zero on success; error code in case of error */ -int crypto_sig_set_privkey(struct crypto_sig *tfm, - const void *key, unsigned int keylen); +static inline int crypto_sig_set_privkey(struct crypto_sig *tfm, + const void *key, unsigned int keylen) +{ + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->set_priv_key(tfm, key, keylen); +} #endif -- cgit v1.2.3 From 4df86c6ea5c37fe0452638f39a1e4b189da75c54 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:23 +0200 Subject: ASN.1: Clean up include statements in public headers If is the first header included from a .c file (due to headers being sorted alphabetically), the compiler complains: include/linux/asn1_decoder.h:18:29: error: unknown type name 'size_t' Avoid by including . Jonathan notes that the counterpart already includes , but additionally includes the unnecessary . Drop it. Signed-off-by: Lukas Wunner Reviewed-by: Stefan Berger Reviewed-by: Jonathan Cameron Signed-off-by: Herbert Xu --- include/linux/asn1_decoder.h | 1 + include/linux/asn1_encoder.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/asn1_decoder.h b/include/linux/asn1_decoder.h index 83f9c6e1e5e9..b41bce82a191 100644 --- a/include/linux/asn1_decoder.h +++ b/include/linux/asn1_decoder.h @@ -9,6 +9,7 @@ #define _LINUX_ASN1_DECODER_H #include +#include struct asn1_decoder; diff --git a/include/linux/asn1_encoder.h b/include/linux/asn1_encoder.h index 08cd0c2ad34f..d17484dffb74 100644 --- a/include/linux/asn1_encoder.h +++ b/include/linux/asn1_encoder.h @@ -6,7 +6,6 @@ #include #include #include -#include #define asn1_oid_len(oid) (sizeof(oid)/sizeof(u32)) unsigned char * -- cgit v1.2.3 From d6793ff974e07e4eea151d1f0805e92d042825a1 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:25 +0200 Subject: crypto: ecdsa - Move X9.62 signature decoding into template Unlike the rsa driver, which separates signature decoding and signature verification into two steps, the ecdsa driver does both in one. This restricts users to the one signature format currently supported (X9.62) and prevents addition of others such as P1363, which is needed by the forthcoming SPDM library (Security Protocol and Data Model) for PCI device authentication. Per Herbert's suggestion, change ecdsa to use a "raw" signature encoding and then implement X9.62 and P1363 as templates which convert their respective encodings to the raw one. One may then specify "x962(ecdsa-nist-XXX)" or "p1363(ecdsa-nist-XXX)" to pick the encoding. The present commit moves X9.62 decoding to a template. A separate commit is going to introduce another template for P1363 decoding. The ecdsa driver internally represents a signature as two u64 arrays of size ECC_MAX_BYTES. This appears to be the most natural choice for the raw format as it can directly be used for verification without having to further decode signature data or copy it around. Repurpose all the existing test vectors for "x962(ecdsa-nist-XXX)" and create a duplicate of them to test the raw encoding. Link: https://lore.kernel.org/all/ZoHXyGwRzVvYkcTP@gondor.apana.org.au/ Signed-off-by: Lukas Wunner Tested-by: Stefan Berger Signed-off-by: Herbert Xu --- include/crypto/internal/ecc.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/ecc.h b/include/crypto/internal/ecc.h index 0717a53ae732..68db975e0963 100644 --- a/include/crypto/internal/ecc.h +++ b/include/crypto/internal/ecc.h @@ -42,6 +42,18 @@ #define ECC_POINT_INIT(x, y, ndigits) (struct ecc_point) { x, y, ndigits } +/* + * The integers r and s making up the signature are expected to be + * formatted as two consecutive u64 arrays of size ECC_MAX_BYTES. + * The bytes within each u64 digit are in native endianness, + * but the order of the u64 digits themselves is little endian. + * This format allows direct use by internal vli_*() functions. + */ +struct ecdsa_raw_sig { + u64 r[ECC_MAX_DIGITS]; + u64 s[ECC_MAX_DIGITS]; +}; + /** * ecc_swap_digits() - Copy ndigits from big endian array to native array * @in: Input array @@ -293,4 +305,5 @@ void ecc_point_mult_shamir(const struct ecc_point *result, const u64 *y, const struct ecc_point *q, const struct ecc_curve *curve); +extern struct crypto_template ecdsa_x962_tmpl; #endif -- cgit v1.2.3 From 221f00418e726237dbe38ba627ce08b22d3667f7 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:26 +0200 Subject: crypto: sig - Rename crypto_sig_maxsize() to crypto_sig_keysize() crypto_sig_maxsize() is a bit of a misnomer as it doesn't return the maximum signature size, but rather the key size. Rename it as well as all implementations of the ->max_size callback. A subsequent commit introduces a crypto_sig_maxsize() function which returns the actual maximum signature size. While at it, change the return type of crypto_sig_keysize() from int to unsigned int for consistency with crypto_akcipher_maxsize(). None of the callers checks for a negative return value and an error condition can always be indicated by returning zero. Signed-off-by: Lukas Wunner Signed-off-by: Herbert Xu --- include/crypto/sig.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/crypto/sig.h b/include/crypto/sig.h index bbc902642bf5..a3ef17c5f72f 100644 --- a/include/crypto/sig.h +++ b/include/crypto/sig.h @@ -32,7 +32,7 @@ struct crypto_sig { * @set_priv_key: Function invokes the algorithm specific set private key * function, which knows how to decode and interpret * the BER encoded private key and parameters. Optional. - * @max_size: Function returns key size. Mandatory. + * @key_size: Function returns key size. Mandatory. * @init: Initialize the cryptographic transformation object. * This function is used to initialize the cryptographic * transformation object. This function is called only once at @@ -58,7 +58,7 @@ struct sig_alg { const void *key, unsigned int keylen); int (*set_priv_key)(struct crypto_sig *tfm, const void *key, unsigned int keylen); - unsigned int (*max_size)(struct crypto_sig *tfm); + unsigned int (*key_size)(struct crypto_sig *tfm); int (*init)(struct crypto_sig *tfm); void (*exit)(struct crypto_sig *tfm); @@ -121,20 +121,20 @@ static inline void crypto_free_sig(struct crypto_sig *tfm) } /** - * crypto_sig_maxsize() - Get len for output buffer + * crypto_sig_keysize() - Get key size * - * Function returns the dest buffer size required for a given key. + * Function returns the key size in bytes. * Function assumes that the key is already set in the transformation. If this - * function is called without a setkey or with a failed setkey, you will end up + * function is called without a setkey or with a failed setkey, you may end up * in a NULL dereference. * * @tfm: signature tfm handle allocated with crypto_alloc_sig() */ -static inline int crypto_sig_maxsize(struct crypto_sig *tfm) +static inline unsigned int crypto_sig_keysize(struct crypto_sig *tfm) { struct sig_alg *alg = crypto_sig_alg(tfm); - return alg->max_size(tfm); + return alg->key_size(tfm); } /** -- cgit v1.2.3 From a2471684dae23a676b4badea306140d24e6507f5 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:27 +0200 Subject: crypto: ecdsa - Move X9.62 signature size calculation into template software_key_query() returns the maximum signature and digest size for a given key to user space. When it only supported RSA keys, calculating those sizes was trivial as they were always equivalent to the key size. However when ECDSA was added, the function grew somewhat complicated calculations which take the ASN.1 encoding and curve into account. This doesn't scale well and adjusting the calculations is easily forgotten when adding support for new encodings or curves. In fact, when NIST P521 support was recently added, the function was initially not amended: https://lore.kernel.org/all/b749d5ee-c3b8-4cbd-b252-7773e4536e07@linux.ibm.com/ Introduce a ->max_size() callback to struct sig_alg and take advantage of it to move the signature size calculations to ecdsa-x962.c. Introduce a ->digest_size() callback to struct sig_alg and move the maximum ECDSA digest size to ecdsa.c. It is common across ecdsa-x962.c and the upcoming ecdsa-p1363.c and thus inherited by both of them. For all other algorithms, continue using the key size as maximum signature and digest size. Signed-off-by: Lukas Wunner Signed-off-by: Herbert Xu --- include/crypto/sig.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include') diff --git a/include/crypto/sig.h b/include/crypto/sig.h index a3ef17c5f72f..cff41ad93824 100644 --- a/include/crypto/sig.h +++ b/include/crypto/sig.h @@ -33,6 +33,8 @@ struct crypto_sig { * function, which knows how to decode and interpret * the BER encoded private key and parameters. Optional. * @key_size: Function returns key size. Mandatory. + * @digest_size: Function returns maximum digest size. Optional. + * @max_size: Function returns maximum signature size. Optional. * @init: Initialize the cryptographic transformation object. * This function is used to initialize the cryptographic * transformation object. This function is called only once at @@ -59,6 +61,8 @@ struct sig_alg { int (*set_priv_key)(struct crypto_sig *tfm, const void *key, unsigned int keylen); unsigned int (*key_size)(struct crypto_sig *tfm); + unsigned int (*digest_size)(struct crypto_sig *tfm); + unsigned int (*max_size)(struct crypto_sig *tfm); int (*init)(struct crypto_sig *tfm); void (*exit)(struct crypto_sig *tfm); @@ -137,6 +141,40 @@ static inline unsigned int crypto_sig_keysize(struct crypto_sig *tfm) return alg->key_size(tfm); } +/** + * crypto_sig_digestsize() - Get maximum digest size + * + * Function returns the maximum digest size in bytes. + * Function assumes that the key is already set in the transformation. If this + * function is called without a setkey or with a failed setkey, you may end up + * in a NULL dereference. + * + * @tfm: signature tfm handle allocated with crypto_alloc_sig() + */ +static inline unsigned int crypto_sig_digestsize(struct crypto_sig *tfm) +{ + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->digest_size(tfm); +} + +/** + * crypto_sig_maxsize() - Get maximum signature size + * + * Function returns the maximum signature size in bytes. + * Function assumes that the key is already set in the transformation. If this + * function is called without a setkey or with a failed setkey, you may end up + * in a NULL dereference. + * + * @tfm: signature tfm handle allocated with crypto_alloc_sig() + */ +static inline unsigned int crypto_sig_maxsize(struct crypto_sig *tfm) +{ + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->max_size(tfm); +} + /** * crypto_sig_sign() - Invoke signing operation * -- cgit v1.2.3 From b04163863caf599d4348a05af5a71cf5d42f11dc Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:28 +0200 Subject: crypto: ecdsa - Support P1363 signature decoding Alternatively to the X9.62 encoding of ecdsa signatures, which uses ASN.1 and is already supported by the kernel, there's another common encoding called P1363. It stores r and s as the concatenation of two big endian, unsigned integers. The name originates from IEEE P1363. Add a P1363 template in support of the forthcoming SPDM library (Security Protocol and Data Model) for PCI device authentication. P1363 is prescribed by SPDM 1.2.1 margin no 44: "For ECDSA signatures, excluding SM2, in SPDM, the signature shall be the concatenation of r and s. The size of r shall be the size of the selected curve. Likewise, the size of s shall be the size of the selected curve. See BaseAsymAlgo in NEGOTIATE_ALGORITHMS for the size of r and s. The byte order for r and s shall be in big endian order. When placing ECDSA signatures into an SPDM signature field, r shall come first followed by s." Link: https://www.dmtf.org/sites/default/files/standards/documents/DSP0274_1.2.1.pdf Signed-off-by: Lukas Wunner Reviewed-by: Jonathan Cameron Reviewed-by: Stefan Berger Signed-off-by: Herbert Xu --- include/crypto/internal/ecc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/crypto/internal/ecc.h b/include/crypto/internal/ecc.h index 68db975e0963..71483e5305e1 100644 --- a/include/crypto/internal/ecc.h +++ b/include/crypto/internal/ecc.h @@ -306,4 +306,5 @@ void ecc_point_mult_shamir(const struct ecc_point *result, const struct ecc_curve *curve); extern struct crypto_template ecdsa_x962_tmpl; +extern struct crypto_template ecdsa_p1363_tmpl; #endif -- cgit v1.2.3 From 452c55dcefa958f7e87798d764497485687e4bc3 Mon Sep 17 00:00:00 2001 From: Chenghai Huang Date: Sun, 29 Sep 2024 19:26:57 +0800 Subject: crypto: hisilicon/qm - fix the coding specifications issue Ensure that the inline function contains no more than 10 lines. move q_num_set() from hisi_acc_qm.h to qm.c. Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- include/linux/hisi_acc_qm.h | 33 ++------------------------------- 1 file changed, 2 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 9d7754ad5e9b..389e95754776 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -436,37 +436,6 @@ struct hisi_qp { struct uacce_queue *uacce_q; }; -static inline int q_num_set(const char *val, const struct kernel_param *kp, - unsigned int device) -{ - struct pci_dev *pdev; - u32 n, q_num; - int ret; - - if (!val) - return -EINVAL; - - pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, device, NULL); - if (!pdev) { - q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2); - pr_info("No device found currently, suppose queue number is %u\n", - q_num); - } else { - if (pdev->revision == QM_HW_V1) - q_num = QM_QNUM_V1; - else - q_num = QM_QNUM_V2; - - pci_dev_put(pdev); - } - - ret = kstrtou32(val, 10, &n); - if (ret || n < QM_MIN_QNUM || n > q_num) - return -EINVAL; - - return param_set_int(val, kp); -} - static inline int vfs_num_set(const char *val, const struct kernel_param *kp) { u32 n; @@ -526,6 +495,8 @@ static inline void hisi_qm_del_list(struct hisi_qm *qm, struct hisi_qm_list *qm_ mutex_unlock(&qm_list->lock); } +int hisi_qm_q_num_set(const char *val, const struct kernel_param *kp, + unsigned int device); int hisi_qm_init(struct hisi_qm *qm); void hisi_qm_uninit(struct hisi_qm *qm); int hisi_qm_start(struct hisi_qm *qm); -- cgit v1.2.3 From 21e92806d39c68af2accd1fb238c2daecfcf9fbd Mon Sep 17 00:00:00 2001 From: Donglin Peng Date: Sat, 14 Sep 2024 20:29:12 -0700 Subject: function_graph: Support recording and printing the function return address When using function_graph tracer to analyze the flow of kernel function execution, it is often necessary to quickly locate the exact line of code where the call occurs. While this may be easy at times, it can be more time-consuming when some functions are inlined or the flow is too long. This feature aims to simplify the process by recording the return address of traced funcions and printing it when outputing trace logs. To enhance human readability, the prefix 'ret=' is used for the kernel return value, while '<-' serves as the prefix for the return address in trace logs to make it look more like the function tracer. A new trace option named 'funcgraph-retaddr' has been introduced, and the existing option 'sym-addr' can be used to control the format of the return address. See below logs with both funcgraph-retval and funcgraph-retaddr enabled. 0) | load_elf_binary() { /* <-bprm_execve+0x249/0x600 */ 0) | load_elf_phdrs() { /* <-load_elf_binary+0x84/0x1730 */ 0) | __kmalloc_noprof() { /* <-load_elf_phdrs+0x4a/0xb0 */ 0) 3.657 us | __cond_resched(); /* <-__kmalloc_noprof+0x28c/0x390 ret=0x0 */ 0) + 24.335 us | } /* __kmalloc_noprof ret=0xffff8882007f3000 */ 0) | kernel_read() { /* <-load_elf_phdrs+0x6c/0xb0 */ 0) | rw_verify_area() { /* <-kernel_read+0x2b/0x50 */ 0) | security_file_permission() { /* <-kernel_read+0x2b/0x50 */ 0) | selinux_file_permission() { /* <-security_file_permission+0x26/0x40 */ 0) | __inode_security_revalidate() { /* <-selinux_file_permission+0x6d/0x140 */ 0) 2.034 us | __cond_resched(); /* <-__inode_security_revalidate+0x5f/0x80 ret=0x0 */ 0) 6.602 us | } /* __inode_security_revalidate ret=0x0 */ 0) 2.214 us | avc_policy_seqno(); /* <-selinux_file_permission+0x107/0x140 ret=0x0 */ 0) + 16.670 us | } /* selinux_file_permission ret=0x0 */ 0) + 20.809 us | } /* security_file_permission ret=0x0 */ 0) + 25.217 us | } /* rw_verify_area ret=0x0 */ 0) | __kernel_read() { /* <-load_elf_phdrs+0x6c/0xb0 */ 0) | ext4_file_read_iter() { /* <-__kernel_read+0x160/0x2e0 */ Then, we can use the faddr2line to locate the source code, for example: $ ./scripts/faddr2line ./vmlinux load_elf_phdrs+0x6c/0xb0 load_elf_phdrs+0x6c/0xb0: elf_read at fs/binfmt_elf.c:471 (inlined by) load_elf_phdrs at fs/binfmt_elf.c:531 Link: https://lore.kernel.org/20240915032912.1118397-1-dolinux.peng@gmail.com Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202409150605.HgUmU8ea-lkp@intel.com/ Signed-off-by: Donglin Peng [ Rebased to handle text_delta offsets ] Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e684addf6508..2ac3b3b53cd0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1040,6 +1040,17 @@ struct ftrace_graph_ent { int depth; } __packed; +/* + * Structure that defines an entry function trace with retaddr. + * It's already packed but the attribute "packed" is needed + * to remove extra padding at the end. + */ +struct fgraph_retaddr_ent { + unsigned long func; /* Current function */ + int depth; + unsigned long retaddr; /* Return address */ +} __packed; + /* * Structure that defines a return function trace. * It's already packed but the attribute "packed" is needed @@ -1057,19 +1068,29 @@ struct ftrace_graph_ret { unsigned long long rettime; } __packed; +struct fgraph_extras; struct fgraph_ops; /* Type of the callback handlers for tracing function graph*/ typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *, struct fgraph_ops *); /* return */ typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *, - struct fgraph_ops *); /* entry */ + struct fgraph_ops *, + struct fgraph_extras *); /* entry */ -extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph_ops *gops); +extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops, + struct fgraph_extras *extras); bool ftrace_pids_enabled(struct ftrace_ops *ops); #ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* Used to convey some extra datas when creating a graph entry */ +struct fgraph_extras { + u32 flags; + unsigned long retaddr; +}; + struct fgraph_ops { trace_func_graph_ent_t entryfunc; trace_func_graph_ret_t retfunc; @@ -1115,6 +1136,8 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp); unsigned long *fgraph_get_task_var(struct fgraph_ops *gops); +u32 graph_tracer_flags_get(u32 flags); + /* * Sometimes we don't want to trace a function with the function * graph tracer but we want them to keep traced by the usual function -- cgit v1.2.3 From 4436df478860bb5da1864df2cd20f281a210f139 Mon Sep 17 00:00:00 2001 From: Erick Archer Date: Fri, 7 Jun 2024 18:19:12 +0200 Subject: batman-adv: Add flex array to struct batadv_tvlv_tt_data The "struct batadv_tvlv_tt_data" uses a dynamically sized set of trailing elements. Specifically, it uses an array of structures of type "batadv_tvlv_tt_vlan_data". So, use the preferred way in the kernel declaring a flexible array [1]. At the same time, prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). In this case, it is important to note that the attribute used is specifically __counted_by_be since variable "num_vlan" is of type __be16. The following change to the "batadv_tt_tvlv_ogm_handler_v1" function: - tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1); - tt_change = (struct batadv_tvlv_tt_change *)(tt_vlan + num_vlan); + tt_change = (struct batadv_tvlv_tt_change *)((void *)tt_data + + flex_size); is intended to prevent the compiler from generating an "out-of-bounds" notification due to the __counted_by attribute. The compiler can do a pointer calculation using the vlan_data flexible array memory, or in other words, this may be calculated as an array offset, since it is the same as: &tt_data->vlan_data[num_vlan] Therefore, we go past the end of the array. In other "multiple trailing flexible array" situations, this has been solved by addressing from the base pointer, since the compiler either knows the full allocation size or it knows nothing about it (this case, since it came from a "void *" function argument). The order in which the structure batadv_tvlv_tt_data and the structure batadv_tvlv_tt_vlan_data are defined must be swap to avoid an incomplete type error. Also, avoid the open-coded arithmetic in memory allocator functions [2] using the "struct_size" macro and use the "flex_array_size" helper to clarify some calculations, when possible. Moreover, the new structure member also allow us to avoid the open-coded arithmetic on pointers in some situations. Take advantage of this. This code was detected with the help of Coccinelle, and audited and modified manually. Link: https://www.kernel.org/doc/html/next/process/deprecated.html#zero-length-and-one-element-arrays [1] Link: https://www.kernel.org/doc/html/next/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [2] Reviewed-by: Kees Cook Signed-off-by: Erick Archer Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batadv_packet.h | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h index 6e25753015df..439132a819ea 100644 --- a/include/uapi/linux/batadv_packet.h +++ b/include/uapi/linux/batadv_packet.h @@ -9,6 +9,7 @@ #include #include +#include #include /** @@ -592,19 +593,6 @@ struct batadv_tvlv_gateway_data { __be32 bandwidth_up; }; -/** - * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container - * @flags: translation table flags (see batadv_tt_data_flags) - * @ttvn: translation table version number - * @num_vlan: number of announced VLANs. In the TVLV this struct is followed by - * one batadv_tvlv_tt_vlan_data object per announced vlan - */ -struct batadv_tvlv_tt_data { - __u8 flags; - __u8 ttvn; - __be16 num_vlan; -}; - /** * struct batadv_tvlv_tt_vlan_data - vlan specific tt data propagated through * the tt tvlv container @@ -618,6 +606,21 @@ struct batadv_tvlv_tt_vlan_data { __u16 reserved; }; +/** + * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container + * @flags: translation table flags (see batadv_tt_data_flags) + * @ttvn: translation table version number + * @num_vlan: number of announced VLANs. In the TVLV this struct is followed by + * one batadv_tvlv_tt_vlan_data object per announced vlan + * @vlan_data: array of batadv_tvlv_tt_vlan_data objects + */ +struct batadv_tvlv_tt_data { + __u8 flags; + __u8 ttvn; + __be16 num_vlan; + struct batadv_tvlv_tt_vlan_data vlan_data[] __counted_by_be(num_vlan); +}; + /** * struct batadv_tvlv_tt_change - translation table diff data * @flags: status indicators concerning the non-mesh client (see -- cgit v1.2.3 From 61b17d072d811df5733a1570889b8c6fa6834bf8 Mon Sep 17 00:00:00 2001 From: Danila Tikhonov Date: Sun, 18 Aug 2024 23:43:39 +0300 Subject: dt-bindings: clock: qcom,gcc-sm8450: Add SM8475 GCC bindings Add new entry to the SM8450 dt-bindings and add SM8475-specific clocks to SM8450 GCC header file. Signed-off-by: Danila Tikhonov Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240818204348.197788-2-danila@jiaxyga.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,gcc-sm8450.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gcc-sm8450.h b/include/dt-bindings/clock/qcom,gcc-sm8450.h index 9679410843a0..7320e63c3a2f 100644 --- a/include/dt-bindings/clock/qcom,gcc-sm8450.h +++ b/include/dt-bindings/clock/qcom,gcc-sm8450.h @@ -194,6 +194,9 @@ #define GCC_VIDEO_AXI0_CLK 182 #define GCC_VIDEO_AXI1_CLK 183 #define GCC_VIDEO_XO_CLK 184 +/* Additional SM8475-specific clocks */ +#define SM8475_GCC_GPLL2 185 +#define SM8475_GCC_GPLL3 186 /* GCC resets */ #define GCC_CAMERA_BCR 0 -- cgit v1.2.3 From 421d2251fbeac8ab4308ab6653a9252dc4efa6c9 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Thu, 3 Oct 2024 13:46:39 +0200 Subject: dt-bindings: iio: adc: Add the GE HealthCare PMC ADC The GE HealthCare PMC Analog to Digital Converter (ADC) is a 16-Channel (voltage and current), 16-Bit ADC with an I2C Interface. Signed-off-by: Herve Codina Tested-by: Ian Ray Reviewed-by: Conor Dooley Link: https://patch.msgid.link/20241003114641.672086-3-herve.codina@bootlin.com Signed-off-by: Jonathan Cameron --- include/dt-bindings/iio/adc/gehc,pmc-adc.h | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 include/dt-bindings/iio/adc/gehc,pmc-adc.h (limited to 'include') diff --git a/include/dt-bindings/iio/adc/gehc,pmc-adc.h b/include/dt-bindings/iio/adc/gehc,pmc-adc.h new file mode 100644 index 000000000000..2f291e3c76ae --- /dev/null +++ b/include/dt-bindings/iio/adc/gehc,pmc-adc.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ + +#ifndef _DT_BINDINGS_IIO_ADC_GEHC_PMC_ADC_H +#define _DT_BINDINGS_IIO_ADC_GEHC_PMC_ADC_H + +/* ADC channel type */ +#define GEHC_PMC_ADC_VOLTAGE 0 +#define GEHC_PMC_ADC_CURRENT 1 + +#endif -- cgit v1.2.3 From 70c8fd00a9bd0509bbf7bccd9baea8bbd5ddc756 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Oct 2024 17:27:16 -0400 Subject: timekeeping: Add interfaces for handling timestamps with a floor value Multigrain timestamps allow the kernel to use fine-grained timestamps when an inode's attributes is being actively observed via ->getattr(). With this support, it's possible for a file to get a fine-grained timestamp, and another modified after it to get a coarse-grained stamp that is earlier than the fine-grained time. If this happens then the files can appear to have been modified in reverse order, which breaks VFS ordering guarantees [1]. To prevent this, maintain a floor value for multigrain timestamps. Whenever a fine-grained timestamp is handed out, record it, and when later coarse-grained stamps are handed out, ensure they are not earlier than that value. If the coarse-grained timestamp is earlier than the fine-grained floor, return the floor value instead. Add a static singleton atomic64_t into timekeeper.c that is used to keep track of the latest fine-grained time ever handed out. This is tracked as a monotonic ktime_t value to ensure that it isn't affected by clock jumps. Because it is updated at different times than the rest of the timekeeper object, the floor value is managed independently of the timekeeper via a cmpxchg() operation, and sits on its own cacheline. Add two new public interfaces: - ktime_get_coarse_real_ts64_mg() fills a timespec64 with the later of the coarse-grained clock and the floor time - ktime_get_real_ts64_mg() gets the fine-grained clock value, and tries to swap it into the floor. A timespec64 is filled with the result. The floor value is global and updated via a single try_cmpxchg(). If that fails then the operation raced with a concurrent update. Any concurrent update must be later than the existing floor value, so any racing tasks can accept any resulting floor value without retrying. [1]: POSIX requires that files be stamped with realtime clock values, and makes no provision for dealing with backward clock jumps. If a backward realtime clock jump occurs, then files can appear to have been modified in reverse order. Signed-off-by: Jeff Layton Signed-off-by: Thomas Gleixner Tested-by: Randy Dunlap # documentation bits Acked-by: John Stultz Link: https://lore.kernel.org/all/20241002-mgtime-v10-1-d1c4717f5284@kernel.org --- include/linux/timekeeping.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index fc12a9ba2c88..7aa85246c183 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -45,6 +45,10 @@ extern void ktime_get_real_ts64(struct timespec64 *tv); extern void ktime_get_coarse_ts64(struct timespec64 *ts); extern void ktime_get_coarse_real_ts64(struct timespec64 *ts); +/* Multigrain timestamp interfaces */ +extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts); +extern void ktime_get_real_ts64_mg(struct timespec64 *ts); + void getboottime64(struct timespec64 *ts); /* -- cgit v1.2.3 From 96f9a366ec8abe027326d7aab84d64370019f0f1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Oct 2024 17:27:17 -0400 Subject: timekeeping: Add percpu counter for tracking floor swap events The mgtime_floor value is a global variable for tracking the latest fine-grained timestamp handed out. Because it's a global, track the number of times that a new floor value is assigned. Add a new percpu counter to the timekeeping code to track the number of floor swap events that have occurred. A later patch will add a debugfs file to display this counter alongside other stats involving multigrain timestamps. Signed-off-by: Jeff Layton Signed-off-by: Thomas Gleixner Tested-by: Randy Dunlap # documentation bits Link: https://lore.kernel.org/all/20241002-mgtime-v10-2-d1c4717f5284@kernel.org --- include/linux/timekeeping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 7aa85246c183..84a035e86ac8 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -48,6 +48,7 @@ extern void ktime_get_coarse_real_ts64(struct timespec64 *ts); /* Multigrain timestamp interfaces */ extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts); extern void ktime_get_real_ts64_mg(struct timespec64 *ts); +extern unsigned long timekeeping_get_mg_floor_swaps(void); void getboottime64(struct timespec64 *ts); -- cgit v1.2.3 From 51976c6cd786151b6a1bdf8b8b3334beac0ba99c Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Fri, 27 Sep 2024 18:33:22 +0800 Subject: RDMA/core: Provide rdma_user_mmap_disassociate() to disassociate mmap pages Provide a new api rdma_user_mmap_disassociate() for drivers to disassociate mmap pages for a device. Since drivers can now disassociate mmaps by calling this api, introduce a new disassociation_lock to specifically prevent races between this disassociation process and new mmaps. And thus the old hw_destroy_rwsem is not needed in this api. Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20240927103323.1897094-2-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index aa8ede439905..9cb8b5fe7eee 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2948,6 +2948,14 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext, size_t length, u32 min_pgoff, u32 max_pgoff); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +void rdma_user_mmap_disassociate(struct ib_device *device); +#else +static inline void rdma_user_mmap_disassociate(struct ib_device *device) +{ +} +#endif + static inline int rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext, struct rdma_user_mmap_entry *entry, -- cgit v1.2.3 From 2382d68d7d43873ba856baf567cab0d5c523f23b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Sep 2024 15:31:38 +1000 Subject: sched: change wake_up_bit() and related function to expect unsigned long * wake_up_bit() currently allows a "void *". While this isn't strictly a problem as the address is never dereferenced, it is inconsistent with the corresponding wait_on_bit() which requires "unsigned long *" and does dereference the pointer. Any code that needs to wait for a change in something other than an unsigned long would be better served by wake_up_var()/wait_var_event(). This patch changes all related "void *" to "unsigned long *". Reported-by: Linus Torvalds Signed-off-by: NeilBrown Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240925053405.3960701-2-neilb@suse.de --- include/linux/wait_bit.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 7725b7579b78..48e123839892 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -8,7 +8,7 @@ #include struct wait_bit_key { - void *flags; + unsigned long *flags; int bit_nr; unsigned long timeout; }; @@ -23,14 +23,14 @@ struct wait_bit_queue_entry { typedef int wait_bit_action_f(struct wait_bit_key *key, int mode); -void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit); +void __wake_up_bit(struct wait_queue_head *wq_head, unsigned long *word, int bit); int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); -void wake_up_bit(void *word, int bit); -int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode); -int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout); -int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode); -struct wait_queue_head *bit_waitqueue(void *word, int bit); +void wake_up_bit(unsigned long *word, int bit); +int out_of_line_wait_on_bit(unsigned long *word, int, wait_bit_action_f *action, unsigned int mode); +int out_of_line_wait_on_bit_timeout(unsigned long *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout); +int out_of_line_wait_on_bit_lock(unsigned long *word, int, wait_bit_action_f *action, unsigned int mode); +struct wait_queue_head *bit_waitqueue(unsigned long *word, int bit); extern void __init wait_bit_init(void); int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); @@ -327,7 +327,7 @@ do { \ * You can use this helper if bitflags are manipulated atomically rather than * non-atomically under a lock. */ -static inline void clear_and_wake_up_bit(int bit, void *word) +static inline void clear_and_wake_up_bit(int bit, unsigned long *word) { clear_bit_unlock(bit, word); /* See wake_up_bit() for which memory barrier you need to use. */ -- cgit v1.2.3 From 3cdee6b359f134da22f7fd4606e0338413cfd79e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Sep 2024 15:31:39 +1000 Subject: sched: Improve documentation for wake_up_bit/wait_on_bit family of functions This patch revises the documention for wake_up_bit(), clear_and_wake_up_bit(), and all the wait_on_bit() family of functions. The new documentation places less emphasis on the pool of waitqueues used (an implementation detail) and focuses instead on details of how the functions behave. The barriers included in the wait functions and clear_and_wake_up_bit() and those required for wake_up_bit() are spelled out more clearly. The error statuses returned are given explicitly. The fact that the wait_on_bit_lock() function sets the bit is made more obvious. Signed-off-by: NeilBrown Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240925053405.3960701-3-neilb@suse.de --- include/linux/wait_bit.h | 159 +++++++++++++++++++++++++---------------------- 1 file changed, 86 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 48e123839892..723e7bf35747 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -53,19 +53,21 @@ extern int bit_wait_io_timeout(struct wait_bit_key *key, int mode); /** * wait_on_bit - wait for a bit to be cleared - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * @word: the address containing the bit being waited on + * @bit: the bit at that address being waited on * @mode: the task state to sleep in * - * There is a standard hashed waitqueue table for generic use. This - * is the part of the hashtable's accessor API that waits on a bit. - * For instance, if one were to have waiters on a bitflag, one would - * call wait_on_bit() in threads waiting for the bit to clear. - * One uses wait_on_bit() where one is waiting for the bit to clear, - * but has no intention of setting it. - * Returned value will be zero if the bit was cleared, or non-zero - * if the process received a signal and the mode permitted wakeup - * on that signal. + * Wait for the given bit in an unsigned long or bitmap (see DECLARE_BITMAP()) + * to be cleared. The clearing of the bit must be signalled with + * wake_up_bit(), often as clear_and_wake_up_bit(). + * + * The process will wait on a waitqueue selected by hash from a shared + * pool. It will only be woken on a wake_up for the target bit, even + * if other processes on the same queue are waiting for other bits. + * + * Returned value will be zero if the bit was cleared in which case the + * call has ACQUIRE semantics, or %-EINTR if the process received a + * signal and the mode permitted wake up on that signal. */ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) @@ -80,17 +82,20 @@ wait_on_bit(unsigned long *word, int bit, unsigned mode) /** * wait_on_bit_io - wait for a bit to be cleared - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * @word: the address containing the bit being waited on + * @bit: the bit at that address being waited on * @mode: the task state to sleep in * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared. This is similar to wait_on_bit(), but calls - * io_schedule() instead of schedule() for the actual waiting. + * Wait for the given bit in an unsigned long or bitmap (see DECLARE_BITMAP()) + * to be cleared. The clearing of the bit must be signalled with + * wake_up_bit(), often as clear_and_wake_up_bit(). + * + * This is similar to wait_on_bit(), but calls io_schedule() instead of + * schedule() for the actual waiting. * - * Returned value will be zero if the bit was cleared, or non-zero - * if the process received a signal and the mode permitted wakeup - * on that signal. + * Returned value will be zero if the bit was cleared in which case the + * call has ACQUIRE semantics, or %-EINTR if the process received a + * signal and the mode permitted wake up on that signal. */ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) @@ -104,19 +109,24 @@ wait_on_bit_io(unsigned long *word, int bit, unsigned mode) } /** - * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * wait_on_bit_timeout - wait for a bit to be cleared or a timeout to elapse + * @word: the address containing the bit being waited on + * @bit: the bit at that address being waited on * @mode: the task state to sleep in * @timeout: timeout, in jiffies * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared. This is similar to wait_on_bit(), except also takes a - * timeout parameter. + * Wait for the given bit in an unsigned long or bitmap (see + * DECLARE_BITMAP()) to be cleared, or for a timeout to expire. The + * clearing of the bit must be signalled with wake_up_bit(), often as + * clear_and_wake_up_bit(). * - * Returned value will be zero if the bit was cleared before the - * @timeout elapsed, or non-zero if the @timeout elapsed or process - * received a signal and the mode permitted wakeup on that signal. + * This is similar to wait_on_bit(), except it also takes a timeout + * parameter. + * + * Returned value will be zero if the bit was cleared in which case the + * call has ACQUIRE semantics, or %-EINTR if the process received a + * signal and the mode permitted wake up on that signal, or %-EAGAIN if the + * timeout elapsed. */ static inline int wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, @@ -132,19 +142,21 @@ wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, /** * wait_on_bit_action - wait for a bit to be cleared - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * @word: the address containing the bit waited on + * @bit: the bit at that address being waited on * @action: the function used to sleep, which may take special actions * @mode: the task state to sleep in * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared, and allow the waiting action to be specified. - * This is like wait_on_bit() but allows fine control of how the waiting - * is done. + * Wait for the given bit in an unsigned long or bitmap (see DECLARE_BITMAP()) + * to be cleared. The clearing of the bit must be signalled with + * wake_up_bit(), often as clear_and_wake_up_bit(). + * + * This is similar to wait_on_bit(), but calls @action() instead of + * schedule() for the actual waiting. * - * Returned value will be zero if the bit was cleared, or non-zero - * if the process received a signal and the mode permitted wakeup - * on that signal. + * Returned value will be zero if the bit was cleared in which case the + * call has ACQUIRE semantics, or the error code returned by @action if + * that call returned non-zero. */ static inline int wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, @@ -157,23 +169,22 @@ wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, } /** - * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * wait_on_bit_lock - wait for a bit to be cleared, then set it + * @word: the address containing the bit being waited on + * @bit: the bit of the word being waited on and set * @mode: the task state to sleep in * - * There is a standard hashed waitqueue table for generic use. This - * is the part of the hashtable's accessor API that waits on a bit - * when one intends to set it, for instance, trying to lock bitflags. - * For instance, if one were to have waiters trying to set bitflag - * and waiting for it to clear before setting it, one would call - * wait_on_bit() in threads waiting to be able to set the bit. - * One uses wait_on_bit_lock() where one is waiting for the bit to - * clear with the intention of setting it, and when done, clearing it. + * Wait for the given bit in an unsigned long or bitmap (see + * DECLARE_BITMAP()) to be cleared. The clearing of the bit must be + * signalled with wake_up_bit(), often as clear_and_wake_up_bit(). As + * soon as it is clear, atomically set it and return. * - * Returns zero if the bit was (eventually) found to be clear and was - * set. Returns non-zero if a signal was delivered to the process and - * the @mode allows that signal to wake the process. + * This is similar to wait_on_bit(), but sets the bit before returning. + * + * Returned value will be zero if the bit was successfully set in which + * case the call has the same memory sequencing semantics as + * test_and_clear_bit(), or %-EINTR if the process received a signal and + * the mode permitted wake up on that signal. */ static inline int wait_on_bit_lock(unsigned long *word, int bit, unsigned mode) @@ -185,15 +196,18 @@ wait_on_bit_lock(unsigned long *word, int bit, unsigned mode) } /** - * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * wait_on_bit_lock_io - wait for a bit to be cleared, then set it + * @word: the address containing the bit being waited on + * @bit: the bit of the word being waited on and set * @mode: the task state to sleep in * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared and then to atomically set it. This is similar - * to wait_on_bit(), but calls io_schedule() instead of schedule() - * for the actual waiting. + * Wait for the given bit in an unsigned long or bitmap (see + * DECLARE_BITMAP()) to be cleared. The clearing of the bit must be + * signalled with wake_up_bit(), often as clear_and_wake_up_bit(). As + * soon as it is clear, atomically set it and return. + * + * This is similar to wait_on_bit_lock(), but calls io_schedule() instead + * of schedule(). * * Returns zero if the bit was (eventually) found to be clear and was * set. Returns non-zero if a signal was delivered to the process and @@ -209,21 +223,19 @@ wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode) } /** - * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * wait_on_bit_lock_action - wait for a bit to be cleared, then set it + * @word: the address containing the bit being waited on + * @bit: the bit of the word being waited on and set * @action: the function used to sleep, which may take special actions * @mode: the task state to sleep in * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared and then to set it, and allow the waiting action - * to be specified. - * This is like wait_on_bit() but allows fine control of how the waiting - * is done. + * This is similar to wait_on_bit_lock(), but calls @action() instead of + * schedule() for the actual waiting. * - * Returns zero if the bit was (eventually) found to be clear and was - * set. Returns non-zero if a signal was delivered to the process and - * the @mode allows that signal to wake the process. + * Returned value will be zero if the bit was successfully set in which + * case the call has the same memory sequencing semantics as + * test_and_clear_bit(), or the error code returned by @action if that + * call returned non-zero. */ static inline int wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action, @@ -320,12 +332,13 @@ do { \ /** * clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit - * * @bit: the bit of the word being waited on - * @word: the word being waited on, a kernel virtual address + * @word: the address containing the bit being waited on * - * You can use this helper if bitflags are manipulated atomically rather than - * non-atomically under a lock. + * The designated bit is cleared and any tasks waiting in wait_on_bit() + * or similar will be woken. This call has RELEASE semantics so that + * any changes to memory made before this call are guaranteed to be visible + * after the corresponding wait_on_bit() completes. */ static inline void clear_and_wake_up_bit(int bit, unsigned long *word) { -- cgit v1.2.3 From bf39882edc798279765ca31751f6e679b50b97ef Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Sep 2024 15:31:40 +1000 Subject: sched: Document wait_var_event() family of functions and wake_up_var() wake_up_var(), wait_var_event() and related interfaces are not documented but have important ordering requirements. This patch adds documentation and makes these requirements explicit. The return values for those wait_var_event_* functions which return a value are documented. Note that these are, perhaps surprisingly, sometimes different from comparable wait_on_bit() functions. Signed-off-by: NeilBrown Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240925053405.3960701-4-neilb@suse.de --- include/linux/wait_bit.h | 71 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'include') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 723e7bf35747..06ec99b90bf3 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -282,6 +282,22 @@ __out: __ret; \ ___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ schedule()) +/** + * wait_var_event - wait for a variable to be updated and notified + * @var: the address of variable being waited on + * @condition: the condition to wait for + * + * Wait for a @condition to be true, only re-checking when a wake up is + * received for the given @var (an arbitrary kernel address which need + * not be directly related to the given condition, but usually is). + * + * The process will wait on a waitqueue selected by hash from a shared + * pool. It will only be woken on a wake_up for the given address. + * + * The condition should normally use smp_load_acquire() or a similarly + * ordered access to ensure that any changes to memory made before the + * condition became true will be visible after the wait completes. + */ #define wait_var_event(var, condition) \ do { \ might_sleep(); \ @@ -294,6 +310,24 @@ do { \ ___wait_var_event(var, condition, TASK_KILLABLE, 0, 0, \ schedule()) +/** + * wait_var_event_killable - wait for a variable to be updated and notified + * @var: the address of variable being waited on + * @condition: the condition to wait for + * + * Wait for a @condition to be true or a fatal signal to be received, + * only re-checking the condition when a wake up is received for the given + * @var (an arbitrary kernel address which need not be directly related + * to the given condition, but usually is). + * + * This is similar to wait_var_event() but returns a value which is + * 0 if the condition became true, or %-ERESTARTSYS if a fatal signal + * was received. + * + * The condition should normally use smp_load_acquire() or a similarly + * ordered access to ensure that any changes to memory made before the + * condition became true will be visible after the wait completes. + */ #define wait_var_event_killable(var, condition) \ ({ \ int __ret = 0; \ @@ -308,6 +342,26 @@ do { \ TASK_UNINTERRUPTIBLE, 0, timeout, \ __ret = schedule_timeout(__ret)) +/** + * wait_var_event_timeout - wait for a variable to be updated or a timeout to expire + * @var: the address of variable being waited on + * @condition: the condition to wait for + * @timeout: maximum time to wait in jiffies + * + * Wait for a @condition to be true or a timeout to expire, only + * re-checking the condition when a wake up is received for the given + * @var (an arbitrary kernel address which need not be directly related + * to the given condition, but usually is). + * + * This is similar to wait_var_event() but returns a value which is 0 if + * the timeout expired and the condition was still false, or the + * remaining time left in the timeout (but at least 1) if the condition + * was found to be true. + * + * The condition should normally use smp_load_acquire() or a similarly + * ordered access to ensure that any changes to memory made before the + * condition became true will be visible after the wait completes. + */ #define wait_var_event_timeout(var, condition, timeout) \ ({ \ long __ret = timeout; \ @@ -321,6 +375,23 @@ do { \ ___wait_var_event(var, condition, TASK_INTERRUPTIBLE, 0, 0, \ schedule()) +/** + * wait_var_event_killable - wait for a variable to be updated and notified + * @var: the address of variable being waited on + * @condition: the condition to wait for + * + * Wait for a @condition to be true or a signal to be received, only + * re-checking the condition when a wake up is received for the given + * @var (an arbitrary kernel address which need not be directly related + * to the given condition, but usually is). + * + * This is similar to wait_var_event() but returns a value which is 0 if + * the condition became true, or %-ERESTARTSYS if a signal was received. + * + * The condition should normally use smp_load_acquire() or a similarly + * ordered access to ensure that any changes to memory made before the + * condition became true will be visible after the wait completes. + */ #define wait_var_event_interruptible(var, condition) \ ({ \ int __ret = 0; \ -- cgit v1.2.3 From 52d633def56c10fe3e82a2c5d88c3ecb3f4e4852 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Sep 2024 15:31:41 +1000 Subject: sched: Add test_and_clear_wake_up_bit() and atomic_dec_and_wake_up() There are common patterns in the kernel of using test_and_clear_bit() before wake_up_bit(), and atomic_dec_and_test() before wake_up_var(). These combinations don't need extra barriers but sometimes include them unnecessarily. To help avoid the unnecessary barriers and to help discourage the general use of wake_up_bit/var (which is a fragile interface) introduce two combined functions which implement these patterns. Also add store_release_wake_up() which supports the task of simply setting a non-atomic variable and sending a wakeup. This pattern requires barriers which are often omitted. Signed-off-by: NeilBrown Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240925053405.3960701-5-neilb@suse.de --- include/linux/wait_bit.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 06ec99b90bf3..0272629b590a 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -419,4 +419,64 @@ static inline void clear_and_wake_up_bit(int bit, unsigned long *word) wake_up_bit(word, bit); } +/** + * test_and_clear_wake_up_bit - clear a bit if it was set: wake up anyone waiting on that bit + * @bit: the bit of the word being waited on + * @word: the address of memory containing that bit + * + * If the bit is set and can be atomically cleared, any tasks waiting in + * wait_on_bit() or similar will be woken. This call has the same + * complete ordering semantics as test_and_clear_bit(). Any changes to + * memory made before this call are guaranteed to be visible after the + * corresponding wait_on_bit() completes. + * + * Returns %true if the bit was successfully set and the wake up was sent. + */ +static inline bool test_and_clear_wake_up_bit(int bit, unsigned long *word) +{ + if (!test_and_clear_bit(bit, word)) + return false; + /* no extra barrier required */ + wake_up_bit(word, bit); + return true; +} + +/** + * atomic_dec_and_wake_up - decrement an atomic_t and if zero, wake up waiters + * @var: the variable to dec and test + * + * Decrements the atomic variable and if it reaches zero, send a wake_up to any + * processes waiting on the variable. + * + * This function has the same complete ordering semantics as atomic_dec_and_test. + * + * Returns %true is the variable reaches zero and the wake up was sent. + */ + +static inline bool atomic_dec_and_wake_up(atomic_t *var) +{ + if (!atomic_dec_and_test(var)) + return false; + /* No extra barrier required */ + wake_up_var(var); + return true; +} + +/** + * store_release_wake_up - update a variable and send a wake_up + * @var: the address of the variable to be updated and woken + * @val: the value to store in the variable. + * + * Store the given value in the variable send a wake up to any tasks + * waiting on the variable. All necessary barriers are included to ensure + * the task calling wait_var_event() sees the new value and all values + * written to memory before this call. + */ +#define store_release_wake_up(var, val) \ +do { \ + smp_store_release(var, val); \ + smp_mb(); \ + wake_up_var(var); \ +} while (0) + #endif /* _LINUX_WAIT_BIT_H */ -- cgit v1.2.3 From cc2e1c82d7e474753681a38b07b63034e107e369 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Sep 2024 15:31:42 +1000 Subject: sched: Add wait/wake interface for variable updated under a lock. Sometimes we need to wait for a condition to be true which must be testing while holding a lock. Correspondingly the condition is made true while holding the lock and the wake up is sent under the lock. This patch provides wake and wait interfaces which can be used for this situation when the lock is a mutex or a spinlock, or any other lock for which there are foo_lock() and foo_unlock() functions. Signed-off-by: NeilBrown Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240925053405.3960701-6-neilb@suse.de --- include/linux/wait_bit.h | 106 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) (limited to 'include') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 0272629b590a..6aea10efca3d 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -401,6 +401,112 @@ do { \ __ret; \ }) +/** + * wait_var_event_any_lock - wait for a variable to be updated under a lock + * @var: the address of the variable being waited on + * @condition: condition to wait for + * @lock: the object that is locked to protect updates to the variable + * @type: prefix on lock and unlock operations + * @state: waiting state, %TASK_UNINTERRUPTIBLE etc. + * + * Wait for a condition which can only be reliably tested while holding + * a lock. The variables assessed in the condition will normal be updated + * under the same lock, and the wake up should be signalled with + * wake_up_var_locked() under the same lock. + * + * This is similar to wait_var_event(), but assumes a lock is held + * while calling this function and while updating the variable. + * + * This must be called while the given lock is held and the lock will be + * dropped when schedule() is called to wait for a wake up, and will be + * reclaimed before testing the condition again. The functions used to + * unlock and lock the object are constructed by appending _unlock and _lock + * to @type. + * + * Return %-ERESTARTSYS if a signal arrives which is allowed to interrupt + * the wait according to @state. + */ +#define wait_var_event_any_lock(var, condition, lock, type, state) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __ret = ___wait_var_event(var, condition, state, 0, 0, \ + type ## _unlock(lock); \ + schedule(); \ + type ## _lock(lock)); \ + __ret; \ +}) + +/** + * wait_var_event_spinlock - wait for a variable to be updated under a spinlock + * @var: the address of the variable being waited on + * @condition: condition to wait for + * @lock: the spinlock which protects updates to the variable + * + * Wait for a condition which can only be reliably tested while holding + * a spinlock. The variables assessed in the condition will normal be updated + * under the same spinlock, and the wake up should be signalled with + * wake_up_var_locked() under the same spinlock. + * + * This is similar to wait_var_event(), but assumes a spinlock is held + * while calling this function and while updating the variable. + * + * This must be called while the given lock is held and the lock will be + * dropped when schedule() is called to wait for a wake up, and will be + * reclaimed before testing the condition again. + */ +#define wait_var_event_spinlock(var, condition, lock) \ + wait_var_event_any_lock(var, condition, lock, spin, TASK_UNINTERRUPTIBLE) + +/** + * wait_var_event_mutex - wait for a variable to be updated under a mutex + * @var: the address of the variable being waited on + * @condition: condition to wait for + * @mutex: the mutex which protects updates to the variable + * + * Wait for a condition which can only be reliably tested while holding + * a mutex. The variables assessed in the condition will normal be + * updated under the same mutex, and the wake up should be signalled + * with wake_up_var_locked() under the same mutex. + * + * This is similar to wait_var_event(), but assumes a mutex is held + * while calling this function and while updating the variable. + * + * This must be called while the given mutex is held and the mutex will be + * dropped when schedule() is called to wait for a wake up, and will be + * reclaimed before testing the condition again. + */ +#define wait_var_event_mutex(var, condition, lock) \ + wait_var_event_any_lock(var, condition, lock, mutex, TASK_UNINTERRUPTIBLE) + +/** + * wake_up_var_protected - wake up waiters for a variable asserting that it is safe + * @var: the address of the variable being waited on + * @cond: the condition which afirms this is safe + * + * When waking waiters which use wait_var_event_any_lock() the waker must be + * holding the reelvant lock to avoid races. This version of wake_up_var() + * asserts that the relevant lock is held and so no barrier is needed. + * The @cond is only tested when CONFIG_LOCKDEP is enabled. + */ +#define wake_up_var_protected(var, cond) \ +do { \ + lockdep_assert(cond); \ + wake_up_var(var); \ +} while (0) + +/** + * wake_up_var_locked - wake up waiters for a variable while holding a spinlock or mutex + * @var: the address of the variable being waited on + * @lock: The spinlock or mutex what protects the variable + * + * Send a wake up for the given variable which should be waited for with + * wait_var_event_spinlock() or wait_var_event_mutex(). Unlike wake_up_var(), + * no extra barriers are needed as the locking provides sufficient sequencing. + */ +#define wake_up_var_locked(var, lock) \ + wake_up_var_protected(var, lockdep_is_held(lock)) + /** * clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit * @bit: the bit of the word being waited on -- cgit v1.2.3 From 80681c04c5e8e4297b9ebf201ca3ce6242aa16c3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Sep 2024 15:31:43 +1000 Subject: sched: add wait_var_event_io() It is not currently possible to wait wait_var_event for an io_schedule() style wait. This patch adds wait_var_event_io() for that purpose. Signed-off-by: NeilBrown Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240925053405.3960701-7-neilb@suse.de --- include/linux/wait_bit.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 6aea10efca3d..6346e26fbfd1 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -281,6 +281,9 @@ __out: __ret; \ #define __wait_var_event(var, condition) \ ___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ schedule()) +#define __wait_var_event_io(var, condition) \ + ___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ + io_schedule()) /** * wait_var_event - wait for a variable to be updated and notified @@ -306,6 +309,34 @@ do { \ __wait_var_event(var, condition); \ } while (0) +/** + * wait_var_event_io - wait for a variable to be updated and notified + * @var: the address of variable being waited on + * @condition: the condition to wait for + * + * Wait for an IO related @condition to be true, only re-checking when a + * wake up is received for the given @var (an arbitrary kernel address + * which need not be directly related to the given condition, but + * usually is). + * + * The process will wait on a waitqueue selected by hash from a shared + * pool. It will only be woken on a wake_up for the given address. + * + * This is similar to wait_var_event(), but calls io_schedule() instead + * of schedule(). + * + * The condition should normally use smp_load_acquire() or a similarly + * ordered access to ensure that any changes to memory made before the + * condition became true will be visible after the wait completes. + */ +#define wait_var_event_io(var, condition) \ +do { \ + might_sleep(); \ + if (condition) \ + break; \ + __wait_var_event_io(var, condition); \ +} while (0) + #define __wait_var_event_killable(var, condition) \ ___wait_var_event(var, condition, TASK_KILLABLE, 0, 0, \ schedule()) -- cgit v1.2.3 From 5e9f0c4819deb9459f32f12c4fd2b47993b8c395 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Mon, 30 Sep 2024 05:09:46 +0000 Subject: sched: remove unused __HAVE_THREAD_FUNCTIONS hook support __HAVE_THREAD_FUNCTIONS could be defined by architectures wishing to provide their own task_thread_info(), task_stack_page(), setup_thread_stack() and end_of_stack() hooks. Commit cf8e8658100d ("arch: Remove Itanium (IA-64) architecture") removed the last upstream consumer of __HAVE_THREAD_FUNCTIONS, so change the remaining !CONFIG_THREAD_INFO_IN_TASK && !__HAVE_THREAD_FUNCTIONS conditionals to only check for the former case. Signed-off-by: David Disseldorp Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ard Biesheuvel Link: https://lkml.kernel.org/r/20240930050945.30304-2-ddiss@suse.de --- include/linux/sched.h | 2 +- include/linux/sched/task_stack.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index e6ee4258169a..abf26f1e1447 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1898,7 +1898,7 @@ extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)]; #ifdef CONFIG_THREAD_INFO_IN_TASK # define task_thread_info(task) (&(task)->thread_info) -#elif !defined(__HAVE_THREAD_FUNCTIONS) +#else # define task_thread_info(task) ((struct thread_info *)(task)->stack) #endif diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index bf10bdb487dd..2e52cc421bce 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -33,7 +33,7 @@ static __always_inline unsigned long *end_of_stack(const struct task_struct *tas #endif } -#elif !defined(__HAVE_THREAD_FUNCTIONS) +#else #define task_stack_page(task) ((void *)(task)->stack) -- cgit v1.2.3 From 0ac8f14ef22a1592b44dc90272aab35e43b0106a Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 2 Oct 2024 00:40:16 +0100 Subject: sched/wait: Remove unused bit_wait_io_timeout bit_wait_io_timeout has been unused since 2016's commit 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit") Remove it. Signed-off-by: "Dr. David Alan Gilbert" Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Tim Chen Link: https://lore.kernel.org/r/20241001234016.231696-1-linux@treblig.org --- include/linux/wait_bit.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 6346e26fbfd1..9e29d79fc790 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -49,7 +49,6 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync extern int bit_wait(struct wait_bit_key *key, int mode); extern int bit_wait_io(struct wait_bit_key *key, int mode); extern int bit_wait_timeout(struct wait_bit_key *key, int mode); -extern int bit_wait_io_timeout(struct wait_bit_key *key, int mode); /** * wait_on_bit - wait for a bit to be cleared -- cgit v1.2.3 From 066c779b094b63754e0742ad8675d72d6c0a46f6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 3 Oct 2024 18:48:19 +0300 Subject: platform/x86: intel_scu_ipc: Don't use "proxy" headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update header inclusions to follow IWYU (Include What You Use) principle. Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Link: https://lore.kernel.org/r/20241003154819.1075141-1-andriy.shevchenko@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/x86/intel_scu_ipc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/x86/intel_scu_ipc.h b/include/linux/platform_data/x86/intel_scu_ipc.h index 0ca9962e97f2..b287627759f7 100644 --- a/include/linux/platform_data/x86/intel_scu_ipc.h +++ b/include/linux/platform_data/x86/intel_scu_ipc.h @@ -2,9 +2,13 @@ #ifndef __PLATFORM_X86_INTEL_SCU_IPC_H_ #define __PLATFORM_X86_INTEL_SCU_IPC_H_ +#include #include +#include struct device; +struct module; + struct intel_scu_ipc_dev; /** -- cgit v1.2.3 From 4e40eff0b5737c0de39e1ae5812509efbc0b986e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Oct 2024 17:27:18 -0400 Subject: fs: add infrastructure for multigrain timestamps The VFS has always used coarse-grained timestamps when updating the ctime and mtime after a change. This has the benefit of allowing filesystems to optimize away a lot metadata updates, down to around 1 per jiffy, even when a file is under heavy writes. Unfortunately, this has always been an issue when we're exporting via NFSv3, which relies on timestamps to validate caches. A lot of changes can happen in a jiffy, so timestamps aren't sufficient to help the client decide when to invalidate the cache. Even with NFSv4, a lot of exported filesystems don't properly support a change attribute and are subject to the same problems with timestamp granularity. Other applications have similar issues with timestamps (e.g backup applications). If fine-grained timestamps were always used, that would improve the situation, but that becomes rather expensive, as the underlying filesystem would have to log a lot more metadata updates. What is needed is a way to only use fine-grained timestamps when they are being actively queried. Use the (unused) top bit in inode->i_ctime_nsec as a flag that indicates whether the current timestamps have been queried via stat() or the like. When it's set, allow the update to use a fine-grained timestamp iff it's necessary to make the ctime show a different value. If it has been queried, then first see whether the current coarse time is later than the existing ctime. If it is, accept that value. If it isn't, then get a fine-grained timestamp and attempt to stamp the inode ctime with that value. If that races with another concurrent stamp, then abandon the update and take the new value without retrying. Filesystems can opt into this by setting the FS_MGTIME fstype flag. Others should be unaffected (other than being subject to the same floor value as multigrain filesystems). Tested-by: Randy Dunlap # documentation bits Reviewed-by: Jan Kara Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20241002-mgtime-v10-3-d1c4717f5284@kernel.org Signed-off-by: Christian Brauner --- include/linux/fs.h | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6ca11e241a24..eff688e75f2f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1613,6 +1613,17 @@ static inline struct timespec64 inode_set_mtime(struct inode *inode, return inode_set_mtime_to_ts(inode, ts); } +/* + * Multigrain timestamps + * + * Conditionally use fine-grained ctime and mtime timestamps when there + * are users actively observing them via getattr. The primary use-case + * for this is NFS clients that use the ctime to distinguish between + * different states of the file, and that are often fooled by multiple + * operations that occur in the same coarse-grained timer tick. + */ +#define I_CTIME_QUERIED ((u32)BIT(31)) + static inline time64_t inode_get_ctime_sec(const struct inode *inode) { return inode->i_ctime_sec; @@ -1620,7 +1631,7 @@ static inline time64_t inode_get_ctime_sec(const struct inode *inode) static inline long inode_get_ctime_nsec(const struct inode *inode) { - return inode->i_ctime_nsec; + return inode->i_ctime_nsec & ~I_CTIME_QUERIED; } static inline struct timespec64 inode_get_ctime(const struct inode *inode) @@ -1631,13 +1642,7 @@ static inline struct timespec64 inode_get_ctime(const struct inode *inode) return ts; } -static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, - struct timespec64 ts) -{ - inode->i_ctime_sec = ts.tv_sec; - inode->i_ctime_nsec = ts.tv_nsec; - return ts; -} +struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts); /** * inode_set_ctime - set the ctime in the inode @@ -2500,6 +2505,7 @@ struct file_system_type { #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ +#define FS_MGTIME 64 /* FS uses multigrain timestamps */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; @@ -2523,6 +2529,17 @@ struct file_system_type { #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) +/** + * is_mgtime: is this inode using multigrain timestamps + * @inode: inode to test for multigrain timestamps + * + * Return true if the inode uses multigrain timestamps, false otherwise. + */ +static inline bool is_mgtime(const struct inode *inode) +{ + return inode->i_sb->s_type->fs_flags & FS_MGTIME; +} + extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); @@ -3262,6 +3279,7 @@ extern void page_put_link(void *); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); +void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode); void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); void generic_fill_statx_atomic_writes(struct kstat *stat, -- cgit v1.2.3 From fcd4904e2f6908d5c255fa5818bcf8ad32a6f0e8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 5 Oct 2024 19:23:03 +0100 Subject: netfs: Remove call to folio_index() Calling folio_index() is pointless overhead; directly dereferencing folio->index is fine. Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20241005182307.3190401-2-willy@infradead.org Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 1d7c52821e55..72a208fd4496 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -451,7 +451,7 @@ TRACE_EVENT(netfs_folio, struct address_space *__m = READ_ONCE(folio->mapping); __entry->ino = __m ? __m->host->i_ino : 0; __entry->why = why; - __entry->index = folio_index(folio); + __entry->index = folio->index; __entry->nr = folio_nr_pages(folio); ), -- cgit v1.2.3 From 8fd3395ec9051a52828fcca2328cb50a69dea8ef Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 31 Jul 2024 11:49:04 -0400 Subject: get rid of ...lookup...fdget_rcu() family Once upon a time, predecessors of those used to do file lookup without bumping a refcount, provided that caller held rcu_read_lock() across the lookup and whatever it wanted to read from the struct file found. When struct file allocation switched to SLAB_TYPESAFE_BY_RCU, that stopped being feasible and these primitives started to bump the file refcount for lookup result, requiring the caller to call fput() afterwards. But that turned them pointless - e.g. rcu_read_lock(); file = lookup_fdget_rcu(fd); rcu_read_unlock(); is equivalent to file = fget_raw(fd); and all callers of lookup_fdget_rcu() are of that form. Similarly, task_lookup_fdget_rcu() calls can be replaced with calling fget_task(). task_lookup_next_fdget_rcu() doesn't have direct counterparts, but its callers would be happier if we replaced it with an analogue that deals with RCU internally. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/fdtable.h | 4 ---- include/linux/file.h | 1 + 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index b1c5722f2b3c..e25e2cb65d30 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -92,10 +92,6 @@ static inline struct file *files_lookup_fd_locked(struct files_struct *files, un return files_lookup_fd_raw(files, fd); } -struct file *lookup_fdget_rcu(unsigned int fd); -struct file *task_lookup_fdget_rcu(struct task_struct *task, unsigned int fd); -struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *fd); - static inline bool close_on_exec(unsigned int fd, const struct files_struct *files) { return test_bit(fd, files_fdtable(files)->close_on_exec); diff --git a/include/linux/file.h b/include/linux/file.h index f98de143245a..ec4ad5e6a061 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -72,6 +72,7 @@ static inline void fdput(struct fd fd) extern struct file *fget(unsigned int fd); extern struct file *fget_raw(unsigned int fd); extern struct file *fget_task(struct task_struct *task, unsigned int fd); +extern struct file *fget_task_next(struct task_struct *task, unsigned int *fd); extern void __f_unlock_pos(struct file *); struct fd fdget(unsigned int fd); -- cgit v1.2.3 From cab0515211f483e392d6862021ed008f49058561 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 Jun 2024 17:48:36 -0400 Subject: move close_range(2) into fs/file.c, fold __close_range() into it We never had callers for __close_range() except for close_range(2) itself. Nothing of that sort has appeared in four years and if any users do show up, we can always separate those suckers again. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/fdtable.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index e25e2cb65d30..c45306a9f007 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -111,7 +111,6 @@ int iterate_fd(struct files_struct *, unsigned, const void *); extern int close_fd(unsigned int fd); -extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); extern struct file *file_close_fd(unsigned int fd); extern struct kmem_cache *files_cachep; -- cgit v1.2.3 From dad6c45cbd40b57db95c9d46e01ff6d302e86042 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 16 Feb 2024 11:41:16 -0500 Subject: drm/amdkfd: Output migrate end event if migrate failed If page migration failed, also output migrate end event to match with migrate start event, with failure error_code added to the end of the migrate message macro. This will not break uAPI because application uses old message macro sscanf drop and ignore the error_code. Output GPU page fault restore end event if migration failed. Signed-off-by: Philip Yang Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 717307d6b5b7..fa9f9846b88e 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -609,6 +609,7 @@ struct kfd_ioctl_smi_events_args { * migrate_update: GPU page fault is recovered by 'M' for migrate, 'U' for update * rw: 'W' for write page fault, 'R' for read page fault * rescheduled: 'R' if the queue restore failed and rescheduled to try again + * error_code: migrate failure error code, 0 if no error */ #define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\ "%x %s\n", (reset_seq_num), (reset_cause) @@ -630,9 +631,9 @@ struct kfd_ioctl_smi_events_args { "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", (ns), (pid), (start), (size),\ (from), (to), (prefetch_loc), (preferred_loc), (migrate_trigger) -#define KFD_EVENT_FMT_MIGRATE_END(ns, pid, start, size, from, to, migrate_trigger)\ - "%lld -%d @%lx(%lx) %x->%x %d\n", (ns), (pid), (start), (size),\ - (from), (to), (migrate_trigger) +#define KFD_EVENT_FMT_MIGRATE_END(ns, pid, start, size, from, to, migrate_trigger, error_code) \ + "%lld -%d @%lx(%lx) %x->%x %d %d\n", (ns), (pid), (start), (size),\ + (from), (to), (migrate_trigger), (error_code) #define KFD_EVENT_FMT_QUEUE_EVICTION(ns, pid, node, evict_trigger)\ "%lld -%d %x %d\n", (ns), (pid), (node), (evict_trigger) -- cgit v1.2.3 From a3ab2d45b9887ee609cd3bea39f668236935774c Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Tue, 30 Jul 2024 15:33:23 -0400 Subject: drm/amdkfd: SMI report dropped event count Add new SMI event to report the dropped event count. When the event kfifo is full, drop count is not zero, or no enough space left to store the event message, increase drop count. After reading event out from kfifo, if event was dropped, drop_count is not zero, generate a dropped event record and reset drop count to zero. Signed-off-by: Philip Yang Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index fa9f9846b88e..7afd66d45313 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -530,6 +530,7 @@ enum kfd_smi_event { KFD_SMI_EVENT_QUEUE_EVICTION = 9, KFD_SMI_EVENT_QUEUE_RESTORE = 10, KFD_SMI_EVENT_UNMAP_FROM_GPU = 11, + KFD_SMI_EVENT_DROPPED_EVENT = 12, /* * max event number, as a flag bit to get events from all processes, @@ -610,6 +611,7 @@ struct kfd_ioctl_smi_events_args { * rw: 'W' for write page fault, 'R' for read page fault * rescheduled: 'R' if the queue restore failed and rescheduled to try again * error_code: migrate failure error code, 0 if no error + * drop_count: how many events dropped when fifo is full */ #define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\ "%x %s\n", (reset_seq_num), (reset_cause) @@ -645,6 +647,10 @@ struct kfd_ioctl_smi_events_args { "%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\ (node), (unmap_trigger) +#define KFD_EVENT_FMT_DROPPED_EVENT(ns, pid, drop_count)\ + "%lld -%d %d\n", (ns), (pid), (drop_count) + + /************************************************************************************************** * CRIU IOCTLs (Checkpoint Restore In Userspace) * -- cgit v1.2.3 From 539770616521e5b046ca7612eb79ba11b53edb1d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 3 Oct 2024 12:52:17 +0100 Subject: net: dsa: remove obsolete phylink dsa_switch operations No driver now uses the DSA switch phylink members, so we can now remove the method pointers, but we need to leave empty shim functions to allow those drivers that do not provide phylink MAC operations structure to continue functioning. Signed-off-by: Russell King (oracle) Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean # sja1105, felix, dsa_loop Link: https://patch.msgid.link/E1swKNV-0060oN-1b@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index d7a6c2930277..72ae65e7246a 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -885,21 +885,6 @@ struct dsa_switch_ops { */ void (*phylink_get_caps)(struct dsa_switch *ds, int port, struct phylink_config *config); - struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds, - int port, - phy_interface_t iface); - void (*phylink_mac_config)(struct dsa_switch *ds, int port, - unsigned int mode, - const struct phylink_link_state *state); - void (*phylink_mac_link_down)(struct dsa_switch *ds, int port, - unsigned int mode, - phy_interface_t interface); - void (*phylink_mac_link_up)(struct dsa_switch *ds, int port, - unsigned int mode, - phy_interface_t interface, - struct phy_device *phydev, - int speed, int duplex, - bool tx_pause, bool rx_pause); void (*phylink_fixed_state)(struct dsa_switch *ds, int port, struct phylink_link_state *state); /* -- cgit v1.2.3 From a3f5f4c2f9b6bc2aa6f5a3e8e23b7519e4f2e3e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 4 Oct 2024 13:47:20 +0000 Subject: ipv4: remove fib_info_devhash[] Upcoming per-netns RTNL conversion needs to get rid of shared hash tables. fib_info_devhash[] is one of them. It is unclear why we used a hash table, because a single hlist_head per net device was cheaper and scalable. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: David Ahern Link: https://patch.msgid.link/20241004134720.579244-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 49a7e7db0883..3baf8e539b6f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2211,6 +2211,9 @@ struct net_device { /* Protocol-specific pointers */ struct in_device __rcu *ip_ptr; + /** @fib_nh_head: nexthops associated with this netdev */ + struct hlist_head fib_nh_head; + #if IS_ENABLED(CONFIG_VLAN_8021Q) struct vlan_info __rcu *vlan_info; #endif -- cgit v1.2.3 From 83134ef4609388f6b9ca31a384f531155196c2a7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 Oct 2024 12:13:31 +0200 Subject: netkit: Add option for scrubbing skb meta data Jordan reported that when running Cilium with netkit in per-endpoint-routes mode, network policy misclassifies traffic. In this direct routing mode of Cilium which is used in case of GKE/EKS/AKS, the Pod's BPF program to enforce policy sits on the netkit primary device's egress side. The issue here is that in case of netkit's netkit_prep_forward(), it will clear meta data such as skb->mark and skb->priority before executing the BPF program. Thus, identity data stored in there from earlier BPF programs (e.g. from tcx ingress on the physical device) gets cleared instead of being made available for the primary's program to process. While for traffic egressing the Pod via the peer device this might be desired, this is different for the primary one where compared to tcx egress on the host veth this information would be available. To address this, add a new parameter for the device orchestration to allow control of skb->mark and skb->priority scrubbing, to make the two accessible from BPF (and eventually leave it up to the program to scrub). By default, the current behavior is retained. For netkit peer this also enables the use case where applications could cooperate/signal intent to the BPF program. Note that struct netkit has a 4 byte hole between policy and bundle which is used here, in other words, struct netkit's first cacheline content used in fast-path does not get moved around. Fixes: 35dfaad7188c ("netkit, bpf: Add bpf programmable net device") Reported-by: Jordan Rife Signed-off-by: Daniel Borkmann Cc: Nikolay Aleksandrov Link: https://github.com/cilium/cilium/issues/34042 Acked-by: Jakub Kicinski Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20241004101335.117711-1-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- include/uapi/linux/if_link.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6dc258993b17..2acc7687e017 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1292,6 +1292,19 @@ enum netkit_mode { NETKIT_L3, }; +/* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to + * the BPF program if attached. This also means the latter can + * consume the two fields if they were populated earlier. + * + * NETKIT_SCRUB_DEFAULT zeroes skb->{mark,priority} fields before + * invoking the attached BPF program when the peer device resides + * in a different network namespace. This is the default behavior. + */ +enum netkit_scrub { + NETKIT_SCRUB_NONE, + NETKIT_SCRUB_DEFAULT, +}; + enum { IFLA_NETKIT_UNSPEC, IFLA_NETKIT_PEER_INFO, @@ -1299,6 +1312,8 @@ enum { IFLA_NETKIT_POLICY, IFLA_NETKIT_PEER_POLICY, IFLA_NETKIT_MODE, + IFLA_NETKIT_SCRUB, + IFLA_NETKIT_PEER_SCRUB, __IFLA_NETKIT_MAX, }; #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) -- cgit v1.2.3 From 754e707e20e47482384f6e64eb3af08273292010 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 2 Oct 2024 17:46:07 -0700 Subject: drm/print: Introduce drm_line_printer This drm printer wrapper can be used to increase the robustness of the captured output generated by any other drm_printer to make sure we didn't lost any intermediate lines of the output by adding line numbers to each output line. Helpful for capturing some crash data. v2: Extended short int counters to full int (JohnH) Signed-off-by: Michal Wajdeczko Signed-off-by: John Harrison Cc: Jani Nikula Cc: dri-devel@lists.freedesktop.org Reviewed-by: Jani Nikula Acked-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20241003004611.2323493-8-John.C.Harrison@Intel.com --- include/drm/drm_print.h | 64 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'include') diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index d2676831d765..b3906dc04388 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -177,6 +177,10 @@ struct drm_printer { void *arg; const void *origin; const char *prefix; + struct { + unsigned int series; + unsigned int counter; + } line; enum drm_debug_category category; }; @@ -187,6 +191,7 @@ void __drm_puts_seq_file(struct drm_printer *p, const char *str); void __drm_printfn_info(struct drm_printer *p, struct va_format *vaf); void __drm_printfn_dbg(struct drm_printer *p, struct va_format *vaf); void __drm_printfn_err(struct drm_printer *p, struct va_format *vaf); +void __drm_printfn_line(struct drm_printer *p, struct va_format *vaf); __printf(2, 3) void drm_printf(struct drm_printer *p, const char *f, ...); @@ -411,6 +416,65 @@ static inline struct drm_printer drm_err_printer(struct drm_device *drm, return p; } +/** + * drm_line_printer - construct a &drm_printer that prefixes outputs with line numbers + * @p: the &struct drm_printer which actually generates the output + * @prefix: optional output prefix, or NULL for no prefix + * @series: optional unique series identifier, or 0 to omit identifier in the output + * + * This printer can be used to increase the robustness of the captured output + * to make sure we didn't lost any intermediate lines of the output. Helpful + * while capturing some crash data. + * + * Example 1:: + * + * void crash_dump(struct drm_device *drm) + * { + * static unsigned int id; + * struct drm_printer p = drm_err_printer(drm, "crash"); + * struct drm_printer lp = drm_line_printer(&p, "dump", ++id); + * + * drm_printf(&lp, "foo"); + * drm_printf(&lp, "bar"); + * } + * + * Above code will print into the dmesg something like:: + * + * [ ] 0000:00:00.0: [drm] *ERROR* crash dump 1.1: foo + * [ ] 0000:00:00.0: [drm] *ERROR* crash dump 1.2: bar + * + * Example 2:: + * + * void line_dump(struct device *dev) + * { + * struct drm_printer p = drm_info_printer(dev); + * struct drm_printer lp = drm_line_printer(&p, NULL, 0); + * + * drm_printf(&lp, "foo"); + * drm_printf(&lp, "bar"); + * } + * + * Above code will print:: + * + * [ ] 0000:00:00.0: [drm] 1: foo + * [ ] 0000:00:00.0: [drm] 2: bar + * + * RETURNS: + * The &drm_printer object + */ +static inline struct drm_printer drm_line_printer(struct drm_printer *p, + const char *prefix, + unsigned int series) +{ + struct drm_printer lp = { + .printfn = __drm_printfn_line, + .arg = p, + .prefix = prefix, + .line = { .series = series, }, + }; + return lp; +} + /* * struct device based logging * -- cgit v1.2.3 From 56c594d8df64e726e803652ee9f4ab08659d4574 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Thu, 3 Oct 2024 14:43:09 +0200 Subject: drm: add DRM_SET_CLIENT_NAME ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Giving the opportunity to userspace to associate a free-form name with a drm_file struct is helpful for tracking and debugging. This is similar to the existing DMA_BUF_SET_NAME ioctl. Access to client_name is protected by a mutex, and the 'clients' debugfs file has been updated to print it. Userspace MR to use this ioctl: https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1428 If the string passed by userspace contains chars that would mess up output when it's going to be printed (in dmesg, fdinfo, etc), -EINVAL is returned. A 0-length string is a valid use, and clears the existing name. Reviewed-by: Tvrtko Ursulin Reviewed-by: Dmitry Osipenko Signed-off-by: Pierre-Eric Pelloux-Prayer Link: https://patchwork.freedesktop.org/patch/msgid/20241003124506.470931-2-pierre-eric.pelloux-prayer@amd.com Reviewed-by: Christian König Signed-off-by: Christian König --- include/drm/drm_file.h | 9 +++++++++ include/uapi/drm/drm.h | 17 +++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index 8c0030c77308..d4f1c115ea0f 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -388,6 +388,15 @@ struct drm_file { * Per-file buffer caches used by the PRIME buffer sharing code. */ struct drm_prime_file_private prime; + + /** + * @client_name: + * + * Userspace-provided name; useful for accounting and debugging. + */ + const char *client_name; + /** @name_lock: Protects @client_name. */ + struct mutex client_name_lock; }; /** diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 16122819edfe..7fba37b94401 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -1024,6 +1024,13 @@ struct drm_crtc_queue_sequence { __u64 user_data; /* user data passed to event */ }; +#define DRM_CLIENT_NAME_MAX_LEN 64 +struct drm_set_client_name { + __u64 name_len; + __u64 name; +}; + + #if defined(__cplusplus) } #endif @@ -1288,6 +1295,16 @@ extern "C" { */ #define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb) +/** + * DRM_IOCTL_SET_CLIENT_NAME - Attach a name to a drm_file + * + * Having a name allows for easier tracking and debugging. + * The length of the name (without null ending char) must be + * <= DRM_CLIENT_NAME_MAX_LEN. + * The call will fail if the name contains whitespaces or non-printable chars. + */ +#define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name) + /* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. -- cgit v1.2.3 From 20a4da20e0bd0297bf1ce083362e78b6702f991e Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 4 Oct 2024 11:01:00 +0200 Subject: net: phy: Add support for PHY timing-role configuration via device tree Introduce support for configuring the master/slave role of PHYs based on the `timing-role` property in the device tree. While this functionality is necessary for Single Pair Ethernet (SPE) PHYs (1000/100/10Base-T1) where hardware strap pins may be unavailable or incorrectly set, it works for any PHY type. Signed-off-by: Oleksij Rempel Reviewed-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Reviewed-by: Divya Koppera Signed-off-by: Paolo Abeni --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index a98bc91a0cde..ff762a3d8270 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1260,6 +1260,7 @@ size_t phy_speeds(unsigned int *speeds, size_t size, unsigned long *mask); void of_set_phy_supported(struct phy_device *phydev); void of_set_phy_eee_broken(struct phy_device *phydev); +void of_set_phy_timing_role(struct phy_device *phydev); int phy_speed_down_core(struct phy_device *phydev); /** -- cgit v1.2.3 From 95397784be23f66c5d4280b0a4c4e0d1ee74f2ef Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 2 Sep 2024 10:42:31 +0200 Subject: media: staging: drop omap4iss The omap4 camera driver has seen no progress since forever, and now OMAP4 support has also been dropped from u-boot (1). So it is time to retire this driver. (1): https://lists.denx.de/pipermail/u-boot/2024-July/558846.html Signed-off-by: Hans Verkuil Acked-by: Laurent Pinchart --- include/linux/platform_data/media/omap4iss.h | 66 ---------------------------- 1 file changed, 66 deletions(-) delete mode 100644 include/linux/platform_data/media/omap4iss.h (limited to 'include') diff --git a/include/linux/platform_data/media/omap4iss.h b/include/linux/platform_data/media/omap4iss.h deleted file mode 100644 index 2a511a8fcda7..000000000000 --- a/include/linux/platform_data/media/omap4iss.h +++ /dev/null @@ -1,66 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ARCH_ARM_PLAT_OMAP4_ISS_H -#define ARCH_ARM_PLAT_OMAP4_ISS_H - -#include - -struct iss_device; - -enum iss_interface_type { - ISS_INTERFACE_CSI2A_PHY1, - ISS_INTERFACE_CSI2B_PHY2, -}; - -/** - * struct iss_csiphy_lane: CSI2 lane position and polarity - * @pos: position of the lane - * @pol: polarity of the lane - */ -struct iss_csiphy_lane { - u8 pos; - u8 pol; -}; - -#define ISS_CSIPHY1_NUM_DATA_LANES 4 -#define ISS_CSIPHY2_NUM_DATA_LANES 1 - -/** - * struct iss_csiphy_lanes_cfg - CSI2 lane configuration - * @data: Configuration of one or two data lanes - * @clk: Clock lane configuration - */ -struct iss_csiphy_lanes_cfg { - struct iss_csiphy_lane data[ISS_CSIPHY1_NUM_DATA_LANES]; - struct iss_csiphy_lane clk; -}; - -/** - * struct iss_csi2_platform_data - CSI2 interface platform data - * @crc: Enable the cyclic redundancy check - * @vpclk_div: Video port output clock control - */ -struct iss_csi2_platform_data { - unsigned crc:1; - unsigned vpclk_div:2; - struct iss_csiphy_lanes_cfg lanecfg; -}; - -struct iss_subdev_i2c_board_info { - struct i2c_board_info *board_info; - int i2c_adapter_id; -}; - -struct iss_v4l2_subdevs_group { - struct iss_subdev_i2c_board_info *subdevs; - enum iss_interface_type interface; - union { - struct iss_csi2_platform_data csi2; - } bus; /* gcc < 4.6.0 chokes on anonymous union initializers */ -}; - -struct iss_platform_data { - struct iss_v4l2_subdevs_group *subdevs; - void (*set_constraints)(struct iss_device *iss, bool enable); -}; - -#endif -- cgit v1.2.3 From ec763c234d7f60c5bce0fa2611ba79f5be1af76b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 4 Oct 2024 15:10:28 -0700 Subject: Revert "rtnetlink: add guard for RTNL" This reverts commit 464eb03c4a7cfb32cb3324249193cf6bb5b35152. Once we have a per-netns RTNL, we won't use guard(rtnl). Also, there's no users for now. $ grep -rnI "guard(rtnl" || true $ Suggested-by: Eric Dumazet Link: https://lore.kernel.org/netdev/CANn89i+KoYzUH+VPLdGmLABYf5y4TW0hrM4UAeQQJ9AREty0iw@mail.gmail.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/linux/rtnetlink.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index a7da7dfc06a2..cdfc897f1e3c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -7,7 +7,6 @@ #include #include #include -#include #include extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); @@ -47,8 +46,6 @@ extern int rtnl_is_locked(void); extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); -DEFINE_LOCK_GUARD_0(rtnl, rtnl_lock(), rtnl_unlock()) - extern wait_queue_head_t netdev_unregistering_wq; extern atomic_t dev_unreg_count; extern struct rw_semaphore pernet_ops_rwsem; -- cgit v1.2.3 From 76aed95319da25d6884dff01d5f0149e4b542f96 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 4 Oct 2024 15:10:29 -0700 Subject: rtnetlink: Add per-netns RTNL. The goal is to break RTNL down into per-netns mutex. This patch adds per-netns mutex and its helper functions, rtnl_net_lock() and rtnl_net_unlock(). rtnl_net_lock() acquires the global RTNL and per-netns RTNL mutex, and rtnl_net_unlock() releases them. We will replace 800+ rtnl_lock() with rtnl_net_lock() and finally removes rtnl_lock() in rtnl_net_lock(). When we need to nest per-netns RTNL mutex, we will use __rtnl_net_lock(), and its locking order is defined by rtnl_net_lock_cmp_fn() as follows: 1. init_net is first 2. netns address ascending order Note that the conversion will be done under CONFIG_DEBUG_NET_SMALL_RTNL with LOCKDEP so that we can carefully add the extra mutex without slowing down RTNL operations during conversion. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/linux/rtnetlink.h | 21 +++++++++++++++++++++ include/net/net_namespace.h | 4 ++++ 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index cdfc897f1e3c..edd840a49989 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -92,6 +92,27 @@ static inline bool lockdep_rtnl_is_held(void) #define rcu_replace_pointer_rtnl(rp, p) \ rcu_replace_pointer(rp, p, lockdep_rtnl_is_held()) +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL +void __rtnl_net_lock(struct net *net); +void __rtnl_net_unlock(struct net *net); +void rtnl_net_lock(struct net *net); +void rtnl_net_unlock(struct net *net); +int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b); +#else +static inline void __rtnl_net_lock(struct net *net) {} +static inline void __rtnl_net_unlock(struct net *net) {} + +static inline void rtnl_net_lock(struct net *net) +{ + rtnl_lock(); +} + +static inline void rtnl_net_unlock(struct net *net) +{ + rtnl_unlock(); +} +#endif + static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) { return rtnl_dereference(dev->ingress_queue); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index e67b483cc8bb..873c0f9fdac6 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -188,6 +188,10 @@ struct net { #if IS_ENABLED(CONFIG_SMC) struct netns_smc smc; #endif +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL + /* Move to a better place when the config guard is removed. */ + struct mutex rtnl_mutex; +#endif } __randomize_layout; #include -- cgit v1.2.3 From 844e5e7e656d3a7a904fd5607f8491d6fd01db8e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 4 Oct 2024 15:10:30 -0700 Subject: rtnetlink: Add assertion helpers for per-netns RTNL. Once an RTNL scope is converted with rtnl_net_lock(), we will replace RTNL helper functions inside the scope with the following per-netns alternatives: ASSERT_RTNL() -> ASSERT_RTNL_NET(net) rcu_dereference_rtnl(p) -> rcu_dereference_rtnl_net(net, p) Note that the per-netns helpers are equivalent to the conventional helpers unless CONFIG_DEBUG_NET_SMALL_RTNL is enabled. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/linux/rtnetlink.h | 45 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index edd840a49989..8468a4ce8510 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -51,6 +51,10 @@ extern atomic_t dev_unreg_count; extern struct rw_semaphore pernet_ops_rwsem; extern struct rw_semaphore net_rwsem; +#define ASSERT_RTNL() \ + WARN_ONCE(!rtnl_is_locked(), \ + "RTNL: assertion failed at %s (%d)\n", __FILE__, __LINE__) + #ifdef CONFIG_PROVE_LOCKING extern bool lockdep_rtnl_is_held(void); #else @@ -98,6 +102,22 @@ void __rtnl_net_unlock(struct net *net); void rtnl_net_lock(struct net *net); void rtnl_net_unlock(struct net *net); int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b); + +bool rtnl_net_is_locked(struct net *net); + +#define ASSERT_RTNL_NET(net) \ + WARN_ONCE(!rtnl_net_is_locked(net), \ + "RTNL_NET: assertion failed at %s (%d)\n", \ + __FILE__, __LINE__) + +bool lockdep_rtnl_net_is_held(struct net *net); + +#define rcu_dereference_rtnl_net(net, p) \ + rcu_dereference_check(p, lockdep_rtnl_net_is_held(net)) +#define rtnl_net_dereference(net, p) \ + rcu_dereference_protected(p, lockdep_rtnl_net_is_held(net)) +#define rcu_replace_pointer_rtnl_net(net, rp, p) \ + rcu_replace_pointer(rp, p, lockdep_rtnl_net_is_held(net)) #else static inline void __rtnl_net_lock(struct net *net) {} static inline void __rtnl_net_unlock(struct net *net) {} @@ -111,6 +131,27 @@ static inline void rtnl_net_unlock(struct net *net) { rtnl_unlock(); } + +static inline void ASSERT_RTNL_NET(struct net *net) +{ + ASSERT_RTNL(); +} + +static inline void *rcu_dereference_rtnl_net(struct net *net, void *p) +{ + return rcu_dereference_rtnl(p); +} + +static inline void *rtnl_net_dereference(struct net *net, void *p) +{ + return rtnl_dereference(p); +} + +static inline void *rcu_replace_pointer_rtnl_net(struct net *net, + void *rp, void *p) +{ + return rcu_replace_pointer_rtnl(rp, p); +} #endif static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) @@ -140,10 +181,6 @@ void rtnetlink_init(void); void __rtnl_unlock(void); void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); -#define ASSERT_RTNL() \ - WARN_ONCE(!rtnl_is_locked(), \ - "RTNL: assertion failed at %s (%d)\n", __FILE__, __LINE__) - extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, -- cgit v1.2.3 From 9ab440a9d0426cf7842240891cc457155db1a97e Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Mon, 7 Oct 2024 08:41:44 -0700 Subject: drm/xe/ptl: L3bank mask is not available on the media GT On PTL platforms with media version 30.00, the fuse registers for reporting L3 bank availability to the GT just read out as ~0 and do not provide proper values. Xe does not use the L3 bank mask for anything internally; it only passes the mask through to userspace via the GT topology query. Since we don't have any way to get the real L3 bank mask, we don't want to pass garbage to userspace. Passing a zeroed mask or a copy of the primary GT's L3 bank mask would also be inaccurate and likely to cause confusion for userspace. The best approach is to simply not include L3 in the list of masks returned by the topology query in cases where we aren't able to provide a meaningful value. This won't change the behavior for any existing platforms (where we can always obtain L3 masks successfully for all GTs), it will only prevent us from mis-reporting bad information on upcoming platform(s). There's a good chance this will become a formal workaround in the future, but for now we don't have a lineage number so "no_media_l3" is used in place of a lineage as the OOB workaround descriptor. v2: - Re-calculate query size to properly match data returned. (Gustavo) - Update kerneldoc to clarify that the L3bank mask may not be included in the query results if the hardware doesn't make it available. (Gustavo) Cc: Matt Atwood Cc: Gustavo Sousa Signed-off-by: Shekhar Chauhan Co-developed-by: Matt Roper Signed-off-by: Matt Roper Reviewed-by: Jonathan Cavitt Reviewed-by: Gustavo Sousa Acked-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20241007154143.2021124-2-matthew.d.roper@intel.com --- include/uapi/drm/xe_drm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index b6fbe4988f2e..c4182e95a619 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -512,7 +512,9 @@ struct drm_xe_query_gt_list { * containing the following in mask: * ``DSS_COMPUTE ff ff ff ff 00 00 00 00`` * means 32 DSS are available for compute. - * - %DRM_XE_TOPO_L3_BANK - To query the mask of enabled L3 banks + * - %DRM_XE_TOPO_L3_BANK - To query the mask of enabled L3 banks. This type + * may be omitted if the driver is unable to query the mask from the + * hardware. * - %DRM_XE_TOPO_EU_PER_DSS - To query the mask of Execution Units (EU) * available per Dual Sub Slices (DSS). For example a query response * containing the following in mask: -- cgit v1.2.3 From a8f2cdd27d114ed6c3354a0e39502e6d56215804 Mon Sep 17 00:00:00 2001 From: Dmitry Perchanov Date: Mon, 26 Aug 2024 16:04:23 +0300 Subject: media: v4l: Add luma 16-bit interlaced pixel format The formats added by this patch are: V4L2_PIX_FMT_Y16I Interlaced lumina format primary use in RealSense Depth cameras with stereo stream for left and right image sensors. Signed-off-by: Dmitry Perchanov Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/568efbd75290e286b8ad9e7347b5f43745121020.camel@intel.com Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 27239cb64065..21a8aa575ea3 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -798,6 +798,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_S5C_UYVY_JPG v4l2_fourcc('S', '5', 'C', 'I') /* S5C73M3 interleaved UYVY/JPEG */ #define V4L2_PIX_FMT_Y8I v4l2_fourcc('Y', '8', 'I', ' ') /* Greyscale 8-bit L/R interleaved */ #define V4L2_PIX_FMT_Y12I v4l2_fourcc('Y', '1', '2', 'I') /* Greyscale 12-bit L/R interleaved */ +#define V4L2_PIX_FMT_Y16I v4l2_fourcc('Y', '1', '6', 'I') /* Greyscale 16-bit L/R interleaved */ #define V4L2_PIX_FMT_Z16 v4l2_fourcc('Z', '1', '6', ' ') /* Depth data 16-bit */ #define V4L2_PIX_FMT_MT21C v4l2_fourcc('M', 'T', '2', '1') /* Mediatek compressed block mode */ #define V4L2_PIX_FMT_MM21 v4l2_fourcc('M', 'M', '2', '1') /* Mediatek 8-bit block mode, two non-contiguous planes */ -- cgit v1.2.3 From 55b834873e800a5809787ce6aedcb70ee49a596f Mon Sep 17 00:00:00 2001 From: Dmitry Perchanov Date: Mon, 26 Aug 2024 16:05:04 +0300 Subject: media: uvcvideo: Add luma 16-bit interlaced pixel format The formats added by this patch are: UVC_GUID_FORMAT_Y16I Interlaced lumina format primary use in RealSense Depth cameras with stereo stream for left and right image sensors. Signed-off-by: Dmitry Perchanov Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/a717a912035b0a0f82b2f35719cca0c5269e995f.camel@intel.com Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/linux/usb/uvc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h index 88d96095bcb1..1c16be20c966 100644 --- a/include/linux/usb/uvc.h +++ b/include/linux/usb/uvc.h @@ -118,6 +118,9 @@ #define UVC_GUID_FORMAT_Y12I \ { 'Y', '1', '2', 'I', 0x00, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Y16I \ + { 'Y', '1', '6', 'I', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} #define UVC_GUID_FORMAT_Z16 \ { 'Z', '1', '6', ' ', 0x00, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} -- cgit v1.2.3 From a7e742e416bc331c0f409d6a3630471896e696d3 Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 18 Sep 2024 20:05:39 +0200 Subject: media: uvcvideo: Add support for the D3DFMT_R5G6B5 pixmap type This media format is used by the NXP Semiconductors 1fc9:009b chipset, used by the Kaiweets KTI-W02 infrared camera. Signed-off-by: David Given Reviewed-by: Ricardo Ribalda Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20240918180540.10830-1-dg@cowlark.com Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/linux/usb/uvc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h index 1c16be20c966..bce95153e5a6 100644 --- a/include/linux/usb/uvc.h +++ b/include/linux/usb/uvc.h @@ -143,6 +143,9 @@ #define UVC_GUID_FORMAT_D3DFMT_L8 \ {0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_D3DFMT_R5G6B5 \ + {0x7b, 0xeb, 0x36, 0xe4, 0x4f, 0x52, 0xce, 0x11, \ + 0x9f, 0x53, 0x00, 0x20, 0xaf, 0x0b, 0xa7, 0x70} #define UVC_GUID_FORMAT_KSMEDIA_L8_IR \ {0x32, 0x00, 0x00, 0x00, 0x02, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} -- cgit v1.2.3 From 2298d8a81f2dc6987448e5ddd823f4892194f5b6 Mon Sep 17 00:00:00 2001 From: Haridhar Kalvala Date: Mon, 7 Oct 2024 18:35:08 -0700 Subject: drm/xe/ptl: Add PTL platform definition PTL is an integrated GPU based on the Xe3 architecture. v2: explicitly turn off display until display patches land. Bspec: 72574 Cc: Matt Roper Signed-off-by: Haridhar Kalvala Signed-off-by: Matt Atwood Reviewed-by: Shekhar Chauhan Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20241008013509.61233-6-matthew.s.atwood@intel.com --- include/drm/intel/xe_pciids.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h index 65520a90c17c..6d8d013f74e0 100644 --- a/include/drm/intel/xe_pciids.h +++ b/include/drm/intel/xe_pciids.h @@ -220,4 +220,15 @@ MACRO__(0xE20D, ## __VA_ARGS__), \ MACRO__(0xE212, ## __VA_ARGS__) +#define XE_PTL_IDS(MACRO__, ...) \ + MACRO__(0xB080, ## __VA_ARGS__), \ + MACRO__(0xB081, ## __VA_ARGS__), \ + MACRO__(0xB082, ## __VA_ARGS__), \ + MACRO__(0xB090, ## __VA_ARGS__), \ + MACRO__(0xB091, ## __VA_ARGS__), \ + MACRO__(0xB092, ## __VA_ARGS__), \ + MACRO__(0xB0A0, ## __VA_ARGS__), \ + MACRO__(0xB0A1, ## __VA_ARGS__), \ + MACRO__(0xB0A2, ## __VA_ARGS__) + #endif -- cgit v1.2.3 From 36e69b160705b65bf136c2fb6a1194447eeb8478 Mon Sep 17 00:00:00 2001 From: Dragan Simic Date: Sun, 29 Sep 2024 11:21:16 +0200 Subject: driver core: Add device probe log helper dev_warn_probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some drivers can still provide their functionality to a certain extent even when some of their resource acquisitions eventually fail. In such cases, emitting errors isn't the desired action, but warnings should be emitted instead. To solve this, introduce dev_warn_probe() as a new device probe log helper, which behaves identically as the already existing dev_err_probe(), while it produces warnings instead of errors. The intended use is with the resources that are actually optional for a particular driver. While there, copyedit the kerneldoc for dev_err_probe() a bit, to simplify its wording a bit, and reuse it as the kerneldoc for dev_warn_probe(), with the necessary wording adjustments, of course. Signed-off-by: Dragan Simic Tested-by: Hélène Vulquin Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2be0a28538bb2a3d1bcc91e2ca1f2d0dc09146d9.1727601608.git.dsimic@manjaro.org Signed-off-by: Mark Brown --- include/linux/dev_printk.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/dev_printk.h b/include/linux/dev_printk.h index ca32b5bb28eb..eb2094e43050 100644 --- a/include/linux/dev_printk.h +++ b/include/linux/dev_printk.h @@ -276,6 +276,7 @@ do { \ dev_driver_string(dev), dev_name(dev), ## arg) __printf(3, 4) int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); +__printf(3, 4) int dev_warn_probe(const struct device *dev, int err, const char *fmt, ...); /* Simple helper for dev_err_probe() when ERR_PTR() is to be returned. */ #define dev_err_ptr_probe(dev, ___err, fmt, ...) \ -- cgit v1.2.3 From 0b028ff7e70ecbe5240ad92e36a664af5cf7f382 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 6 Oct 2024 23:55:11 +0100 Subject: auxdisplay: Remove unused functions cfag12864b_getrate() and cfag12864b_isenabled() were both added in commit 70e840499aae ("[PATCH] drivers: add LCD support") but never used. Remove them. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Geert Uytterhoeven Acked-by: Miguel Ojeda Signed-off-by: Andy Shevchenko --- include/linux/cfag12864b.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include') diff --git a/include/linux/cfag12864b.h b/include/linux/cfag12864b.h index 6617d9c68d86..83e6613d12ae 100644 --- a/include/linux/cfag12864b.h +++ b/include/linux/cfag12864b.h @@ -27,13 +27,6 @@ */ extern unsigned char * cfag12864b_buffer; -/* - * Get the refresh rate of the LCD - * - * Returns the refresh rate (hertz). - */ -extern unsigned int cfag12864b_getrate(void); - /* * Enable refreshing * @@ -49,16 +42,6 @@ extern unsigned char cfag12864b_enable(void); */ extern void cfag12864b_disable(void); -/* - * Is enabled refreshing? (is anyone using the module?) - * - * Returns 0 if refreshing is not enabled (anyone is using it), - * or != 0 if refreshing is enabled (someone is using it). - * - * Useful for buffer read-only modules. - */ -extern unsigned char cfag12864b_isenabled(void); - /* * Is the module inited? */ -- cgit v1.2.3 From 581434654e01ec79dd02c21448ac84e2ce2d1a64 Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Tue, 8 Oct 2024 11:24:58 +0000 Subject: workqueue: Adjust WQ_MAX_ACTIVE from 512 to 2048 WQ_MAX_ACTIVE is currently set to 512, which was established approximately 15 yeas ago. However, with the significant increase in machine sizes and capabilities, the previous limit of 256 concurrent tasks is no longer sufficient. Therefore, we propose to increase WQ_MAX_ACTIVE to 2048. and WQ_DFL_ACTIVE is 1024 now. Signed-off-by: Chen Ridong Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 59c2695e12e7..b0dc957c3e56 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -412,7 +412,7 @@ enum wq_flags { }; enum wq_consts { - WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ + WQ_MAX_ACTIVE = 2048, /* I like 2048, better ideas? */ WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE, WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, -- cgit v1.2.3 From aefa398d93d5db7c555be78a605ff015357f127d Mon Sep 17 00:00:00 2001 From: Joshua Hahn Date: Wed, 2 Oct 2024 11:47:16 -0700 Subject: cgroup/rstat: Tracking cgroup-level niced CPU time Cgroup-level CPU statistics currently include time spent on user/system processes, but do not include niced CPU time (despite already being tracked). This patch exposes niced CPU time to the userspace, allowing users to get a better understanding of their hardware limits and can facilitate more informed workload distribution. A new field 'ntime' is added to struct cgroup_base_stat as opposed to struct task_cputime to minimize footprint. Signed-off-by: Joshua Hahn Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 47ae4c4d924c..0a80ef9191a6 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -327,6 +327,7 @@ struct cgroup_base_stat { #ifdef CONFIG_SCHED_CORE u64 forceidle_sum; #endif + u64 ntime; }; /* -- cgit v1.2.3 From 49e4154f4b16345da5e219b23ed9737a6e735bc1 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Wed, 11 Sep 2024 09:00:26 +0800 Subject: tracing: Remove TRACE_EVENT_FL_FILTERED logic After commit dcb0b5575d24 ("tracing: Remove TRACE_EVENT_FL_USE_CALL_FILTER logic"), no one's going to set the TRACE_EVENT_FL_FILTERED or change the call->filter, so remove related logic. Link: https://lore.kernel.org/20240911010026.2302849-1-zhengyejian@huaweicloud.com Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 42bedcddd511..f8f2e52653df 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -326,7 +326,6 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, void trace_event_buffer_commit(struct trace_event_buffer *fbuffer); enum { - TRACE_EVENT_FL_FILTERED_BIT, TRACE_EVENT_FL_CAP_ANY_BIT, TRACE_EVENT_FL_NO_SET_FILTER_BIT, TRACE_EVENT_FL_IGNORE_ENABLE_BIT, @@ -341,7 +340,6 @@ enum { /* * Event flags: - * FILTERED - The event has a filter attached * CAP_ANY - Any user can enable for perf * NO_SET_FILTER - Set when filter has error and is to be ignored * IGNORE_ENABLE - For trace internal events, do not enable with debugfs file @@ -356,7 +354,6 @@ enum { * to a tracepoint yet, then it is cleared when it is. */ enum { - TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT), TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT), @@ -381,7 +378,6 @@ struct trace_event_call { }; struct trace_event event; char *print_fmt; - struct event_filter *filter; /* * Static events can disappear with modules, * where as dynamic ones need their own ref count. -- cgit v1.2.3 From 02f220b5267042d0de649614eec84ded8aeecb4f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Oct 2024 20:26:55 +0200 Subject: wifi: ipw2x00/lib80211: move remaining lib80211 into libipw There's already much code in libipw that used to be shared with more drivers, but now with the prior cleanups, those old Intel ipw2x00 drivers are also the only ones using whatever is now left of lib80211. Move lib80211 entirely into libipw. Link: https://patch.msgid.link/20241007202707.915ef7b9e7c7.Ib9876d2fe3c90f11d6df458b16d0b7d4bf551a8d@changeid Signed-off-by: Johannes Berg --- include/net/lib80211.h | 122 ------------------------------------------------- 1 file changed, 122 deletions(-) delete mode 100644 include/net/lib80211.h (limited to 'include') diff --git a/include/net/lib80211.h b/include/net/lib80211.h deleted file mode 100644 index fd0f15d87d80..000000000000 --- a/include/net/lib80211.h +++ /dev/null @@ -1,122 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * lib80211.h -- common bits for IEEE802.11 wireless drivers - * - * Copyright (c) 2008, John W. Linville - * - * Some bits copied from old ieee80211 component, w/ original copyright - * notices below: - * - * Original code based on Host AP (software wireless LAN access point) driver - * for Intersil Prism2/2.5/3. - * - * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen - * - * Copyright (c) 2002-2003, Jouni Malinen - * - * Adaption to a generic IEEE 802.11 stack by James Ketrenos - * - * - * Copyright (c) 2004, Intel Corporation - * - */ - -#ifndef LIB80211_H -#define LIB80211_H - -#include -#include -#include -#include -#include -#include -#include -#include - -#define NUM_WEP_KEYS 4 - -enum { - IEEE80211_CRYPTO_TKIP_COUNTERMEASURES = (1 << 0), -}; - -struct module; - -struct lib80211_crypto_ops { - const char *name; - struct list_head list; - - /* init new crypto context (e.g., allocate private data space, - * select IV, etc.); returns NULL on failure or pointer to allocated - * private data on success */ - void *(*init) (int keyidx); - - /* deinitialize crypto context and free allocated private data */ - void (*deinit) (void *priv); - - /* encrypt/decrypt return < 0 on error or >= 0 on success. The return - * value from decrypt_mpdu is passed as the keyidx value for - * decrypt_msdu. skb must have enough head and tail room for the - * encryption; if not, error will be returned; these functions are - * called for all MPDUs (i.e., fragments). - */ - int (*encrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv); - int (*decrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv); - - /* These functions are called for full MSDUs, i.e. full frames. - * These can be NULL if full MSDU operations are not needed. */ - int (*encrypt_msdu) (struct sk_buff * skb, int hdr_len, void *priv); - int (*decrypt_msdu) (struct sk_buff * skb, int keyidx, int hdr_len, - void *priv); - - int (*set_key) (void *key, int len, u8 * seq, void *priv); - int (*get_key) (void *key, int len, u8 * seq, void *priv); - - /* procfs handler for printing out key information and possible - * statistics */ - void (*print_stats) (struct seq_file *m, void *priv); - - /* Crypto specific flag get/set for configuration settings */ - unsigned long (*get_flags) (void *priv); - unsigned long (*set_flags) (unsigned long flags, void *priv); - - /* maximum number of bytes added by encryption; encrypt buf is - * allocated with extra_prefix_len bytes, copy of in_buf, and - * extra_postfix_len; encrypt need not use all this space, but - * the result must start at the beginning of the buffer and correct - * length must be returned */ - int extra_mpdu_prefix_len, extra_mpdu_postfix_len; - int extra_msdu_prefix_len, extra_msdu_postfix_len; - - struct module *owner; -}; - -struct lib80211_crypt_data { - struct list_head list; /* delayed deletion list */ - const struct lib80211_crypto_ops *ops; - void *priv; - atomic_t refcnt; -}; - -struct lib80211_crypt_info { - char *name; - /* Most clients will already have a lock, - so just point to that. */ - spinlock_t *lock; - - struct lib80211_crypt_data *crypt[NUM_WEP_KEYS]; - int tx_keyidx; /* default TX key index (crypt[tx_keyidx]) */ - struct list_head crypt_deinit_list; - struct timer_list crypt_deinit_timer; - int crypt_quiesced; -}; - -int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name, - spinlock_t *lock); -void lib80211_crypt_info_free(struct lib80211_crypt_info *info); -int lib80211_register_crypto_ops(const struct lib80211_crypto_ops *ops); -int lib80211_unregister_crypto_ops(const struct lib80211_crypto_ops *ops); -const struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name); -void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info, - struct lib80211_crypt_data **crypt); - -#endif /* LIB80211_H */ -- cgit v1.2.3 From 3a1d429ebd43bcfdf3590096ca72cbf593d1598b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Oct 2024 21:02:53 +0200 Subject: wifi: wext/libipw: move spy implementation to libipw There's no driver left using this other than ipw2200, so move the data bookkeeping and code into libipw. Link: https://patch.msgid.link/20241007210254.037d864cda7d.Ib2197cb056ff05746d3521a5fba637062acb7314@changeid Signed-off-by: Johannes Berg --- include/net/iw_handler.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index 7af1082ea9a0..a7b502958d27 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -418,8 +418,6 @@ struct iw_spy_data { struct libipw_device; /* The struct */ struct iw_public_data { - /* Driver enhanced spy support */ - struct iw_spy_data * spy_data; /* Legacy structure managed by the ipw2x00-specific IEEE 802.11 layer */ struct libipw_device * libipw; }; @@ -443,22 +441,6 @@ static inline void wireless_nlevent_flush(void) {} /* We may need a function to send a stream of events to user space. * More on that later... */ -/* Standard handler for SIOCSIWSPY */ -int iw_handler_set_spy(struct net_device *dev, struct iw_request_info *info, - union iwreq_data *wrqu, char *extra); -/* Standard handler for SIOCGIWSPY */ -int iw_handler_get_spy(struct net_device *dev, struct iw_request_info *info, - union iwreq_data *wrqu, char *extra); -/* Standard handler for SIOCSIWTHRSPY */ -int iw_handler_set_thrspy(struct net_device *dev, struct iw_request_info *info, - union iwreq_data *wrqu, char *extra); -/* Standard handler for SIOCGIWTHRSPY */ -int iw_handler_get_thrspy(struct net_device *dev, struct iw_request_info *info, - union iwreq_data *wrqu, char *extra); -/* Driver call to update spy records */ -void wireless_spy_update(struct net_device *dev, unsigned char *address, - struct iw_quality *wstats); - /************************* INLINE FUNCTIONS *************************/ /* * Function that are so simple that it's more efficient inlining them -- cgit v1.2.3 From 836265d31631e28000fc8917ce697fc687a58724 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Oct 2024 21:35:25 +0200 Subject: wifi: remove iw_public_data from struct net_device Given the previous patches, we no longer need the struct iw_public_data etc., it's only used by the old Intel drivers (and ps3_gelic creates it but then doesn't use it). Remove all of that, including the pointer in struct net_device. Link: https://patch.msgid.link/20241007213525.8b2d52b60531.I6a27aaf30bded9a0977f07f47fba2bd31a3b3330@changeid Signed-off-by: Johannes Berg --- include/linux/netdevice.h | 2 -- include/net/iw_handler.h | 18 ------------------ 2 files changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e87b5e488325..f0abed1e6e45 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1773,7 +1773,6 @@ enum netdev_reg_state { * @wireless_handlers: List of functions to handle Wireless Extensions, * instead of ioctl, * see for details. - * @wireless_data: Instance data managed by the core of wireless extensions * * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions @@ -2150,7 +2149,6 @@ struct net_device { #ifdef CONFIG_WIRELESS_EXT const struct iw_handler_def *wireless_handlers; - struct iw_public_data *wireless_data; #endif const struct ethtool_ops *ethtool_ops; #ifdef CONFIG_NET_L3_MASTER_DEV diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index a7b502958d27..fc44fcca1d5c 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -404,24 +404,6 @@ struct iw_spy_data { u_char spy_thr_under[IW_MAX_SPY]; }; -/* --------------------- DEVICE WIRELESS DATA --------------------- */ -/* - * This is all the wireless data specific to a device instance that - * is managed by the core of Wireless Extensions or the 802.11 layer. - * We only keep pointer to those structures, so that a driver is free - * to share them between instances. - * This structure should be initialised before registering the device. - * Access to this data follow the same rules as any other struct net_device - * data (i.e. valid as long as struct net_device exist, same locking rules). - */ -/* Forward declaration */ -struct libipw_device; -/* The struct */ -struct iw_public_data { - /* Legacy structure managed by the ipw2x00-specific IEEE 802.11 layer */ - struct libipw_device * libipw; -}; - /**************************** PROTOTYPES ****************************/ /* * Functions part of the Wireless Extensions (defined in net/wireless/wext-core.c). -- cgit v1.2.3 From aee809aaa2d13bf560fe38d28c4969605e6d9d0e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Oct 2024 21:47:16 +0200 Subject: wifi: cfg80211: unexport wireless_nlevent_flush() This no longer needs to be exported, so don't export it. Link: https://patch.msgid.link/20241007214715.3dd736dc3ac0.I1388536e99c37f28a007dd753c473ad21513d9a9@changeid Signed-off-by: Johannes Berg --- include/net/iw_handler.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index fc44fcca1d5c..804587b7592b 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -413,12 +413,6 @@ struct iw_spy_data { /* Send a single event to user space */ void wireless_send_event(struct net_device *dev, unsigned int cmd, union iwreq_data *wrqu, const char *extra); -#ifdef CONFIG_WEXT_CORE -/* flush all previous wext events - if work is done from netdev notifiers */ -void wireless_nlevent_flush(void); -#else -static inline void wireless_nlevent_flush(void) {} -#endif /* We may need a function to send a stream of events to user space. * More on that later... */ -- cgit v1.2.3 From 9e1a98aac11b757e2634304bbe6bed5f5a58e0ee Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Oct 2024 21:50:25 +0200 Subject: wifi: wext: merge adjacent CONFIG_COMPAT ifdef blocks Simplify this, and also add a comment at the #endif. Link: https://patch.msgid.link/20241007215025.5ecdad1e02ed.I54efa895efc496e06ba41e1c39c9df9e23b0171f@changeid Signed-off-by: Johannes Berg --- include/linux/wireless.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/wireless.h b/include/linux/wireless.h index e6e34d74dda0..03e5d3fe226d 100644 --- a/include/linux/wireless.h +++ b/include/linux/wireless.h @@ -21,8 +21,7 @@ struct compat_iw_point { __u16 length; __u16 flags; }; -#endif -#ifdef CONFIG_COMPAT + struct __compat_iw_event { __u16 len; /* Real length of this stuff */ __u16 cmd; /* Wireless IOCTL */ @@ -49,5 +48,5 @@ struct __compat_iw_event { #define IW_EV_COMPAT_POINT_LEN \ (IW_EV_COMPAT_LCP_LEN + sizeof(struct compat_iw_point) - \ IW_EV_COMPAT_POINT_OFF) -#endif +#endif /* CONFIG_COMPAT */ #endif /* _LINUX_WIRELESS_H */ -- cgit v1.2.3 From ff919efb5fe8256fba132b5f2c58df3c38bdd0b3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Oct 2024 22:00:03 +0200 Subject: wireless: wext: shorten struct iw_ioctl_description There's no need for "future" extensions in an internal struct, and we don't need a u32 for flags, use just a u8. Also remove the unused IW_DESCR_FLAG_WAIT flag. Link: https://patch.msgid.link/20241007220003.309bd52fa763.I9a1229fa7f2be53d4f50e63671ed441d0968bb41@changeid Signed-off-by: Johannes Berg --- include/net/iw_handler.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index 804587b7592b..c9b46b996197 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -279,8 +279,6 @@ #define IW_DESCR_FLAG_RESTRICT 0x0004 /* GET : request is ROOT only */ /* SET : Omit payload from generated iwevent */ #define IW_DESCR_FLAG_NOMAX 0x0008 /* GET : no limit on request size */ -/* Driver level flags */ -#define IW_DESCR_FLAG_WAIT 0x0100 /* Wait for driver event */ /****************************** TYPES ******************************/ @@ -373,11 +371,10 @@ struct iw_handler_def { */ struct iw_ioctl_description { __u8 header_type; /* NULL, iw_point or other */ - __u8 token_type; /* Future */ + __u8 flags; /* Special handling of the request */ __u16 token_size; /* Granularity of payload */ __u16 min_tokens; /* Min acceptable token number */ __u16 max_tokens; /* Max acceptable token number */ - __u32 flags; /* Special handling of the request */ }; /* Need to think of short header translation table. Later. */ -- cgit v1.2.3 From da5e06dee58ad153a4933fd40fc53d571bfef373 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Sun, 6 Oct 2024 07:26:09 +0900 Subject: net-timestamp: namespacify the sysctl_tstamp_allow_data Let it be tuned in per netns by admins. Signed-off-by: Jason Xing Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20241005222609.94980-1-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- include/net/netns/core.h | 1 + include/net/sock.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 78214f1b43a2..9b36f0ff0c20 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -15,6 +15,7 @@ struct netns_core { int sysctl_somaxconn; int sysctl_optmem_max; u8 sysctl_txrehash; + u8 sysctl_tstamp_allow_data; #ifdef CONFIG_PROC_FS struct prot_inuse __percpu *prot_inuse; diff --git a/include/net/sock.h b/include/net/sock.h index e282127092ab..b32f1424ecc5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2824,8 +2824,6 @@ void sk_get_meminfo(const struct sock *sk, u32 *meminfo); extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; -extern int sysctl_tstamp_allow_data; - extern __u32 sysctl_wmem_default; extern __u32 sysctl_rmem_default; -- cgit v1.2.3 From 3fe3dbaf26723c473d42a58b636a2500586b821e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 7 Oct 2024 01:44:56 +0100 Subject: caif: Remove unused cfsrvl_getphyid cfsrvl_getphyid() has been unused since 2011's commit f36214408470 ("caif: Use RCU and lists in cfcnfg.c for managing caif link layers") Remove it. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241007004456.149899-1-linux@treblig.org Signed-off-by: Jakub Kicinski --- include/net/caif/cfsrvl.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/caif/cfsrvl.h b/include/net/caif/cfsrvl.h index 5ee7b322e18b..a000dc45f966 100644 --- a/include/net/caif/cfsrvl.h +++ b/include/net/caif/cfsrvl.h @@ -40,7 +40,6 @@ void cfsrvl_init(struct cfsrvl *service, struct dev_info *dev_info, bool supports_flowctrl); bool cfsrvl_ready(struct cfsrvl *service, int *err); -u8 cfsrvl_getphyid(struct cflayer *layer); static inline void cfsrvl_get(struct cflayer *layr) { -- cgit v1.2.3 From 4a8840af5f53f2902eba91130fae650879f18e7a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 8 Oct 2024 12:17:19 -0700 Subject: tracepoints: Use new static branch API The old static key API is deprecated. Switch to the new one. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Alice Ryhl Link: https://lore.kernel.org/7a08dae3c5eddb14b13864923c1b58ac1f4af83c.1728414936.git.jpoimboe@kernel.org Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint-defs.h | 4 ++-- include/linux/tracepoint.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 4dc4955f0fbf..60a6e8314d4c 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -31,7 +31,7 @@ struct tracepoint_func { struct tracepoint { const char *name; /* Tracepoint name */ - struct static_key key; + struct static_key_false key; struct static_call_key *static_call_key; void *static_call_tramp; void *iterator; @@ -83,7 +83,7 @@ struct bpf_raw_event_map { #ifdef CONFIG_TRACEPOINTS # define tracepoint_enabled(tp) \ - static_key_false(&(__tracepoint_##tp).key) + static_branch_unlikely(&(__tracepoint_##tp).key) #else # define tracepoint_enabled(tracepoint) false #endif diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 93a9f3070b48..2a29334bbc02 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -248,7 +248,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __DECLARE_TRACE_RCU(name, proto, args, cond) \ static inline void trace_##name##_rcuidle(proto) \ { \ - if (static_key_false(&__tracepoint_##name.key)) \ + if (static_branch_unlikely(&__tracepoint_##name.key)) \ __DO_TRACE(name, \ TP_ARGS(args), \ TP_CONDITION(cond), 1); \ @@ -274,7 +274,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - if (static_key_false(&__tracepoint_##name.key)) \ + if (static_branch_unlikely(&__tracepoint_##name.key)) \ __DO_TRACE(name, \ TP_ARGS(args), \ TP_CONDITION(cond), 0); \ @@ -311,7 +311,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) static inline bool \ trace_##name##_enabled(void) \ { \ - return static_key_false(&__tracepoint_##name.key); \ + return static_branch_unlikely(&__tracepoint_##name.key);\ } /* @@ -328,7 +328,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) struct tracepoint __tracepoint_##_name __used \ __section("__tracepoints") = { \ .name = __tpstrtab_##_name, \ - .key = STATIC_KEY_INIT_FALSE, \ + .key = STATIC_KEY_FALSE_INIT, \ .static_call_key = &STATIC_CALL_KEY(tp_func_##_name), \ .static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \ .iterator = &__traceiter_##_name, \ -- cgit v1.2.3 From 48bcda6848232667f13b4e97588de488c83c37d4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 3 Oct 2024 18:16:29 -0400 Subject: tracing: Remove definition of trace_*_rcuidle() The trace_*_rcuidle() variant of a tracepoint was to handle places where a tracepoint was located but RCU was not "watching". All those locations have been removed, and RCU should be watching where all tracepoints are located. We can now remove the trace_*_rcuidle() variant. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Mark Rutland Cc: Joel Fernandes Link: https://lore.kernel.org/20241003181629.36209057@gandalf.local.home Acked-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 50 ++------------------------------------- include/trace/events/preemptirq.h | 8 ------- 2 files changed, 2 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 2a29334bbc02..7e4af7b3633c 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -196,67 +196,25 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __DO_TRACE_CALL(name, args) __traceiter_##name(NULL, args) #endif /* CONFIG_HAVE_STATIC_CALL */ -/* - * ARCH_WANTS_NO_INSTR archs are expected to have sanitized entry and idle - * code that disallow any/all tracing/instrumentation when RCU isn't watching. - */ -#ifdef CONFIG_ARCH_WANTS_NO_INSTR -#define RCUIDLE_COND(rcuidle) (rcuidle) -#else -/* srcu can't be used from NMI */ -#define RCUIDLE_COND(rcuidle) (rcuidle && in_nmi()) -#endif - /* * it_func[0] is never NULL because there is at least one element in the array * when the array itself is non NULL. */ -#define __DO_TRACE(name, args, cond, rcuidle) \ +#define __DO_TRACE(name, args, cond) \ do { \ int __maybe_unused __idx = 0; \ \ if (!(cond)) \ return; \ \ - if (WARN_ONCE(RCUIDLE_COND(rcuidle), \ - "Bad RCU usage for tracepoint")) \ - return; \ - \ /* keep srcu and sched-rcu usage consistent */ \ preempt_disable_notrace(); \ \ - /* \ - * For rcuidle callers, use srcu since sched-rcu \ - * doesn't work from the idle path. \ - */ \ - if (rcuidle) { \ - __idx = srcu_read_lock_notrace(&tracepoint_srcu);\ - ct_irq_enter_irqson(); \ - } \ - \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ \ - if (rcuidle) { \ - ct_irq_exit_irqson(); \ - srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\ - } \ - \ preempt_enable_notrace(); \ } while (0) -#ifndef MODULE -#define __DECLARE_TRACE_RCU(name, proto, args, cond) \ - static inline void trace_##name##_rcuidle(proto) \ - { \ - if (static_branch_unlikely(&__tracepoint_##name.key)) \ - __DO_TRACE(name, \ - TP_ARGS(args), \ - TP_CONDITION(cond), 1); \ - } -#else -#define __DECLARE_TRACE_RCU(name, proto, args, cond) -#endif - /* * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the @@ -277,14 +235,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) if (static_branch_unlikely(&__tracepoint_##name.key)) \ __DO_TRACE(name, \ TP_ARGS(args), \ - TP_CONDITION(cond), 0); \ + TP_CONDITION(cond)); \ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ WARN_ONCE(!rcu_is_watching(), \ "RCU not watching for tracepoint"); \ } \ } \ - __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \ - PARAMS(cond)) \ static inline int \ register_trace_##name(void (*probe)(data_proto), void *data) \ { \ @@ -375,8 +331,6 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ static inline void trace_##name(proto) \ { } \ - static inline void trace_##name##_rcuidle(proto) \ - { } \ static inline int \ register_trace_##name(void (*probe)(data_proto), \ void *data) \ diff --git a/include/trace/events/preemptirq.h b/include/trace/events/preemptirq.h index 3f249e150c0c..f99562d2b496 100644 --- a/include/trace/events/preemptirq.h +++ b/include/trace/events/preemptirq.h @@ -43,8 +43,6 @@ DEFINE_EVENT(preemptirq_template, irq_enable, #else #define trace_irq_enable(...) #define trace_irq_disable(...) -#define trace_irq_enable_rcuidle(...) -#define trace_irq_disable_rcuidle(...) #endif #ifdef CONFIG_TRACE_PREEMPT_TOGGLE @@ -58,8 +56,6 @@ DEFINE_EVENT(preemptirq_template, preempt_enable, #else #define trace_preempt_enable(...) #define trace_preempt_disable(...) -#define trace_preempt_enable_rcuidle(...) -#define trace_preempt_disable_rcuidle(...) #endif #endif /* _TRACE_PREEMPTIRQ_H */ @@ -69,10 +65,6 @@ DEFINE_EVENT(preemptirq_template, preempt_enable, #else /* !CONFIG_PREEMPTIRQ_TRACEPOINTS */ #define trace_irq_enable(...) #define trace_irq_disable(...) -#define trace_irq_enable_rcuidle(...) -#define trace_irq_disable_rcuidle(...) #define trace_preempt_enable(...) #define trace_preempt_disable(...) -#define trace_preempt_enable_rcuidle(...) -#define trace_preempt_disable_rcuidle(...) #endif -- cgit v1.2.3 From e53244e2c8931f9e80c1841293aea86ef8ad32a3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 3 Oct 2024 18:42:20 -0400 Subject: tracepoint: Remove SRCU protection With the removal of the trace_*_rcuidle() tracepoints, there is no reason to protect tracepoints with SRCU. The reason the SRCU protection was added, was because it can protect tracepoints when RCU is not "watching". Now that tracepoints are only used when RCU is watching, remove the SRCU protection. It just made things more complex and confusing anyway. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Joel Fernandes Link: https://lore.kernel.org/20241003184220.0dc21d35@gandalf.local.home Acked-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 7e4af7b3633c..3d33b9872cec 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -32,8 +32,6 @@ struct trace_eval_map { #define TRACEPOINT_DEFAULT_PRIO 10 -extern struct srcu_struct tracepoint_srcu; - extern int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data); extern int @@ -109,7 +107,6 @@ void for_each_tracepoint_in_module(struct module *mod, #ifdef CONFIG_TRACEPOINTS static inline void tracepoint_synchronize_unregister(void) { - synchronize_srcu(&tracepoint_srcu); synchronize_rcu(); } #else @@ -207,7 +204,6 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) if (!(cond)) \ return; \ \ - /* keep srcu and sched-rcu usage consistent */ \ preempt_disable_notrace(); \ \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ -- cgit v1.2.3 From db03488897a70367aeafe82d07a78943d2a6068e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 9 Oct 2024 08:33:05 +0200 Subject: Revert "wifi: cfg80211: unexport wireless_nlevent_flush()" Revert this, I neglected to take into account the fact that cfg80211 itself can be a module, but wext is always builtin. Fixes: aee809aaa2d1 ("wifi: cfg80211: unexport wireless_nlevent_flush()") Signed-off-by: Johannes Berg --- include/net/iw_handler.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index c9b46b996197..b80e474cb0aa 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -410,6 +410,12 @@ struct iw_spy_data { /* Send a single event to user space */ void wireless_send_event(struct net_device *dev, unsigned int cmd, union iwreq_data *wrqu, const char *extra); +#ifdef CONFIG_WEXT_CORE +/* flush all previous wext events - if work is done from netdev notifiers */ +void wireless_nlevent_flush(void); +#else +static inline void wireless_nlevent_flush(void) {} +#endif /* We may need a function to send a stream of events to user space. * More on that later... */ -- cgit v1.2.3 From 975bdea8c470cf10637c58129edaae731fec9e93 Mon Sep 17 00:00:00 2001 From: Dzmitry Sankouski Date: Sun, 6 Oct 2024 21:18:19 +0300 Subject: drm/mipi-dsi: add mipi_dsi_compression_mode_multi mipi_dsi_compression_mode_multi can help with error handling. Signed-off-by: Dzmitry Sankouski Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20241006-starqltechn_integration_upstream-v6-1-8336b9cd6c34@gmail.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20241006-starqltechn_integration_upstream-v6-1-8336b9cd6c34@gmail.com --- include/drm/drm_mipi_dsi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index f725f8654611..94400a78031f 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -280,6 +280,8 @@ void mipi_dsi_compression_mode_ext_multi(struct mipi_dsi_multi_context *ctx, bool enable, enum mipi_dsi_compression_algo algo, unsigned int pps_selector); +void mipi_dsi_compression_mode_multi(struct mipi_dsi_multi_context *ctx, + bool enable); void mipi_dsi_picture_parameter_set_multi(struct mipi_dsi_multi_context *ctx, const struct drm_dsc_picture_parameter_set *pps); -- cgit v1.2.3 From f7a870d0be12e3ae38cbe899d858994c5b51f22b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 9 Oct 2024 17:15:35 +0900 Subject: ata: libata: Remove unused macro definitions ATA_TMOUT_BOOT and ATA_TMOUT_BOOT_QUICK are not used anywhere. Delete these definitions. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20241009081535.376994-1-dlemoal@kernel.org Signed-off-by: Niklas Cassel --- include/linux/libata.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 9b4a6ff03235..c1a85d46eba6 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -270,9 +270,7 @@ enum { /* bits 24:31 of host->flags are reserved for LLD specific flags */ - /* various lengths of time */ - ATA_TMOUT_BOOT = 30000, /* heuristic */ - ATA_TMOUT_BOOT_QUICK = 7000, /* heuristic */ + /* Various lengths of time */ ATA_TMOUT_INTERNAL_QUICK = 5000, ATA_TMOUT_MAX_PARK = 30000, -- cgit v1.2.3 From d12586e1072d92070783c854819a0ca8c82c5439 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sat, 5 Oct 2024 23:38:25 +0200 Subject: platform/x86: wmi: Implement proper shutdown handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When performing a system shutdown under Windows, all WMI clients are terminated. This means that the ACPI BIOS might expect all WMI devices to be disabled when shutting down. Emulate this behaviour by disabling all active WMI devices during shutdown. Also introduce a new WMI driver callback to allow WMI drivers to perform any device-specific actions before disabling the WMI device. Tested on a Dell Inspiron 3505. Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20241005213825.701887-2-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/wmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/wmi.h b/include/linux/wmi.h index 3275470b5531..120019677fc6 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -56,6 +56,7 @@ u8 wmidev_instance_count(struct wmi_device *wdev); * @no_singleton: Driver can be instantiated multiple times * @probe: Callback for device binding * @remove: Callback for device unbinding + * @shutdown: Callback for device shutdown * @notify: Callback for receiving WMI events * * This represents WMI drivers which handle WMI devices. @@ -68,6 +69,7 @@ struct wmi_driver { int (*probe)(struct wmi_device *wdev, const void *context); void (*remove)(struct wmi_device *wdev); + void (*shutdown)(struct wmi_device *wdev); void (*notify)(struct wmi_device *device, union acpi_object *data); }; -- cgit v1.2.3 From f042365a26b07006064c2cfa2293e16cad243b0d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 4 Oct 2024 11:20:56 +0100 Subject: net: pcs: xpcs: provide a helper to get the phylink pcs given xpcs Provide a helper to provide the pointer to the phylink_pcs struct given a valid xpcs pointer. This will be necessary when we make struct dw_xpcs private to pcs-xpcs.c Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/linux/pcs/pcs-xpcs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index abda475111d1..868515f3cc88 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -64,6 +64,7 @@ struct dw_xpcs { bool need_reset; }; +struct phylink_pcs *xpcs_to_phylink_pcs(struct dw_xpcs *xpcs); int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces); int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, -- cgit v1.2.3 From accd5f5cd2e1b0ed6805a7c24765648d213561e5 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 4 Oct 2024 11:21:01 +0100 Subject: net: pcs: xpcs: move definition of struct dw_xpcs to private header There should be no reason for anything outside the XPCS code to know the contents of struct dw_xpcs - this is a private structure to XPCS. Move the definition to the private pcs-xpcs.h header, leaving a declaration in the global pcs/pcs-xpcs.h Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/linux/pcs/pcs-xpcs.h | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 868515f3cc88..b5b5d17998b8 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -21,8 +21,6 @@ #define DW_AN_C37_1000BASEX 4 #define DW_10GBASER 5 -struct dw_xpcs_desc; - enum dw_xpcs_pcs_id { DW_XPCS_ID_NATIVE = 0, NXP_SJA1105_XPCS_ID = 0x00000010, @@ -48,21 +46,7 @@ struct dw_xpcs_info { u32 pma; }; -enum dw_xpcs_clock { - DW_XPCS_CORE_CLK, - DW_XPCS_PAD_CLK, - DW_XPCS_NUM_CLKS, -}; - -struct dw_xpcs { - struct dw_xpcs_info info; - const struct dw_xpcs_desc *desc; - struct mdio_device *mdiodev; - struct clk_bulk_data clks[DW_XPCS_NUM_CLKS]; - struct phylink_pcs pcs; - phy_interface_t interface; - bool need_reset; -}; +struct dw_xpcs; struct phylink_pcs *xpcs_to_phylink_pcs(struct dw_xpcs *xpcs); int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); -- cgit v1.2.3 From 20503272422693d793b84f88bf23fe4e955d3a33 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 6 Oct 2024 08:17:58 +0100 Subject: ptp: Add support for the AMZNC10C 'vmclock' device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vmclock device addresses the problem of live migration with precision clocks. The tolerances of a hardware counter (e.g. TSC) are typically around ±50PPM. A guest will use NTP/PTP/PPS to discipline that counter against an external source of 'real' time, and track the precise frequency of the counter as it changes with environmental conditions. When a guest is live migrated, anything it knows about the frequency of the underlying counter becomes invalid. It may move from a host where the counter running at -50PPM of its nominal frequency, to a host where it runs at +50PPM. There will also be a step change in the value of the counter, as the correctness of its absolute value at migration is limited by the accuracy of the source and destination host's time synchronization. In its simplest form, the device merely advertises a 'disruption_marker' which indicates that the guest should throw away any NTP synchronization it thinks it has, and start again. Because the shared memory region can be exposed all the way to userspace through the /dev/vmclock0 node, applications can still use time from a fast vDSO 'system call', and check the disruption marker to be sure that their timestamp is indeed truthful. The structure also allows for the precise time, as known by the host, to be exposed directly to guests so that they don't have to wait for NTP to resync from scratch. The PTP driver consumes this information if present. Like the KVM PTP clock, this PTP driver can convert TSC-based cross timestamps into KVM clock values. Unlike the KVM PTP clock, it does so only when such is actually helpful. The values and fields are based on the nascent virtio-rtc specification, and the intent is that a version (hopefully precisely this version) of this structure will be included as an optional part of that spec. In the meantime, this driver supports the simple ACPI form of the device which is being shipped in certain commercial hypervisors (and submitted for inclusion in QEMU). Signed-off-by: David Woodhouse Acked-by: Richard Cochran Signed-off-by: David S. Miller --- include/uapi/linux/vmclock-abi.h | 182 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 include/uapi/linux/vmclock-abi.h (limited to 'include') diff --git a/include/uapi/linux/vmclock-abi.h b/include/uapi/linux/vmclock-abi.h new file mode 100644 index 000000000000..2d99b29ac44a --- /dev/null +++ b/include/uapi/linux/vmclock-abi.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ + +/* + * This structure provides a vDSO-style clock to VM guests, exposing the + * relationship (or lack thereof) between the CPU clock (TSC, timebase, arch + * counter, etc.) and real time. It is designed to address the problem of + * live migration, which other clock enlightenments do not. + * + * When a guest is live migrated, this affects the clock in two ways. + * + * First, even between identical hosts the actual frequency of the underlying + * counter will change within the tolerances of its specification (typically + * ±50PPM, or 4 seconds a day). This frequency also varies over time on the + * same host, but can be tracked by NTP as it generally varies slowly. With + * live migration there is a step change in the frequency, with no warning. + * + * Second, there may be a step change in the value of the counter itself, as + * its accuracy is limited by the precision of the NTP synchronization on the + * source and destination hosts. + * + * So any calibration (NTP, PTP, etc.) which the guest has done on the source + * host before migration is invalid, and needs to be redone on the new host. + * + * In its most basic mode, this structure provides only an indication to the + * guest that live migration has occurred. This allows the guest to know that + * its clock is invalid and take remedial action. For applications that need + * reliable accurate timestamps (e.g. distributed databases), the structure + * can be mapped all the way to userspace. This allows the application to see + * directly for itself that the clock is disrupted and take appropriate + * action, even when using a vDSO-style method to get the time instead of a + * system call. + * + * In its more advanced mode. this structure can also be used to expose the + * precise relationship of the CPU counter to real time, as calibrated by the + * host. This means that userspace applications can have accurate time + * immediately after live migration, rather than having to pause operations + * and wait for NTP to recover. This mode does, of course, rely on the + * counter being reliable and consistent across CPUs. + * + * Note that this must be true UTC, never with smeared leap seconds. If a + * guest wishes to construct a smeared clock, it can do so. Presenting a + * smeared clock through this interface would be problematic because it + * actually messes with the apparent counter *period*. A linear smearing + * of 1 ms per second would effectively tweak the counter period by 1000PPM + * at the start/end of the smearing period, while a sinusoidal smear would + * basically be impossible to represent. + * + * This structure is offered with the intent that it be adopted into the + * nascent virtio-rtc standard, as a virtio-rtc that does not address the live + * migration problem seems a little less than fit for purpose. For that + * reason, certain fields use precisely the same numeric definitions as in + * the virtio-rtc proposal. The structure can also be exposed through an ACPI + * device with the CID "VMCLOCK", modelled on the "VMGENID" device except for + * the fact that it uses a real _CRS to convey the address of the structure + * (which should be a full page, to allow for mapping directly to userspace). + */ + +#ifndef __VMCLOCK_ABI_H__ +#define __VMCLOCK_ABI_H__ + +#include + +struct vmclock_abi { + /* CONSTANT FIELDS */ + __le32 magic; +#define VMCLOCK_MAGIC 0x4b4c4356 /* "VCLK" */ + __le32 size; /* Size of region containing this structure */ + __le16 version; /* 1 */ + __u8 counter_id; /* Matches VIRTIO_RTC_COUNTER_xxx except INVALID */ +#define VMCLOCK_COUNTER_ARM_VCNT 0 +#define VMCLOCK_COUNTER_X86_TSC 1 +#define VMCLOCK_COUNTER_INVALID 0xff + __u8 time_type; /* Matches VIRTIO_RTC_TYPE_xxx */ +#define VMCLOCK_TIME_UTC 0 /* Since 1970-01-01 00:00:00z */ +#define VMCLOCK_TIME_TAI 1 /* Since 1970-01-01 00:00:00z */ +#define VMCLOCK_TIME_MONOTONIC 2 /* Since undefined epoch */ +#define VMCLOCK_TIME_INVALID_SMEARED 3 /* Not supported */ +#define VMCLOCK_TIME_INVALID_MAYBE_SMEARED 4 /* Not supported */ + + /* NON-CONSTANT FIELDS PROTECTED BY SEQCOUNT LOCK */ + __le32 seq_count; /* Low bit means an update is in progress */ + /* + * This field changes to another non-repeating value when the CPU + * counter is disrupted, for example on live migration. This lets + * the guest know that it should discard any calibration it has + * performed of the counter against external sources (NTP/PTP/etc.). + */ + __le64 disruption_marker; + __le64 flags; + /* Indicates that the tai_offset_sec field is valid */ +#define VMCLOCK_FLAG_TAI_OFFSET_VALID (1 << 0) + /* + * Optionally used to notify guests of pending maintenance events. + * A guest which provides latency-sensitive services may wish to + * remove itself from service if an event is coming up. Two flags + * indicate the approximate imminence of the event. + */ +#define VMCLOCK_FLAG_DISRUPTION_SOON (1 << 1) /* About a day */ +#define VMCLOCK_FLAG_DISRUPTION_IMMINENT (1 << 2) /* About an hour */ +#define VMCLOCK_FLAG_PERIOD_ESTERROR_VALID (1 << 3) +#define VMCLOCK_FLAG_PERIOD_MAXERROR_VALID (1 << 4) +#define VMCLOCK_FLAG_TIME_ESTERROR_VALID (1 << 5) +#define VMCLOCK_FLAG_TIME_MAXERROR_VALID (1 << 6) + /* + * If the MONOTONIC flag is set then (other than leap seconds) it is + * guaranteed that the time calculated according this structure at + * any given moment shall never appear to be later than the time + * calculated via the structure at any *later* moment. + * + * In particular, a timestamp based on a counter reading taken + * immediately after setting the low bit of seq_count (and the + * associated memory barrier), using the previously-valid time and + * period fields, shall never be later than a timestamp based on + * a counter reading taken immediately before *clearing* the low + * bit again after the update, using the about-to-be-valid fields. + */ +#define VMCLOCK_FLAG_TIME_MONOTONIC (1 << 7) + + __u8 pad[2]; + __u8 clock_status; +#define VMCLOCK_STATUS_UNKNOWN 0 +#define VMCLOCK_STATUS_INITIALIZING 1 +#define VMCLOCK_STATUS_SYNCHRONIZED 2 +#define VMCLOCK_STATUS_FREERUNNING 3 +#define VMCLOCK_STATUS_UNRELIABLE 4 + + /* + * The time exposed through this device is never smeared. This field + * corresponds to the 'subtype' field in virtio-rtc, which indicates + * the smearing method. However in this case it provides a *hint* to + * the guest operating system, such that *if* the guest OS wants to + * provide its users with an alternative clock which does not follow + * UTC, it may do so in a fashion consistent with the other systems + * in the nearby environment. + */ + __u8 leap_second_smearing_hint; /* Matches VIRTIO_RTC_SUBTYPE_xxx */ +#define VMCLOCK_SMEARING_STRICT 0 +#define VMCLOCK_SMEARING_NOON_LINEAR 1 +#define VMCLOCK_SMEARING_UTC_SLS 2 + __le16 tai_offset_sec; /* Actually two's complement signed */ + __u8 leap_indicator; + /* + * This field is based on the VIRTIO_RTC_LEAP_xxx values as defined + * in the current draft of virtio-rtc, but since smearing cannot be + * used with the shared memory device, some values are not used. + * + * The _POST_POS and _POST_NEG values allow the guest to perform + * its own smearing during the day or so after a leap second when + * such smearing may need to continue being applied for a leap + * second which is now theoretically "historical". + */ +#define VMCLOCK_LEAP_NONE 0x00 /* No known nearby leap second */ +#define VMCLOCK_LEAP_PRE_POS 0x01 /* Positive leap second at EOM */ +#define VMCLOCK_LEAP_PRE_NEG 0x02 /* Negative leap second at EOM */ +#define VMCLOCK_LEAP_POS 0x03 /* Set during 23:59:60 second */ +#define VMCLOCK_LEAP_POST_POS 0x04 +#define VMCLOCK_LEAP_POST_NEG 0x05 + + /* Bit shift for counter_period_frac_sec and its error rate */ + __u8 counter_period_shift; + /* + * Paired values of counter and UTC at a given point in time. + */ + __le64 counter_value; + /* + * Counter period, and error margin of same. The unit of these + * fields is 1/2^(64 + counter_period_shift) of a second. + */ + __le64 counter_period_frac_sec; + __le64 counter_period_esterror_rate_frac_sec; + __le64 counter_period_maxerror_rate_frac_sec; + + /* + * Time according to time_type field above. + */ + __le64 time_sec; /* Seconds since time_type epoch */ + __le64 time_frac_sec; /* Units of 1/2^64 of a second */ + __le64 time_esterror_nanosec; + __le64 time_maxerror_nanosec; +}; + +#endif /* __VMCLOCK_ABI_H__ */ -- cgit v1.2.3 From 57e3707eb5e3d9a45eef9151f0378313b1d39a17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 5 Aug 2024 11:39:36 +0200 Subject: bpf: Constify ctl_table argument of filter function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysctl core is moving to allow "struct ctl_table" in read-only memory. As a preparation for that all functions handling "struct ctl_table" need to be able to work with "const struct ctl_table". As __cgroup_bpf_run_filter_sysctl() does not modify its table, it can be adapted trivially. Signed-off-by: Thomas Weißschuh Signed-off-by: Joel Granados --- include/linux/bpf-cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index ce91d9b2acb9..4dd17128b204 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -138,7 +138,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum cgroup_bpf_attach_type atype); int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, - struct ctl_table *table, int write, + const struct ctl_table *table, int write, char **buf, size_t *pcount, loff_t *ppos, enum cgroup_bpf_attach_type atype); -- cgit v1.2.3 From 29e1095bb1ad149b5c417719338d9c81d58bf12b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 5 Aug 2024 11:39:37 +0200 Subject: sysctl: move internal interfaces to const struct ctl_table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a preparation to make all the core sysctl code work with const struct ctl_table switch over the internal function to use the const variant. Some pointers to "struct ctl_table" need to stay non-const as they are newly allocated and modified before registration. Signed-off-by: Thomas Weißschuh Signed-off-by: Joel Granados --- include/linux/sysctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index aa4c6d44aaa0..a473deaf5a91 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -162,7 +162,7 @@ struct ctl_node { struct ctl_table_header { union { struct { - struct ctl_table *ctl_table; + const struct ctl_table *ctl_table; int ctl_table_size; int used; int count; -- cgit v1.2.3 From 7abc9b53bd515d7953d1f4e069b062ec4b5ba9e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 5 Aug 2024 11:39:38 +0200 Subject: sysctl: allow registration of const struct ctl_table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Putting structure, especially those containing function pointers, into read-only memory makes the safer and easier to reason about. Change the sysctl registration APIs to allow registration of "const struct ctl_table". Signed-off-by: Thomas Weißschuh Acked-by: Kees Cook Reviewed-by: Kees Cook # security/* Signed-off-by: Joel Granados --- include/linux/sysctl.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index a473deaf5a91..202855befa8b 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -223,13 +223,13 @@ extern void retire_sysctl_set(struct ctl_table_set *set); struct ctl_table_header *__register_sysctl_table( struct ctl_table_set *set, - const char *path, struct ctl_table *table, size_t table_size); -struct ctl_table_header *register_sysctl_sz(const char *path, struct ctl_table *table, + const char *path, const struct ctl_table *table, size_t table_size); +struct ctl_table_header *register_sysctl_sz(const char *path, const struct ctl_table *table, size_t table_size); void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init_bases(void); -extern void __register_sysctl_init(const char *path, struct ctl_table *table, +extern void __register_sysctl_init(const char *path, const struct ctl_table *table, const char *table_name, size_t table_size); #define register_sysctl_init(path, table) \ __register_sysctl_init(path, table, #table, ARRAY_SIZE(table)) @@ -251,7 +251,7 @@ extern int no_unaligned_warning; #else /* CONFIG_SYSCTL */ -static inline void register_sysctl_init(const char *path, struct ctl_table *table) +static inline void register_sysctl_init(const char *path, const struct ctl_table *table) { } @@ -261,7 +261,7 @@ static inline struct ctl_table_header *register_sysctl_mount_point(const char *p } static inline struct ctl_table_header *register_sysctl_sz(const char *path, - struct ctl_table *table, + const struct ctl_table *table, size_t table_size) { return NULL; -- cgit v1.2.3 From fc5d96670eb2540d2572a14351e82ffe45d5ac11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 11 Sep 2024 14:18:58 +0200 Subject: drm/ttm: Move swapped objects off the manager's LRU list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resources of swapped objects remains on the TTM_PL_SYSTEM manager's LRU list, which is bad for the LRU walk efficiency. Rename the device-wide "pinned" list to "unevictable" and move also resources of swapped-out objects to that list. An alternative would be to create an "UNEVICTABLE" priority to be able to keep the pinned- and swapped objects on their respective manager's LRU without affecting the LRU walk efficiency. v2: - Remove a bogus WARN_ON (Christian König) - Update ttm_resource_[add|del] bulk move (Christian König) - Fix TTM KUNIT tests (Intel CI) v3: - Check for non-NULL bo->resource in ttm_bo_populate(). v4: - Don't move to LRU tail during swapout until the resource is properly swapped or there was a swapout failure. (Intel Ci) - Add a newline after checkpatch check. v5: - Introduce ttm_resource_is_swapped() to avoid a corner-case where a newly created resource was considered swapped. (Intel CI) v6: - Move an assert. Cc: Christian König Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240911121859.85387-2-thomas.hellstrom@linux.intel.com --- include/drm/ttm/ttm_bo.h | 2 ++ include/drm/ttm/ttm_device.h | 5 +++-- include/drm/ttm/ttm_tt.h | 5 +++++ 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index 7b56d1ca36d7..5804408815be 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -462,5 +462,7 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo); pgprot_t ttm_io_prot(struct ttm_buffer_object *bo, struct ttm_resource *res, pgprot_t tmp); void ttm_bo_tt_destroy(struct ttm_buffer_object *bo); +int ttm_bo_populate(struct ttm_buffer_object *bo, + struct ttm_operation_ctx *ctx); #endif diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index c22f30535c84..438358f72716 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -252,9 +252,10 @@ struct ttm_device { spinlock_t lru_lock; /** - * @pinned: Buffer objects which are pinned and so not on any LRU list. + * @unevictable Buffer objects which are pinned or swapped and as such + * not on an LRU list. */ - struct list_head pinned; + struct list_head unevictable; /** * @dev_mapping: A pointer to the struct address_space for invalidating diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 2b9d856ff388..991edafdb2dd 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -129,6 +129,11 @@ static inline bool ttm_tt_is_populated(struct ttm_tt *tt) return tt->page_flags & TTM_TT_FLAG_PRIV_POPULATED; } +static inline bool ttm_tt_is_swapped(const struct ttm_tt *tt) +{ + return tt->page_flags & TTM_TT_FLAG_SWAPPED; +} + /** * ttm_tt_create * -- cgit v1.2.3 From 6607c17c6c5e029da03a90085db22daf518232bf Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Tue, 8 Oct 2024 00:06:15 -0700 Subject: net: mana: Enable debugfs files for MANA device Implement debugfs in MANA driver to be able to view RX,TX,EQ queue specific attributes and dump their gdma queues. These dumps can be used by other userspace utilities to improve debuggability and troubleshooting Following files are added in debugfs: /sys/kernel/debug/mana/ |-------------- 1 |--------------- EQs | |------- eq0 | | |---head | | |---tail | | |---eq_dump | |------- eq1 | . | . | |--------------- adapter-MTU |--------------- vport0 |------- RX-0 | |---cq_budget | |---cq_dump | |---cq_head | |---cq_tail | |---rq_head | |---rq_nbuf | |---rq_tail | |---rxq_dump |------- RX-1 . . |------- TX-0 | |---cq_budget | |---cq_dump | |---cq_head | |---cq_tail | |---sq_head | |---sq_pend_skb_qlen | |---sq_tail | |---txq_dump |------- TX-1 . . Signed-off-by: Shradha Gupta Reviewed-by: Haiyang Zhang Signed-off-by: David S. Miller --- include/net/mana/gdma.h | 6 +++++- include/net/mana/mana.h | 8 ++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index de47fa533b15..90f56656b572 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -267,7 +267,8 @@ struct gdma_event { struct gdma_queue; struct mana_eq { - struct gdma_queue *eq; + struct gdma_queue *eq; + struct dentry *mana_eq_debugfs; }; typedef void gdma_eq_callback(void *context, struct gdma_queue *q, @@ -365,6 +366,7 @@ struct gdma_irq_context { struct gdma_context { struct device *dev; + struct dentry *mana_pci_debugfs; /* Per-vPort max number of queues */ unsigned int max_num_queues; @@ -878,5 +880,7 @@ int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req, u32 resp_len, void *resp); int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle); +void mana_register_debugfs(void); +void mana_unregister_debugfs(void); #endif /* _GDMA_H */ diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 9b0faa24b758..0d00b24eacaf 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -350,6 +350,7 @@ struct mana_rxq { int xdp_rc; /* XDP redirect return code */ struct page_pool *page_pool; + struct dentry *mana_rx_debugfs; /* MUST BE THE LAST MEMBER: * Each receive buffer has an associated mana_recv_buf_oob. @@ -363,6 +364,8 @@ struct mana_tx_qp { struct mana_cq tx_cq; mana_handle_t tx_object; + + struct dentry *mana_tx_debugfs; }; struct mana_ethtool_stats { @@ -407,6 +410,7 @@ struct mana_context { u16 num_ports; struct mana_eq *eqs; + struct dentry *mana_eqs_debugfs; struct net_device *ports[MAX_PORTS_IN_MANA_DEV]; }; @@ -468,6 +472,9 @@ struct mana_port_context { bool port_st_save; /* Saved port state */ struct mana_ethtool_stats eth_stats; + + /* Debugfs */ + struct dentry *mana_port_debugfs; }; netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev); @@ -494,6 +501,7 @@ int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues void mana_pre_dealloc_rxbufs(struct mana_port_context *apc); extern const struct ethtool_ops mana_ethtool_ops; +extern struct dentry *mana_debugfs_root; /* A CQ can be created not associated with any EQ */ #define GDMA_CQ_NO_EQ 0xffff -- cgit v1.2.3 From 823a566221a5639f6c69424897218e5d6431a970 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 9 Oct 2024 11:20:31 +0200 Subject: locking/ww_mutex: Adjust to lockdep nest_lock requirements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using mutex_acquire_nest() with a nest_lock, lockdep refcounts the number of acquired lockdep_maps of mutexes of the same class, and also keeps a pointer to the first acquired lockdep_map of a class. That pointer is then used for various comparison-, printing- and checking purposes, but there is no mechanism to actively ensure that lockdep_map stays in memory. Instead, a warning is printed if the lockdep_map is freed and there are still held locks of the same lock class, even if the lockdep_map itself has been released. In the context of WW/WD transactions that means that if a user unlocks and frees a ww_mutex from within an ongoing ww transaction, and that mutex happens to be the first ww_mutex grabbed in the transaction, such a warning is printed and there might be a risk of a UAF. Note that this is only problem when lockdep is enabled and affects only dereferences of struct lockdep_map. Adjust to this by adding a fake lockdep_map to the acquired context and make sure it is the first acquired lockdep map of the associated ww_mutex class. Then hold it for the duration of the WW/WD transaction. This has the side effect that trying to lock a ww mutex *without* a ww_acquire_context but where a such context has been acquire, we'd see a lockdep splat. The test-ww_mutex.c selftest attempts to do that, so modify that particular test to not acquire a ww_acquire_context if it is not going to be used. Signed-off-by: Thomas Hellström Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20241009092031.6356-1-thomas.hellstrom@linux.intel.com --- include/linux/ww_mutex.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index bb763085479a..a401a2f31a77 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -65,6 +65,16 @@ struct ww_acquire_ctx { #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; + /** + * @first_lock_dep_map: fake lockdep_map for first locked ww_mutex. + * + * lockdep requires the lockdep_map for the first locked ww_mutex + * in a ww transaction to remain in memory until all ww_mutexes of + * the transaction have been unlocked. Ensure this by keeping a + * fake locked ww_mutex lockdep map between ww_acquire_init() and + * ww_acquire_fini(). + */ + struct lockdep_map first_lock_dep_map; #endif #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH unsigned int deadlock_inject_interval; @@ -146,7 +156,10 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, debug_check_no_locks_freed((void *)ctx, sizeof(*ctx)); lockdep_init_map(&ctx->dep_map, ww_class->acquire_name, &ww_class->acquire_key, 0); + lockdep_init_map(&ctx->first_lock_dep_map, ww_class->mutex_name, + &ww_class->mutex_key, 0); mutex_acquire(&ctx->dep_map, 0, 0, _RET_IP_); + mutex_acquire_nest(&ctx->first_lock_dep_map, 0, 0, &ctx->dep_map, _RET_IP_); #endif #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH ctx->deadlock_inject_interval = 1; @@ -185,6 +198,7 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) { #ifdef CONFIG_DEBUG_LOCK_ALLOC + mutex_release(&ctx->first_lock_dep_map, _THIS_IP_); mutex_release(&ctx->dep_map, _THIS_IP_); #endif #ifdef DEBUG_WW_MUTEXES -- cgit v1.2.3 From c3e91446a3580353672e965165ab37db2bf6a757 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 9 Oct 2024 17:03:00 +0300 Subject: drm/file: fix client_name_lock kernel-doc warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's client_name_lock, not name_lock. Also unify style while at it. Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/r/20241009172650.29169e6f@canb.auug.org.au Fixes: 56c594d8df64 ("drm: add DRM_SET_CLIENT_NAME ioctl") Reviewed-by: Dmitry Osipenko Reviewed-by: Christian König Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20241009140300.1980746-1-jani.nikula@intel.com Signed-off-by: Christian König --- include/drm/drm_file.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index d4f1c115ea0f..f0ef32e9fa5e 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -395,7 +395,10 @@ struct drm_file { * Userspace-provided name; useful for accounting and debugging. */ const char *client_name; - /** @name_lock: Protects @client_name. */ + + /** + * @client_name_lock: Protects @client_name. + */ struct mutex client_name_lock; }; -- cgit v1.2.3 From 3639fadc7e98a5b0aef399d7beef24b028fdf898 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 9 Oct 2024 17:04:52 +0300 Subject: drm/imx: add forward declarations for types The imx.h header does not forward declare the types it uses, and the header is not self-contained. Fix it. Fixes: cc3e8a216d6b ("drm/imx: add internal bridge handling display-timings DT node") Cc: Philipp Zabel Cc: Dmitry Baryshkov Cc: imx@lists.linux.dev Cc: linux-arm-kernel@lists.infradead.org Reviewed-by: Philipp Zabel Link: https://patchwork.freedesktop.org/patch/msgid/20241009140452.1981175-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/bridge/imx.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/drm/bridge/imx.h b/include/drm/bridge/imx.h index e14f429a9ca2..b93f719fe0e7 100644 --- a/include/drm/bridge/imx.h +++ b/include/drm/bridge/imx.h @@ -6,6 +6,10 @@ #ifndef DRM_IMX_BRIDGE_H #define DRM_IMX_BRIDGE_H +struct device; +struct device_node; +struct drm_bridge; + struct drm_bridge *devm_imx_drm_legacy_bridge(struct device *dev, struct device_node *np, int type); -- cgit v1.2.3 From 5461f3fd74a89757f95f351eb0bc26aafc2a2e91 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 20 Sep 2024 00:27:58 +0100 Subject: backlight: Remove notifier backlight_register_notifier and backlight_unregister_notifier have been unused since commit 6cb634d0dc85 ("ACPI: video: Remove code to unregister acpi_video backlight when a native backlight registers") With those not being called, it means that the backlight_notifier list is always empty. Remove the functions, the list itself and the enum used in the notifications. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Daniel Thompson Reviewed-by: Simona Vetter Link: https://lore.kernel.org/r/20240919232758.639925-1-linux@treblig.org Signed-off-by: Lee Jones --- include/linux/backlight.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/backlight.h b/include/linux/backlight.h index ea9c1bc8148e..f5652e5a9060 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -66,24 +66,6 @@ enum backlight_type { BACKLIGHT_TYPE_MAX, }; -/** - * enum backlight_notification - the type of notification - * - * The notifications that is used for notification sent to the receiver - * that registered notifications using backlight_register_notifier(). - */ -enum backlight_notification { - /** - * @BACKLIGHT_REGISTERED: The backlight device is registered. - */ - BACKLIGHT_REGISTERED, - - /** - * @BACKLIGHT_UNREGISTERED: The backlight revice is unregistered. - */ - BACKLIGHT_UNREGISTERED, -}; - /** enum backlight_scale - the type of scale used for brightness values * * The type of scale used for brightness values. @@ -421,8 +403,6 @@ void devm_backlight_device_unregister(struct device *dev, struct backlight_device *bd); void backlight_force_update(struct backlight_device *bd, enum backlight_update_reason reason); -int backlight_register_notifier(struct notifier_block *nb); -int backlight_unregister_notifier(struct notifier_block *nb); struct backlight_device *backlight_device_get_by_name(const char *name); struct backlight_device *backlight_device_get_by_type(enum backlight_type type); int backlight_device_set_brightness(struct backlight_device *bd, -- cgit v1.2.3 From 4c93ede2b0c73a7708f46a01669769d15d31e1d2 Mon Sep 17 00:00:00 2001 From: R Sundar Date: Thu, 3 Oct 2024 08:08:06 +0530 Subject: drm: Fix for kernel doc warning Added colon in kernel-doc comment to fix the warning. ./include/drm/drm_drv.h:372: warning: Incorrect use of kernel-doc format: * @fbdev_probe ./include/drm/drm_drv.h:435: warning: Function parameter or struct member 'fbdev_probe' not described in 'drm_driver' Signed-off-by: R Sundar Link: https://patchwork.freedesktop.org/patch/msgid/20241003023806.17537-1-prosunofficial@gmail.com Signed-off-by: Jani Nikula --- include/drm/drm_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index 36a606af4ba1..1bbbcb8e2d23 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -369,7 +369,7 @@ struct drm_driver { uint64_t *offset); /** - * @fbdev_probe + * @fbdev_probe: * * Allocates and initialize the fb_info structure for fbdev emulation. * Furthermore it also needs to allocate the DRM framebuffer used to -- cgit v1.2.3 From 6f1067cfbee72b04fc42234f7f1588f838cec0b6 Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Tue, 1 Oct 2024 13:53:27 +0200 Subject: mfd: Add Congatec Board Controller driver Add core MFD driver for the Board Controller found on some Congatec SMARC module. This Board Controller provides functions like watchdog, GPIO, and I2C busses. This commit adds support only for the conga-SA7 module. Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20241001-congatec-board-controller-v3-1-39ceceed5c47@bootlin.com Signed-off-by: Lee Jones --- include/linux/mfd/cgbc.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 include/linux/mfd/cgbc.h (limited to 'include') diff --git a/include/linux/mfd/cgbc.h b/include/linux/mfd/cgbc.h new file mode 100644 index 000000000000..badbec4c7033 --- /dev/null +++ b/include/linux/mfd/cgbc.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Congatec Board Controller driver definitions + * + * Copyright (C) 2024 Bootlin + * Author: Thomas Richard + */ + +#ifndef _LINUX_MFD_CGBC_H_ + +/** + * struct cgbc_version - Board Controller device version structure + * @feature: Board Controller feature number + * @major: Board Controller major revision + * @minor: Board Controller minor revision + */ +struct cgbc_version { + unsigned char feature; + unsigned char major; + unsigned char minor; +}; + +/** + * struct cgbc_device_data - Internal representation of the Board Controller device + * @io_session: Pointer to the session IO memory + * @io_cmd: Pointer to the command IO memory + * @session: Session id returned by the Board Controller + * @dev: Pointer to kernel device structure + * @cgbc_version: Board Controller version structure + * @mutex: Board Controller mutex + */ +struct cgbc_device_data { + void __iomem *io_session; + void __iomem *io_cmd; + u8 session; + struct device *dev; + struct cgbc_version version; + struct mutex lock; +}; + +int cgbc_command(struct cgbc_device_data *cgbc, void *cmd, unsigned int cmd_size, + void *data, unsigned int data_size, u8 *status); + +#endif /*_LINUX_MFD_CGBC_H_*/ -- cgit v1.2.3 From 0e6caab8db8bc9359d1a8363e1ee9b2205ed704d Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Oct 2024 21:07:11 -0400 Subject: tracing: Declare system call tracepoints with TRACE_EVENT_SYSCALL In preparation for allowing system call tracepoints to handle page faults, introduce TRACE_EVENT_SYSCALL to declare the sys_enter/sys_exit tracepoints. Move the common code between __DECLARE_TRACE and __DECLARE_TRACE_SYSCALL into __DECLARE_TRACE_COMMON. This change is not meant to alter the generated code, and only prepares the following modifications. Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Link: https://lore.kernel.org/20241009010718.2050182-2-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 53 +++++++++++++++++++++++++++++++---------- include/trace/bpf_probe.h | 3 +++ include/trace/define_trace.h | 5 ++++ include/trace/events/syscalls.h | 4 ++-- include/trace/perf.h | 3 +++ include/trace/trace_events.h | 28 ++++++++++++++++++++++ 6 files changed, 81 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 3d33b9872cec..76e441b39a96 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -197,7 +197,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * it_func[0] is never NULL because there is at least one element in the array * when the array itself is non NULL. */ -#define __DO_TRACE(name, args, cond) \ +#define __DO_TRACE(name, args, cond, syscall) \ do { \ int __maybe_unused __idx = 0; \ \ @@ -222,21 +222,10 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * site if it is not watching, as it will need to be active when the * tracepoint is enabled. */ -#define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ +#define __DECLARE_TRACE_COMMON(name, proto, args, cond, data_proto) \ extern int __traceiter_##name(data_proto); \ DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name); \ extern struct tracepoint __tracepoint_##name; \ - static inline void trace_##name(proto) \ - { \ - if (static_branch_unlikely(&__tracepoint_##name.key)) \ - __DO_TRACE(name, \ - TP_ARGS(args), \ - TP_CONDITION(cond)); \ - if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ - WARN_ONCE(!rcu_is_watching(), \ - "RCU not watching for tracepoint"); \ - } \ - } \ static inline int \ register_trace_##name(void (*probe)(data_proto), void *data) \ { \ @@ -266,6 +255,34 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) return static_branch_unlikely(&__tracepoint_##name.key);\ } +#define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ + __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), cond, PARAMS(data_proto)) \ + static inline void trace_##name(proto) \ + { \ + if (static_branch_unlikely(&__tracepoint_##name.key)) \ + __DO_TRACE(name, \ + TP_ARGS(args), \ + TP_CONDITION(cond), 0); \ + if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ + WARN_ONCE(!rcu_is_watching(), \ + "RCU not watching for tracepoint"); \ + } \ + } + +#define __DECLARE_TRACE_SYSCALL(name, proto, args, cond, data_proto) \ + __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), cond, PARAMS(data_proto)) \ + static inline void trace_##name(proto) \ + { \ + if (static_branch_unlikely(&__tracepoint_##name.key)) \ + __DO_TRACE(name, \ + TP_ARGS(args), \ + TP_CONDITION(cond), 1); \ + if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ + WARN_ONCE(!rcu_is_watching(), \ + "RCU not watching for tracepoint"); \ + } \ + } + /* * We have no guarantee that gcc and the linker won't up-align the tracepoint * structures, so we create an array of pointers that will be used for iteration @@ -348,6 +365,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) return false; \ } +#define __DECLARE_TRACE_SYSCALL __DECLARE_TRACE + #define DEFINE_TRACE_FN(name, reg, unreg, proto, args) #define DEFINE_TRACE(name, proto, args) #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) @@ -409,6 +428,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \ PARAMS(void *__data, proto)) +#define DECLARE_TRACE_SYSCALL(name, proto, args) \ + __DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args), \ + cpu_online(raw_smp_processor_id()), \ + PARAMS(void *__data, proto)) + #define TRACE_EVENT_FLAGS(event, flag) #define TRACE_EVENT_PERF_PERM(event, expr...) @@ -546,6 +570,9 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) struct, assign, print) \ DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ PARAMS(args), PARAMS(cond)) +#define TRACE_EVENT_SYSCALL(name, proto, args, struct, assign, \ + print, reg, unreg) \ + DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FLAGS(event, flag) diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index a2ea11cc912e..c85bbce5aaa5 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -53,6 +53,9 @@ __bpf_trace_##call(void *__data, proto) \ #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ __BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS + /* * This part is compiled out, it is only here as a build time check * to make sure that if the tracepoint handling changes, the diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 00723935dcc7..ff5fa17a6259 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -46,6 +46,10 @@ assign, print, reg, unreg) \ DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args)) +#undef TRACE_EVENT_SYSCALL +#define TRACE_EVENT_SYSCALL(name, proto, args, struct, assign, print, reg, unreg) \ + DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args)) + #undef TRACE_EVENT_NOP #define TRACE_EVENT_NOP(name, proto, args, struct, assign, print) @@ -107,6 +111,7 @@ #undef TRACE_EVENT #undef TRACE_EVENT_FN #undef TRACE_EVENT_FN_COND +#undef TRACE_EVENT_SYSCALL #undef TRACE_EVENT_CONDITION #undef TRACE_EVENT_NOP #undef DEFINE_EVENT_NOP diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index b6e0cbc2c71f..f31ff446b468 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -15,7 +15,7 @@ #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS -TRACE_EVENT_FN(sys_enter, +TRACE_EVENT_SYSCALL(sys_enter, TP_PROTO(struct pt_regs *regs, long id), @@ -41,7 +41,7 @@ TRACE_EVENT_FN(sys_enter, TRACE_EVENT_FLAGS(sys_enter, TRACE_EVENT_FL_CAP_ANY) -TRACE_EVENT_FN(sys_exit, +TRACE_EVENT_SYSCALL(sys_exit, TP_PROTO(struct pt_regs *regs, long ret), diff --git a/include/trace/perf.h b/include/trace/perf.h index 2c11181c82e0..ded997af481e 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -55,6 +55,9 @@ perf_trace_##call(void *__data, proto) \ head, __task); \ } +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS + /* * This part is compiled out, it is only here as a build time check * to make sure that if the tracepoint handling changes, the diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index c2f9cabf154d..8bcbb9ee44de 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -45,6 +45,16 @@ PARAMS(print)); \ DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args)); +#undef TRACE_EVENT_SYSCALL +#define TRACE_EVENT_SYSCALL(name, proto, args, tstruct, assign, print, reg, unreg) \ + DECLARE_EVENT_SYSCALL_CLASS(name, \ + PARAMS(proto), \ + PARAMS(args), \ + PARAMS(tstruct), \ + PARAMS(assign), \ + PARAMS(print)); \ + DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args)); + #include "stages/stage1_struct_define.h" #undef DECLARE_EVENT_CLASS @@ -57,6 +67,9 @@ \ static struct trace_event_class event_class_##name; +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS + #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) \ static struct trace_event_call __used \ @@ -117,6 +130,9 @@ tstruct; \ }; +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS + #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) @@ -208,6 +224,9 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ .trace = trace_raw_output_##call, \ }; +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS + #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ static notrace enum print_line_t \ @@ -265,6 +284,9 @@ static inline notrace int trace_event_get_offsets_##call( \ return __data_size; \ } +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -409,6 +431,9 @@ trace_event_raw_event_##call(void *__data, proto) \ * fail to compile unless it too is updated. */ +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS + #undef DEFINE_EVENT #define DEFINE_EVENT(template, call, proto, args) \ static inline void ftrace_test_probe_##call(void) \ @@ -434,6 +459,9 @@ static struct trace_event_class __used __refdata event_class_##call = { \ _TRACE_PERF_INIT(call) \ }; +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS + #undef DEFINE_EVENT #define DEFINE_EVENT(template, call, proto, args) \ \ -- cgit v1.2.3 From 13d750c2c03e9861e15268574ed2c239cca9c9d5 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Oct 2024 21:07:12 -0400 Subject: tracing/ftrace: disable preemption in syscall probe In preparation for allowing system call enter/exit instrumentation to handle page faults, make sure that ftrace can handle this change by explicitly disabling preemption within the ftrace system call tracepoint probes to respect the current expectations within ftrace ring buffer code. This change does not yet allow ftrace to take page faults per se within its probe, but allows its existing probes to adapt to the upcoming change. Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Link: https://lore.kernel.org/20241009010718.2050182-3-mathieu.desnoyers@efficios.com Acked-by: Masami Hiramatsu (Google) Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/trace/trace_events.h | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 8bcbb9ee44de..63071aa5923d 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -263,6 +263,9 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ tstruct \ {} }; +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS + #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, name, proto, args, print) @@ -396,11 +399,11 @@ static inline notrace int trace_event_get_offsets_##call( \ #include "stages/stage6_event_callback.h" -#undef DECLARE_EVENT_CLASS -#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ - \ + +#undef __DECLARE_EVENT_CLASS +#define __DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static notrace void \ -trace_event_raw_event_##call(void *__data, proto) \ +do_trace_event_raw_event_##call(void *__data, proto) \ { \ struct trace_event_file *trace_file = __data; \ struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\ @@ -425,15 +428,35 @@ trace_event_raw_event_##call(void *__data, proto) \ \ trace_event_buffer_commit(&fbuffer); \ } + +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +__DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \ + PARAMS(assign), PARAMS(print)) \ +static notrace void \ +trace_event_raw_event_##call(void *__data, proto) \ +{ \ + do_trace_event_raw_event_##call(__data, args); \ +} + +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS(call, proto, args, tstruct, assign, print) \ +__DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \ + PARAMS(assign), PARAMS(print)) \ +static notrace void \ +trace_event_raw_event_##call(void *__data, proto) \ +{ \ + preempt_disable_notrace(); \ + do_trace_event_raw_event_##call(__data, args); \ + preempt_enable_notrace(); \ +} + /* * The ftrace_test_probe is compiled out, it is only here as a build time check * to make sure that if the tracepoint handling changes, the ftrace probe will * fail to compile unless it too is updated. */ -#undef DECLARE_EVENT_SYSCALL_CLASS -#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS - #undef DEFINE_EVENT #define DEFINE_EVENT(template, call, proto, args) \ static inline void ftrace_test_probe_##call(void) \ @@ -443,6 +466,8 @@ static inline void ftrace_test_probe_##call(void) \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#undef __DECLARE_EVENT_CLASS + #include "stages/stage7_class_define.h" #undef DECLARE_EVENT_CLASS -- cgit v1.2.3 From 65e7462a16cea593025ca3b34c5d74e69b027ee0 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Oct 2024 21:07:13 -0400 Subject: tracing/perf: disable preemption in syscall probe In preparation for allowing system call enter/exit instrumentation to handle page faults, make sure that perf can handle this change by explicitly disabling preemption within the perf system call tracepoint probes to respect the current expectations within perf ring buffer code. This change does not yet allow perf to take page faults per se within its probe, but allows its existing probes to adapt to the upcoming change. Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Link: https://lore.kernel.org/20241009010718.2050182-4-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/trace/perf.h | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/perf.h b/include/trace/perf.h index ded997af481e..15cde7eac8b4 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -12,10 +12,10 @@ #undef __perf_task #define __perf_task(t) (__task = (t)) -#undef DECLARE_EVENT_CLASS -#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +#undef __DECLARE_EVENT_CLASS +#define __DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static notrace void \ -perf_trace_##call(void *__data, proto) \ +do_perf_trace_##call(void *__data, proto) \ { \ struct trace_event_call *event_call = __data; \ struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\ @@ -55,8 +55,39 @@ perf_trace_##call(void *__data, proto) \ head, __task); \ } +/* + * Define unused __count and __task variables to use @args to pass + * arguments to do_perf_trace_##call. This is needed because the + * macros __perf_count and __perf_task introduce the side-effect to + * store copies into those local variables. + */ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +__DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \ + PARAMS(assign), PARAMS(print)) \ +static notrace void \ +perf_trace_##call(void *__data, proto) \ +{ \ + u64 __count __attribute__((unused)); \ + struct task_struct *__task __attribute__((unused)); \ + \ + do_perf_trace_##call(__data, args); \ +} + #undef DECLARE_EVENT_SYSCALL_CLASS -#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS(call, proto, args, tstruct, assign, print) \ +__DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \ + PARAMS(assign), PARAMS(print)) \ +static notrace void \ +perf_trace_##call(void *__data, proto) \ +{ \ + u64 __count __attribute__((unused)); \ + struct task_struct *__task __attribute__((unused)); \ + \ + preempt_disable_notrace(); \ + do_perf_trace_##call(__data, args); \ + preempt_enable_notrace(); \ +} /* * This part is compiled out, it is only here as a build time check @@ -76,4 +107,7 @@ static inline void perf_test_probe_##call(void) \ DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef __DECLARE_EVENT_CLASS + #endif /* CONFIG_PERF_EVENTS */ -- cgit v1.2.3 From 4aadde89d81fbf4cf3105a61dbc48888b819ecfb Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Oct 2024 21:07:14 -0400 Subject: tracing/bpf: disable preemption in syscall probe In preparation for allowing system call enter/exit instrumentation to handle page faults, make sure that bpf can handle this change by explicitly disabling preemption within the bpf system call tracepoint probes to respect the current expectations within bpf tracing code. This change does not yet allow bpf to take page faults per se within its probe, but allows its existing probes to adapt to the upcoming change. Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Link: https://lore.kernel.org/20241009010718.2050182-5-mathieu.desnoyers@efficios.com Acked-by: Andrii Nakryiko Tested-by: Andrii Nakryiko # BPF parts Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/trace/bpf_probe.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index c85bbce5aaa5..fec97c93e1c9 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -53,8 +53,18 @@ __bpf_trace_##call(void *__data, proto) \ #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ __BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) +#define __BPF_DECLARE_TRACE_SYSCALL(call, proto, args) \ +static notrace void \ +__bpf_trace_##call(void *__data, proto) \ +{ \ + preempt_disable_notrace(); \ + CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(__data, CAST_TO_U64(args)); \ + preempt_enable_notrace(); \ +} + #undef DECLARE_EVENT_SYSCALL_CLASS -#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS(call, proto, args, tstruct, assign, print) \ + __BPF_DECLARE_TRACE_SYSCALL(call, PARAMS(proto), PARAMS(args)) /* * This part is compiled out, it is only here as a build time check -- cgit v1.2.3 From a363d27cdbc2bc2d1899b5a1520b64e3590fcd9a Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Oct 2024 21:07:15 -0400 Subject: tracing: Allow system call tracepoints to handle page faults Use Tasks Trace RCU to protect iteration of system call enter/exit tracepoint probes to allow those probes to handle page faults. In preparation for this change, all tracers registering to system call enter/exit tracepoints should expect those to be called with preemption enabled. This allows tracers to fault-in userspace system call arguments such as path strings within their probe callbacks. Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Link: https://lore.kernel.org/20241009010718.2050182-6-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 76e441b39a96..0dc67fad706c 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -107,6 +108,7 @@ void for_each_tracepoint_in_module(struct module *mod, #ifdef CONFIG_TRACEPOINTS static inline void tracepoint_synchronize_unregister(void) { + synchronize_rcu_tasks_trace(); synchronize_rcu(); } #else @@ -196,6 +198,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) /* * it_func[0] is never NULL because there is at least one element in the array * when the array itself is non NULL. + * + * With @syscall=0, the tracepoint callback array dereference is + * protected by disabling preemption. + * With @syscall=1, the tracepoint callback array dereference is + * protected by Tasks Trace RCU, which allows probes to handle page + * faults. */ #define __DO_TRACE(name, args, cond, syscall) \ do { \ @@ -204,11 +212,17 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) if (!(cond)) \ return; \ \ - preempt_disable_notrace(); \ + if (syscall) \ + rcu_read_lock_trace(); \ + else \ + preempt_disable_notrace(); \ \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ \ - preempt_enable_notrace(); \ + if (syscall) \ + rcu_read_unlock_trace(); \ + else \ + preempt_enable_notrace(); \ } while (0) /* -- cgit v1.2.3 From a3204c740a59bebb3b37a294d83f4e353303a52c Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Oct 2024 21:07:16 -0400 Subject: tracing/ftrace: Add might_fault check to syscall probes Add a might_fault() check to validate that the ftrace sys_enter/sys_exit probe callbacks are indeed called from a context where page faults can be handled. Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Link: https://lore.kernel.org/20241009010718.2050182-7-mathieu.desnoyers@efficios.com Acked-by: Masami Hiramatsu (Google) Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/trace/trace_events.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 63071aa5923d..4f22136fd465 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -446,6 +446,7 @@ __DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \ static notrace void \ trace_event_raw_event_##call(void *__data, proto) \ { \ + might_fault(); \ preempt_disable_notrace(); \ do_trace_event_raw_event_##call(__data, args); \ preempt_enable_notrace(); \ -- cgit v1.2.3 From cdb537ac417938408ee819992f432c410f2d01a2 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Oct 2024 21:07:17 -0400 Subject: tracing/perf: Add might_fault check to syscall probes Add a might_fault() check to validate that the perf sys_enter/sys_exit probe callbacks are indeed called from a context where page faults can be handled. Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Link: https://lore.kernel.org/20241009010718.2050182-8-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/trace/perf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/perf.h b/include/trace/perf.h index 15cde7eac8b4..a1754b73a8f5 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -84,6 +84,7 @@ perf_trace_##call(void *__data, proto) \ u64 __count __attribute__((unused)); \ struct task_struct *__task __attribute__((unused)); \ \ + might_fault(); \ preempt_disable_notrace(); \ do_perf_trace_##call(__data, args); \ preempt_enable_notrace(); \ -- cgit v1.2.3 From 0850e1bc88b1bdc30f7f0b223a92eb22e5f06be0 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Oct 2024 21:07:18 -0400 Subject: tracing/bpf: Add might_fault check to syscall probes Add a might_fault() check to validate that the bpf sys_enter/sys_exit probe callbacks are indeed called from a context where page faults can be handled. Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Link: https://lore.kernel.org/20241009010718.2050182-9-mathieu.desnoyers@efficios.com Acked-by: Andrii Nakryiko Tested-by: Andrii Nakryiko # BPF parts Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/trace/bpf_probe.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index fec97c93e1c9..183fa2aa2935 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -57,6 +57,7 @@ __bpf_trace_##call(void *__data, proto) \ static notrace void \ __bpf_trace_##call(void *__data, proto) \ { \ + might_fault(); \ preempt_disable_notrace(); \ CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(__data, CAST_TO_U64(args)); \ preempt_enable_notrace(); \ -- cgit v1.2.3 From 21291491e3f39d1dac6453e376f7619b21239b5e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 9 Oct 2024 01:35:52 +0100 Subject: clk: Remove unused clk_hw_rate_is_protected clk_hw_rate_is_protected() was added in 2017's commit e55a839a7a1c ("clk: add clock protection mechanism to clk core") but has been unused. Remove it. Signed-off-by: Dr. David Alan Gilbert Link: https://lore.kernel.org/r/20241009003552.254675-1-linux@treblig.org Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 7e43caabb54b..5cbd112d1187 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1360,7 +1360,6 @@ unsigned long clk_hw_get_flags(const struct clk_hw *hw); (clk_hw_get_flags((hw)) & CLK_SET_RATE_PARENT) bool clk_hw_is_prepared(const struct clk_hw *hw); -bool clk_hw_rate_is_protected(const struct clk_hw *hw); bool clk_hw_is_enabled(const struct clk_hw *hw); bool __clk_is_enabled(struct clk *clk); struct clk *__clk_lookup(const char *name); -- cgit v1.2.3 From a82fcb16d977fa8ac52bdf37d5feb240366722ed Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 21 Aug 2024 17:24:28 -0700 Subject: clk: test: Add test managed of_clk_add_hw_provider() Add a test managed version of of_clk_add_hw_provider() that automatically unregisters the clk_hw provider upon test conclusion. Cc: Brendan Higgins Cc: David Gow Cc: Rae Moar Cc: Peng Fan Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240822002433.1163814-2-sboyd@kernel.org --- include/kunit/clk.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/kunit/clk.h b/include/kunit/clk.h index 73bc99cefe7b..0afae7688157 100644 --- a/include/kunit/clk.h +++ b/include/kunit/clk.h @@ -25,4 +25,8 @@ int clk_hw_register_kunit(struct kunit *test, struct device *dev, struct clk_hw int of_clk_hw_register_kunit(struct kunit *test, struct device_node *node, struct clk_hw *hw); +int of_clk_add_hw_provider_kunit(struct kunit *test, struct device_node *np, + struct clk_hw *(*get)(struct of_phandle_args *clkspec, void *data), + void *data); + #endif -- cgit v1.2.3 From 00977af42106e82bb2971c59e20d5e02c52e2a65 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 21 Aug 2024 17:24:29 -0700 Subject: of: kunit: Extract some overlay boiler plate into macros Make the lives of __of_overlay_apply_kunit() callers easier by extracting some of the boiler plate involved in referencing the DT overlays. Cc: Brendan Higgins Cc: David Gow Cc: Rae Moar Cc: Peng Fan Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240822002433.1163814-3-sboyd@kernel.org --- include/kunit/of.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/kunit/of.h b/include/kunit/of.h index 48d4e70c9666..75a760a4e2a5 100644 --- a/include/kunit/of.h +++ b/include/kunit/of.h @@ -62,6 +62,13 @@ static inline int __of_overlay_apply_kunit(struct kunit *test, &unused); } +#define of_overlay_begin(overlay_name) __dtbo_##overlay_name##_begin +#define of_overlay_end(overlay_name) __dtbo_##overlay_name##_end + +#define OF_OVERLAY_DECLARE(overlay_name) \ + extern uint8_t of_overlay_begin(overlay_name)[]; \ + extern uint8_t of_overlay_end(overlay_name)[] \ + /** * of_overlay_apply_kunit() - Test managed of_overlay_fdt_apply() for built-in overlays * @test: test context @@ -104,12 +111,11 @@ static inline int __of_overlay_apply_kunit(struct kunit *test, */ #define of_overlay_apply_kunit(test, overlay_name) \ ({ \ - extern uint8_t __dtbo_##overlay_name##_begin[]; \ - extern uint8_t __dtbo_##overlay_name##_end[]; \ + OF_OVERLAY_DECLARE(overlay_name); \ \ __of_overlay_apply_kunit((test), \ - __dtbo_##overlay_name##_begin, \ - __dtbo_##overlay_name##_end); \ + of_overlay_begin(overlay_name), \ + of_overlay_end(overlay_name)); \ }) #endif -- cgit v1.2.3 From 2b78d30620d7f8a9f9ce312ad21200ec7a554bd9 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 7 Oct 2024 20:24:29 +0200 Subject: ipv4: Convert ip_route_use_hint() to dscp_t. Pass a dscp_t variable to ip_route_use_hint(), instead of a plain u8, to prevent accidental setting of ECN bits in ->flowi4_tos. Only ip_rcv_finish_core() actually calls ip_route_use_hint(). Use the ip4h_dscp() helper to get the DSCP from the IPv4 header. While there, modify the declaration of ip_route_use_hint() in include/net/route.h so that it matches the prototype of its implementation in net/ipv4/route.c. Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/c40994fdf804db7a363d04fdee01bf48dddda676.1728302212.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/route.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index 5e4374d66927..c219c0fecdcf 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -203,8 +203,8 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct in_device *in_dev, u32 *itag); int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev); -int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin, +int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev, const struct sk_buff *hint); static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, -- cgit v1.2.3 From d32976408744a589f04b5c939f8f01f7167e5167 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 7 Oct 2024 20:24:54 +0200 Subject: ipv4: Convert ip_mc_validate_source() to dscp_t. Pass a dscp_t variable to ip_mc_validate_source(), instead of a plain u8, to prevent accidental setting of ECN bits in ->flowi4_tos. Callers of ip_mc_validate_source() to consider are: * ip_route_input_mc() which already has a dscp_t variable to pass as parameter. We just need to remove the inet_dscp_to_dsfield() conversion. * udp_v4_early_demux() which gets the DSCP directly from the IPv4 header and can simply use the ip4h_dscp() helper. Also, stop including net/inet_dscp.h in udp.c as we don't use any of its declarations anymore. Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/c91b2cca04718b7ee6cf5b9c1d5b40507d65a8d4.1728302212.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/route.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index c219c0fecdcf..586e59f7ed8a 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -198,8 +198,9 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 fl4->fl4_gre_key = gre_key; return ip_route_output_key(net, fl4); } + int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, + dscp_t dscp, struct net_device *dev, struct in_device *in_dev, u32 *itag); int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev); -- cgit v1.2.3 From d36236ab52754ef6bd083be945e9c2e93f466022 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 7 Oct 2024 20:25:02 +0200 Subject: ipv4: Convert fib_validate_source() to dscp_t. Pass a dscp_t variable to fib_validate_source(), instead of a plain u8, to prevent accidental setting of ECN bits in ->flowi4_tos. All callers of fib_validate_source() already have a dscp_t variable to pass as parameter. We just need to remove the inet_dscp_to_dsfield() conversions. Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/08612a4519bc5a3578bb493fbaad82437ebb73dc.1728302212.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/ip_fib.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 967e4dc555fa..06130933542d 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -449,8 +449,9 @@ int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla, __be32 fib_compute_spec_dst(struct sk_buff *skb); bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev); int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, - u8 tos, int oif, struct net_device *dev, + dscp_t dscp, int oif, struct net_device *dev, struct in_device *idev, u32 *itag); + #ifdef CONFIG_IP_ROUTE_CLASSID static inline int fib_num_tclassid_users(struct net *net) { -- cgit v1.2.3 From 80c549cd1ab0241a7af262690a0ff9991fc74ec5 Mon Sep 17 00:00:00 2001 From: Alexander Zubkov Date: Tue, 8 Oct 2024 18:27:57 +0200 Subject: Fix misspelling of "accept*" in net Several files have "accept*" misspelled as "accpet*" in the comments. Fix all such occurrences. Signed-off-by: Alexander Zubkov Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241008162756.22618-2-green@qrator.net Signed-off-by: Jakub Kicinski --- include/uapi/linux/udp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index 1a0fe8b151fb..d85d671deed3 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -31,7 +31,7 @@ struct udphdr { #define UDP_CORK 1 /* Never send partially complete segments */ #define UDP_ENCAP 100 /* Set the socket to accept encapsulated packets */ #define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */ -#define UDP_NO_CHECK6_RX 102 /* Disable accpeting checksum for UDP6 */ +#define UDP_NO_CHECK6_RX 102 /* Disable accepting checksum for UDP6 */ #define UDP_SEGMENT 103 /* Set GSO segmentation size */ #define UDP_GRO 104 /* This socket can receive UDP GRO packets */ -- cgit v1.2.3 From 87173021f1583ee37f4801fcde354729da8db3dc Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 8 Oct 2024 10:29:03 -0700 Subject: ipv4: Link IPv4 address to per-netns hash table. As a prep for per-netns RTNL conversion, we want to namespacify the IPv4 address hash table and the GC work. Let's allocate the per-netns IPv4 address hash table to net->ipv4.inet_addr_lst and link IPv4 addresses into it. The actual users will be converted later. Note that the IPv6 address hash table is already namespacified. Reviewed-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241008172906.1326-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/linux/inetdevice.h | 1 + include/net/netns/ipv4.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index cb5280e6cc21..d0c2bf67a9b0 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -142,6 +142,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) struct in_ifaddr { struct hlist_node hash; + struct hlist_node addr_lst; struct in_ifaddr __rcu *ifa_next; struct in_device *ifa_dev; struct rcu_head rcu_head; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 276f622f3516..29eba2eaaa26 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -270,5 +270,6 @@ struct netns_ipv4 { atomic_t rt_genid; siphash_key_t ip_id_key; + struct hlist_head *inet_addr_lst; }; #endif -- cgit v1.2.3 From 1675f385213edc14ed849e079d6866b48e552252 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 8 Oct 2024 10:29:05 -0700 Subject: ipv4: Namespacify IPv4 address GC. Each IPv4 address could have a lifetime, which is useful for DHCP, and GC is periodically executed as check_lifetime_work. check_lifetime() does the actual GC under RTNL. 1. Acquire RTNL 2. Iterate inet_addr_lst 3. Remove IPv4 address if expired 4. Release RTNL Namespacifying the GC is required for per-netns RTNL, but using the per-netns hash table will shorten the time on the hash bucket iteration under RTNL. Let's add per-netns GC work and use the per-netns hash table. Reviewed-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241008172906.1326-4-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/netns/ipv4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 29eba2eaaa26..66a4cffc44ee 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -271,5 +271,6 @@ struct netns_ipv4 { atomic_t rt_genid; siphash_key_t ip_id_key; struct hlist_head *inet_addr_lst; + struct delayed_work addr_chk_work; }; #endif -- cgit v1.2.3 From 99ee348e6a41cf24b334a1bb7cde87239e8e2d95 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 8 Oct 2024 10:29:06 -0700 Subject: ipv4: Retire global IPv4 hash table inet_addr_lst. No one uses inet_addr_lst anymore, so let's remove it. Reviewed-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241008172906.1326-5-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/linux/inetdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index d0c2bf67a9b0..d9c690c8c80b 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -141,7 +141,6 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) ARP_EVICT_NOCARRIER) struct in_ifaddr { - struct hlist_node hash; struct hlist_node addr_lst; struct in_ifaddr __rcu *ifa_next; struct in_device *ifa_dev; -- cgit v1.2.3 From ee3283c608dfa21251b0821d7bb198c7ae3189f6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Oct 2024 17:27:16 -0400 Subject: timekeeping: Add interfaces for handling timestamps with a floor value Multigrain timestamps allow the kernel to use fine-grained timestamps when an inode's attributes is being actively observed via ->getattr(). With this support, it's possible for a file to get a fine-grained timestamp, and another modified after it to get a coarse-grained stamp that is earlier than the fine-grained time. If this happens then the files can appear to have been modified in reverse order, which breaks VFS ordering guarantees [1]. To prevent this, maintain a floor value for multigrain timestamps. Whenever a fine-grained timestamp is handed out, record it, and when later coarse-grained stamps are handed out, ensure they are not earlier than that value. If the coarse-grained timestamp is earlier than the fine-grained floor, return the floor value instead. Add a static singleton atomic64_t into timekeeper.c that is used to keep track of the latest fine-grained time ever handed out. This is tracked as a monotonic ktime_t value to ensure that it isn't affected by clock jumps. Because it is updated at different times than the rest of the timekeeper object, the floor value is managed independently of the timekeeper via a cmpxchg() operation, and sits on its own cacheline. Add two new public interfaces: - ktime_get_coarse_real_ts64_mg() fills a timespec64 with the later of the coarse-grained clock and the floor time - ktime_get_real_ts64_mg() gets the fine-grained clock value, and tries to swap it into the floor. A timespec64 is filled with the result. The floor value is global and updated via a single try_cmpxchg(). If that fails then the operation raced with a concurrent update. Any concurrent update must be later than the existing floor value, so any racing tasks can accept any resulting floor value without retrying. [1]: POSIX requires that files be stamped with realtime clock values, and makes no provision for dealing with backward clock jumps. If a backward realtime clock jump occurs, then files can appear to have been modified in reverse order. Signed-off-by: Jeff Layton Signed-off-by: Thomas Gleixner Tested-by: Randy Dunlap # documentation bits Acked-by: John Stultz Link: https://lore.kernel.org/all/20241002-mgtime-v10-1-d1c4717f5284@kernel.org Signed-off-by: Christian Brauner --- include/linux/timekeeping.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index fc12a9ba2c88..7aa85246c183 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -45,6 +45,10 @@ extern void ktime_get_real_ts64(struct timespec64 *tv); extern void ktime_get_coarse_ts64(struct timespec64 *ts); extern void ktime_get_coarse_real_ts64(struct timespec64 *ts); +/* Multigrain timestamp interfaces */ +extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts); +extern void ktime_get_real_ts64_mg(struct timespec64 *ts); + void getboottime64(struct timespec64 *ts); /* -- cgit v1.2.3 From 2a15385742c689a271345dcbb4c28b9c568bc7ce Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Oct 2024 17:27:17 -0400 Subject: timekeeping: Add percpu counter for tracking floor swap events The mgtime_floor value is a global variable for tracking the latest fine-grained timestamp handed out. Because it's a global, track the number of times that a new floor value is assigned. Add a new percpu counter to the timekeeping code to track the number of floor swap events that have occurred. A later patch will add a debugfs file to display this counter alongside other stats involving multigrain timestamps. Signed-off-by: Jeff Layton Signed-off-by: Thomas Gleixner Tested-by: Randy Dunlap # documentation bits Link: https://lore.kernel.org/all/20241002-mgtime-v10-2-d1c4717f5284@kernel.org Signed-off-by: Christian Brauner --- include/linux/timekeeping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 7aa85246c183..84a035e86ac8 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -48,6 +48,7 @@ extern void ktime_get_coarse_real_ts64(struct timespec64 *ts); /* Multigrain timestamp interfaces */ extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts); extern void ktime_get_real_ts64_mg(struct timespec64 *ts); +extern unsigned long timekeeping_get_mg_floor_swaps(void); void getboottime64(struct timespec64 *ts); -- cgit v1.2.3 From 7f2c86cba3c584c7227cddaabdf0ab54c8151e60 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Oct 2024 17:27:20 -0400 Subject: fs: handle delegated timestamps in setattr_copy_mgtime An update to the inode ctime typically requires the latest clock value possible. The exception to this rule is when there is a nfsd write delegation and the server is proxying timestamps from the client. When nfsd gets a CB_GETATTR response, update the timestamp value in the inode to the values that the client is tracking. The client doesn't send a ctime value (since that's always determined by the exported filesystem), but it can send a mtime value. In the case where it does, update the ctime to a value commensurate with that instead of the current time. If ATTR_DELEG is set, then use ia_ctime value instead of setting the timestamp to the current time. With the addition of delegated timestamps, the server may receive a request to update only the atime, which doesn't involve a ctime update. Trust the ATTR_CTIME flag in the update and only update the ctime when it's set. Tested-by: Randy Dunlap # documentation bits Reviewed-by: Jan Kara Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20241002-mgtime-v10-5-d1c4717f5284@kernel.org Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index eff688e75f2f..ea7ed437d2b1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1544,6 +1544,8 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, struct timespec64 current_time(struct inode *inode); struct timespec64 inode_set_ctime_current(struct inode *inode); +struct timespec64 inode_set_ctime_deleg(struct inode *inode, + struct timespec64 update); static inline time64_t inode_get_atime_sec(const struct inode *inode) { -- cgit v1.2.3 From c86e3c47187ad7fa17f8533a4142453991684b46 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Oct 2024 17:27:21 -0400 Subject: fs: tracepoints around multigrain timestamp events Add some tracepoints around various multigrain timestamp events. Reviewed-by: Josef Bacik Reviewed-by: Darrick J. Wong Reviewed-by: Jan Kara Reviewed-by: Steven Rostedt (Google) Tested-by: Randy Dunlap # documentation bits Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20241002-mgtime-v10-6-d1c4717f5284@kernel.org Signed-off-by: Christian Brauner --- include/trace/events/timestamp.h | 124 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 include/trace/events/timestamp.h (limited to 'include') diff --git a/include/trace/events/timestamp.h b/include/trace/events/timestamp.h new file mode 100644 index 000000000000..c9e5ec930054 --- /dev/null +++ b/include/trace/events/timestamp.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM timestamp + +#if !defined(_TRACE_TIMESTAMP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_TIMESTAMP_H + +#include +#include + +#define CTIME_QUERIED_FLAGS \ + { I_CTIME_QUERIED, "Q" } + +DECLARE_EVENT_CLASS(ctime, + TP_PROTO(struct inode *inode, + struct timespec64 *ctime), + + TP_ARGS(inode, ctime), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(time64_t, ctime_s) + __field(u32, ctime_ns) + __field(u32, gen) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->gen = inode->i_generation; + __entry->ctime_s = ctime->tv_sec; + __entry->ctime_ns = ctime->tv_nsec; + ), + + TP_printk("ino=%d:%d:%ld:%u ctime=%lld.%u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->gen, + __entry->ctime_s, __entry->ctime_ns + ) +); + +DEFINE_EVENT(ctime, inode_set_ctime_to_ts, + TP_PROTO(struct inode *inode, + struct timespec64 *ctime), + TP_ARGS(inode, ctime)); + +DEFINE_EVENT(ctime, ctime_xchg_skip, + TP_PROTO(struct inode *inode, + struct timespec64 *ctime), + TP_ARGS(inode, ctime)); + +TRACE_EVENT(ctime_ns_xchg, + TP_PROTO(struct inode *inode, + u32 old, + u32 new, + u32 cur), + + TP_ARGS(inode, old, new, cur), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(u32, gen) + __field(u32, old) + __field(u32, new) + __field(u32, cur) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->gen = inode->i_generation; + __entry->old = old; + __entry->new = new; + __entry->cur = cur; + ), + + TP_printk("ino=%d:%d:%ld:%u old=%u:%s new=%u cur=%u:%s", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->gen, + __entry->old & ~I_CTIME_QUERIED, + __print_flags(__entry->old & I_CTIME_QUERIED, "|", CTIME_QUERIED_FLAGS), + __entry->new, + __entry->cur & ~I_CTIME_QUERIED, + __print_flags(__entry->cur & I_CTIME_QUERIED, "|", CTIME_QUERIED_FLAGS) + ) +); + +TRACE_EVENT(fill_mg_cmtime, + TP_PROTO(struct inode *inode, + struct timespec64 *ctime, + struct timespec64 *mtime), + + TP_ARGS(inode, ctime, mtime), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(time64_t, ctime_s) + __field(time64_t, mtime_s) + __field(u32, ctime_ns) + __field(u32, mtime_ns) + __field(u32, gen) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->gen = inode->i_generation; + __entry->ctime_s = ctime->tv_sec; + __entry->mtime_s = mtime->tv_sec; + __entry->ctime_ns = ctime->tv_nsec; + __entry->mtime_ns = mtime->tv_nsec; + ), + + TP_printk("ino=%d:%d:%ld:%u ctime=%lld.%u mtime=%lld.%u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->gen, + __entry->ctime_s, __entry->ctime_ns, + __entry->mtime_s, __entry->mtime_ns + ) +); +#endif /* _TRACE_TIMESTAMP_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 440e3dcd7c739ba5b256196a89e796fb7e59c755 Mon Sep 17 00:00:00 2001 From: Sunyeal Hong Date: Wed, 9 Oct 2024 13:21:08 +0900 Subject: dt-bindings: clock: exynosautov920: add peric1, misc and hsi0/1 clock definitions Add peric1, misc and hsi0/1 clock definitions. - CMU_PERIC1 for USI, IC2 and I3C - CMU_MISC for MISC, GIC and OTP - HSI0 for PCIE - HSI1 for USB and MMC Signed-off-by: Sunyeal Hong Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20241009042110.2379903-2-sunyeal.hong@samsung.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/samsung,exynosautov920.h | 47 ++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/samsung,exynosautov920.h b/include/dt-bindings/clock/samsung,exynosautov920.h index c720f344b6bf..0c681f2ba3d0 100644 --- a/include/dt-bindings/clock/samsung,exynosautov920.h +++ b/include/dt-bindings/clock/samsung,exynosautov920.h @@ -160,6 +160,7 @@ #define DOUT_CLKCMU_SNW_NOC 144 #define DOUT_CLKCMU_SSP_NOC 145 #define DOUT_CLKCMU_TAA_NOC 146 +#define DOUT_TCXO_DIV2 147 /* CMU_PERIC0 */ #define CLK_MOUT_PERIC0_IP_USER 1 @@ -188,4 +189,50 @@ #define CLK_DOUT_PERIC0_USI_I2C 23 #define CLK_DOUT_PERIC0_I3C 24 +/* CMU_PERIC1 */ +#define CLK_MOUT_PERIC1_IP_USER 1 +#define CLK_MOUT_PERIC1_NOC_USER 2 +#define CLK_MOUT_PERIC1_USI09_USI 3 +#define CLK_MOUT_PERIC1_USI10_USI 4 +#define CLK_MOUT_PERIC1_USI11_USI 5 +#define CLK_MOUT_PERIC1_USI12_USI 6 +#define CLK_MOUT_PERIC1_USI13_USI 7 +#define CLK_MOUT_PERIC1_USI14_USI 8 +#define CLK_MOUT_PERIC1_USI15_USI 9 +#define CLK_MOUT_PERIC1_USI16_USI 10 +#define CLK_MOUT_PERIC1_USI17_USI 11 +#define CLK_MOUT_PERIC1_USI_I2C 12 +#define CLK_MOUT_PERIC1_I3C 13 + +#define CLK_DOUT_PERIC1_USI09_USI 14 +#define CLK_DOUT_PERIC1_USI10_USI 15 +#define CLK_DOUT_PERIC1_USI11_USI 16 +#define CLK_DOUT_PERIC1_USI12_USI 17 +#define CLK_DOUT_PERIC1_USI13_USI 18 +#define CLK_DOUT_PERIC1_USI14_USI 19 +#define CLK_DOUT_PERIC1_USI15_USI 20 +#define CLK_DOUT_PERIC1_USI16_USI 21 +#define CLK_DOUT_PERIC1_USI17_USI 22 +#define CLK_DOUT_PERIC1_USI_I2C 23 +#define CLK_DOUT_PERIC1_I3C 24 + +/* CMU_MISC */ +#define CLK_MOUT_MISC_NOC_USER 1 +#define CLK_MOUT_MISC_GIC 2 + +#define CLK_DOUT_MISC_OTP 3 +#define CLK_DOUT_MISC_NOCP 4 +#define CLK_DOUT_MISC_OSC_DIV2 5 + +/* CMU_HSI0 */ +#define CLK_MOUT_HSI0_NOC_USER 1 + +#define CLK_DOUT_HSI0_PCIE_APB 2 + +/* CMU_HSI1 */ +#define CLK_MOUT_HSI1_MMC_CARD_USER 1 +#define CLK_MOUT_HSI1_NOC_USER 2 +#define CLK_MOUT_HSI1_USBDRD_USER 3 +#define CLK_MOUT_HSI1_USBDRD 4 + #endif /* _DT_BINDINGS_CLOCK_EXYNOSAUTOV920_H */ -- cgit v1.2.3 From 9e542ff8b79ae1871074d3a7dca7de9e7374eeda Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 8 Oct 2024 09:32:04 -0700 Subject: net: Remove likely from l3mdev_master_ifindex_by_index The likely() annotation in l3mdev_master_ifindex_by_index() has been found to be incorrect 100% of the time in real-world workloads (e.g., web servers). Annotated branches shows the following in these servers: correct incorrect % Function File Line 0 169053813 100 l3mdev_master_ifindex_by_index l3mdev.h 81 This is happening because l3mdev_master_ifindex_by_index() is called from __inet_check_established(), which calls l3mdev_master_ifindex_by_index() passing the socked bounded interface. l3mdev_master_ifindex_by_index(net, sk->sk_bound_dev_if); Since most sockets are not going to be bound to a network device, the likely() is giving the wrong assumption. Remove the likely() annotation to ensure more accurate branch prediction. Signed-off-by: Breno Leitao Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241008163205.3939629-1-leitao@debian.org Signed-off-by: Paolo Abeni --- include/net/l3mdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 031c661aa14d..2d6141f28b53 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -78,7 +78,7 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) struct net_device *dev; int rc = 0; - if (likely(ifindex)) { + if (ifindex) { rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); -- cgit v1.2.3 From 016f426a14f09faa8bdb68b063c2947edf3108a1 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 8 Oct 2024 21:32:09 +0300 Subject: net/mlx5: qos: Flesh out element_attributes in mlx5_ifc.h This is used for multiple purposes, depending on the scheduling element created. There are a few helper struct defined a long time ago, but they are not easy to find in the file and they are about to get new members. This commit cleans up this area a bit by: - moving the helper structs closer to where they are relevant. - defining a helper union to include all of them to help discoverability. - making use of it everywhere element_attributes is used. - using a consistent 'attr' name. Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni --- include/linux/mlx5/mlx5_ifc.h | 67 ++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 96d369112bfa..c79ba6197673 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4105,11 +4105,47 @@ enum { ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP = 1 << 4, }; +enum { + TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0, + TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1, + TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2, +}; + +enum { + TSAR_TYPE_CAP_MASK_DWRR = 1 << 0, + TSAR_TYPE_CAP_MASK_ROUND_ROBIN = 1 << 1, + TSAR_TYPE_CAP_MASK_ETS = 1 << 2, +}; + +struct mlx5_ifc_tsar_element_bits { + u8 reserved_at_0[0x8]; + u8 tsar_type[0x8]; + u8 reserved_at_10[0x10]; +}; + +struct mlx5_ifc_vport_element_bits { + u8 reserved_at_0[0x10]; + u8 vport_number[0x10]; +}; + +struct mlx5_ifc_vport_tc_element_bits { + u8 traffic_class[0x4]; + u8 reserved_at_4[0xc]; + u8 vport_number[0x10]; +}; + +union mlx5_ifc_element_attributes_bits { + struct mlx5_ifc_tsar_element_bits tsar; + struct mlx5_ifc_vport_element_bits vport; + struct mlx5_ifc_vport_tc_element_bits vport_tc; + u8 reserved_at_0[0x20]; +}; + struct mlx5_ifc_scheduling_context_bits { u8 element_type[0x8]; u8 reserved_at_8[0x18]; - u8 element_attributes[0x20]; + union mlx5_ifc_element_attributes_bits element_attributes; u8 parent_element_id[0x20]; @@ -4798,35 +4834,6 @@ struct mlx5_ifc_register_loopback_control_bits { u8 reserved_at_20[0x60]; }; -struct mlx5_ifc_vport_tc_element_bits { - u8 traffic_class[0x4]; - u8 reserved_at_4[0xc]; - u8 vport_number[0x10]; -}; - -struct mlx5_ifc_vport_element_bits { - u8 reserved_at_0[0x10]; - u8 vport_number[0x10]; -}; - -enum { - TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0, - TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1, - TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2, -}; - -enum { - TSAR_TYPE_CAP_MASK_DWRR = 1 << 0, - TSAR_TYPE_CAP_MASK_ROUND_ROBIN = 1 << 1, - TSAR_TYPE_CAP_MASK_ETS = 1 << 2, -}; - -struct mlx5_ifc_tsar_element_bits { - u8 reserved_at_0[0x8]; - u8 tsar_type[0x8]; - u8 reserved_at_10[0x10]; -}; - enum { MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_SUCCESS = 0x0, MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL = 0x1, -- cgit v1.2.3 From fceffbfe57af7d9941d08e1a995cccf558d08451 Mon Sep 17 00:00:00 2001 From: Patrick Rudolph Date: Wed, 2 Oct 2024 14:54:58 +0200 Subject: regulator: max5970: Drop unused structs After splitting the max5970 into a MFD device clean the remaining code and drop unused structs. The struct max5970_data and enum max5970_chip_type aren't used. Signed-off-by: Patrick Rudolph Acked-by: Lee Jones Link: https://patch.msgid.link/20241002125500.78278-1-patrick.rudolph@9elements.com Signed-off-by: Mark Brown --- include/linux/mfd/max5970.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/max5970.h b/include/linux/mfd/max5970.h index 762a7d40c843..fc50e89edfaa 100644 --- a/include/linux/mfd/max5970.h +++ b/include/linux/mfd/max5970.h @@ -16,18 +16,6 @@ #define MAX5978_NUM_SWITCHES 1 #define MAX5970_NUM_LEDS 4 -struct max5970_data { - int num_switches; - u32 irng[MAX5970_NUM_SWITCHES]; - u32 mon_rng[MAX5970_NUM_SWITCHES]; - u32 shunt_micro_ohms[MAX5970_NUM_SWITCHES]; -}; - -enum max5970_chip_type { - TYPE_MAX5978 = 1, - TYPE_MAX5970, -}; - #define MAX5970_REG_CURRENT_L(ch) (0x01 + (ch) * 4) #define MAX5970_REG_CURRENT_H(ch) (0x00 + (ch) * 4) #define MAX5970_REG_VOLTAGE_L(ch) (0x03 + (ch) * 4) -- cgit v1.2.3 From 0e8158b4a82eb5bfb69df17510d210b073896f00 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 2 Oct 2024 14:22:24 +0200 Subject: OPP: Rework _set_required_devs() to manage a single device per call At this point there are no consumer drivers that makes use of _set_required_devs(), hence it should be straightforward to rework the code to enable it to better integrate with the PM domain attach procedure. During attach, one device at the time is being hooked up to its corresponding PM domain. Therefore, let's update the _set_required_devs() to align to this behaviour, allowing callers to fill out one required_dev per call. Subsequent changes starts making use of this. Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20241002122232.194245-4-ulf.hansson@linaro.org --- include/linux/pm_opp.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 6424692c30b7..bc74bc69107a 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -63,10 +63,11 @@ typedef int (*config_clks_t)(struct device *dev, struct opp_table *opp_table, * @supported_hw_count: Number of elements in the array. * @regulator_names: Array of pointers to the names of the regulator, NULL terminated. * @genpd_names: Null terminated array of pointers containing names of genpd to - * attach. Mutually exclusive with required_devs. + * attach. Mutually exclusive with required_dev. * @virt_devs: Pointer to return the array of genpd virtual devices. Mutually - * exclusive with required_devs. - * @required_devs: Required OPP devices. Mutually exclusive with genpd_names/virt_devs. + * exclusive with required_dev. + * @required_dev: Required OPP device. Mutually exclusive with genpd_names/virt_devs. + * @required_dev_index: The index of the required OPP for the @required_dev. * * This structure contains platform specific OPP configurations for the device. */ @@ -81,7 +82,8 @@ struct dev_pm_opp_config { const char * const *regulator_names; const char * const *genpd_names; struct device ***virt_devs; - struct device **required_devs; + struct device *required_dev; + unsigned int required_dev_index; }; #define OPP_LEVEL_UNSET U32_MAX -- cgit v1.2.3 From 98d277a79126a2df8a2617215846d01f823cfffa Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 2 Oct 2024 14:22:25 +0200 Subject: PM: domains: Support required OPPs in dev_pm_domain_attach_list() In the multiple PM domain case we need platform code to specify the index of the corresponding required OPP in DT for a device, which is what *_opp_attach_genpd() is there to help us with. However, attaching a device to its PM domains is in general better done with dev_pm_domain_attach_list(). To avoid having two different ways to manage this and to prepare for the removal of *_opp_attach_genpd(), let's extend dev_pm_domain_attach|detach_list() to manage the required OPPs too. Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20241002122232.194245-5-ulf.hansson@linaro.org --- include/linux/pm_domain.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index b637ec14025f..92f9d56f623d 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -30,9 +30,16 @@ * supplier and its PM domain when creating the * device-links. * + * PD_FLAG_REQUIRED_OPP: Assign required_devs for the required OPPs. The + * index of the required OPP must correspond to the + * index in the array of the pd_names. If pd_names + * isn't specified, the index just follows the + * index for the attached PM domain. + * */ #define PD_FLAG_NO_DEV_LINK BIT(0) #define PD_FLAG_DEV_LINK_ON BIT(1) +#define PD_FLAG_REQUIRED_OPP BIT(2) struct dev_pm_domain_attach_data { const char * const *pd_names; @@ -43,6 +50,7 @@ struct dev_pm_domain_attach_data { struct dev_pm_domain_list { struct device **pd_devs; struct device_link **pd_links; + u32 *opp_tokens; u32 num_pds; }; -- cgit v1.2.3 From e130ca9d4873f8d01e3e7b8137e0c14196a3cfb4 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 2 Oct 2024 14:22:27 +0200 Subject: pmdomain: core: Set the required dev for a required OPP during genpd attach In the single PM domain case there is no need for platform code to specify the index of the corresponding required OPP in DT, as the index must be zero. This allows us to assign a required dev for the required OPP from genpd, while attaching a device to its PM domain. In this way, we can remove some of the genpd specific code in the OPP core for the single PM domain case. Although, this cleanup is made from a subsequent change. Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20241002122232.194245-7-ulf.hansson@linaro.org --- include/linux/pm_domain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 92f9d56f623d..76775ab38898 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -252,6 +252,7 @@ struct generic_pm_domain_data { unsigned int performance_state; unsigned int default_pstate; unsigned int rpm_pstate; + unsigned int opp_token; bool hw_mode; void *data; }; -- cgit v1.2.3 From d6caca30a548764f8cfd78393ea09fefdf285212 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 2 Oct 2024 14:22:32 +0200 Subject: OPP: Drop redundant *_opp_attach|detach_genpd() All users of *_opp_attach|detach_genpd(), have been converted to use dev|devm_pm_domain_attach|detach_list(), hence let's drop it along with its corresponding exported functions. Acked-by: Viresh Kumar Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20241002122232.194245-12-ulf.hansson@linaro.org --- include/linux/pm_opp.h | 38 +------------------------------------- 1 file changed, 1 insertion(+), 37 deletions(-) (limited to 'include') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index bc74bc69107a..568183e3e641 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -62,11 +62,7 @@ typedef int (*config_clks_t)(struct device *dev, struct opp_table *opp_table, * @supported_hw: Array of hierarchy of versions to match. * @supported_hw_count: Number of elements in the array. * @regulator_names: Array of pointers to the names of the regulator, NULL terminated. - * @genpd_names: Null terminated array of pointers containing names of genpd to - * attach. Mutually exclusive with required_dev. - * @virt_devs: Pointer to return the array of genpd virtual devices. Mutually - * exclusive with required_dev. - * @required_dev: Required OPP device. Mutually exclusive with genpd_names/virt_devs. + * @required_dev: The required OPP device. * @required_dev_index: The index of the required OPP for the @required_dev. * * This structure contains platform specific OPP configurations for the device. @@ -80,8 +76,6 @@ struct dev_pm_opp_config { const unsigned int *supported_hw; unsigned int supported_hw_count; const char * const *regulator_names; - const char * const *genpd_names; - struct device ***virt_devs; struct device *required_dev; unsigned int required_dev_index; }; @@ -677,36 +671,6 @@ static inline void dev_pm_opp_put_config_regulators(int token) dev_pm_opp_clear_config(token); } -/* genpd helpers */ -static inline int dev_pm_opp_attach_genpd(struct device *dev, - const char * const *names, - struct device ***virt_devs) -{ - struct dev_pm_opp_config config = { - .genpd_names = names, - .virt_devs = virt_devs, - }; - - return dev_pm_opp_set_config(dev, &config); -} - -static inline void dev_pm_opp_detach_genpd(int token) -{ - dev_pm_opp_clear_config(token); -} - -static inline int devm_pm_opp_attach_genpd(struct device *dev, - const char * const *names, - struct device ***virt_devs) -{ - struct dev_pm_opp_config config = { - .genpd_names = names, - .virt_devs = virt_devs, - }; - - return devm_pm_opp_set_config(dev, &config); -} - /* prop-name helpers */ static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name) { -- cgit v1.2.3 From 13d68a16430312fc21990f48326366eb73891202 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 9 Oct 2024 10:09:47 +0200 Subject: genetlink: extend info user-storage to match NL cb ctx This allows a more uniform implementation of non-dump and dump operations, and will be used later in the series to avoid some per-operation allocation. Additionally rename the NL_ASSERT_DUMP_CTX_FITS macro, to fit a more extended usage. Suggested-by: Jakub Kicinski Reviewed-by: Jakub Kicinski Reviewed-by: Jiri Pirko Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/1130cc2896626b84587a2a5f96a5c6829638f4da.1728460186.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/netlink.h | 5 +++-- include/net/genetlink.h | 8 ++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index b332c2048c75..a3ca198a3a9e 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -34,6 +34,7 @@ struct netlink_skb_parms { #define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb)) #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) +#define NETLINK_CTX_SIZE 48 void netlink_table_grab(void); @@ -293,7 +294,7 @@ struct netlink_callback { int flags; bool strict_check; union { - u8 ctx[48]; + u8 ctx[NETLINK_CTX_SIZE]; /* args is deprecated. Cast a struct over ctx instead * for proper type safety. @@ -302,7 +303,7 @@ struct netlink_callback { }; }; -#define NL_ASSERT_DUMP_CTX_FITS(type_name) \ +#define NL_ASSERT_CTX_FITS(type_name) \ BUILD_BUG_ON(sizeof(type_name) > \ sizeof_field(struct netlink_callback, ctx)) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 9ab49bfeae78..9d3726e8f90e 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -124,7 +124,8 @@ struct genl_family { * @genlhdr: generic netlink message header * @attrs: netlink attributes * @_net: network namespace - * @user_ptr: user pointers + * @ctx: storage space for the use by the family + * @user_ptr: user pointers (deprecated, use ctx instead) * @extack: extended ACK report struct */ struct genl_info { @@ -135,7 +136,10 @@ struct genl_info { struct genlmsghdr * genlhdr; struct nlattr ** attrs; possible_net_t _net; - void * user_ptr[2]; + union { + u8 ctx[NETLINK_CTX_SIZE]; + void * user_ptr[2]; + }; struct netlink_ext_ack *extack; }; -- cgit v1.2.3 From 04e65df94b3112a1b319b6deb5bab83fd740bc7d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 9 Oct 2024 10:09:48 +0200 Subject: netlink: spec: add shaper YAML spec Define the user-space visible interface to query, configure and delete network shapers via yaml definition. Add dummy implementations for the relevant NL callbacks. set() and delete() operations touch a single shaper creating/updating or deleting it. The group() operation creates a shaper's group, nesting multiple input shapers under the specified output shaper. Reviewed-by: Jiri Pirko Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/7a33a1ff370bdbcd0cd3f909575c912cd56f41da.1728460186.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/net_shaper.h | 78 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 include/uapi/linux/net_shaper.h (limited to 'include') diff --git a/include/uapi/linux/net_shaper.h b/include/uapi/linux/net_shaper.h new file mode 100644 index 000000000000..9e3fa63618ee --- /dev/null +++ b/include/uapi/linux/net_shaper.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/net_shaper.yaml */ +/* YNL-GEN uapi header */ + +#ifndef _UAPI_LINUX_NET_SHAPER_H +#define _UAPI_LINUX_NET_SHAPER_H + +#define NET_SHAPER_FAMILY_NAME "net-shaper" +#define NET_SHAPER_FAMILY_VERSION 1 + +/** + * enum net_shaper_scope - Defines the shaper @id interpretation. + * @NET_SHAPER_SCOPE_UNSPEC: The scope is not specified. + * @NET_SHAPER_SCOPE_NETDEV: The main shaper for the given network device. + * @NET_SHAPER_SCOPE_QUEUE: The shaper is attached to the given device queue, + * the @id represents the queue number. + * @NET_SHAPER_SCOPE_NODE: The shaper allows grouping of queues or other node + * shapers; can be nested in either @netdev shapers or other @node shapers, + * allowing placement in any location of the scheduling tree, except leaves + * and root. + */ +enum net_shaper_scope { + NET_SHAPER_SCOPE_UNSPEC, + NET_SHAPER_SCOPE_NETDEV, + NET_SHAPER_SCOPE_QUEUE, + NET_SHAPER_SCOPE_NODE, + + /* private: */ + __NET_SHAPER_SCOPE_MAX, + NET_SHAPER_SCOPE_MAX = (__NET_SHAPER_SCOPE_MAX - 1) +}; + +/** + * enum net_shaper_metric - Different metric supported by the shaper. + * @NET_SHAPER_METRIC_BPS: Shaper operates on a bits per second basis. + * @NET_SHAPER_METRIC_PPS: Shaper operates on a packets per second basis. + */ +enum net_shaper_metric { + NET_SHAPER_METRIC_BPS, + NET_SHAPER_METRIC_PPS, +}; + +enum { + NET_SHAPER_A_HANDLE = 1, + NET_SHAPER_A_METRIC, + NET_SHAPER_A_BW_MIN, + NET_SHAPER_A_BW_MAX, + NET_SHAPER_A_BURST, + NET_SHAPER_A_PRIORITY, + NET_SHAPER_A_WEIGHT, + NET_SHAPER_A_IFINDEX, + NET_SHAPER_A_PARENT, + NET_SHAPER_A_LEAVES, + + __NET_SHAPER_A_MAX, + NET_SHAPER_A_MAX = (__NET_SHAPER_A_MAX - 1) +}; + +enum { + NET_SHAPER_A_HANDLE_SCOPE = 1, + NET_SHAPER_A_HANDLE_ID, + + __NET_SHAPER_A_HANDLE_MAX, + NET_SHAPER_A_HANDLE_MAX = (__NET_SHAPER_A_HANDLE_MAX - 1) +}; + +enum { + NET_SHAPER_CMD_GET = 1, + NET_SHAPER_CMD_SET, + NET_SHAPER_CMD_DELETE, + NET_SHAPER_CMD_GROUP, + + __NET_SHAPER_CMD_MAX, + NET_SHAPER_CMD_MAX = (__NET_SHAPER_CMD_MAX - 1) +}; + +#endif /* _UAPI_LINUX_NET_SHAPER_H */ -- cgit v1.2.3 From 4b623f9f0f59652ea71fcb27d60b4c3b65126dbb Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 9 Oct 2024 10:09:49 +0200 Subject: net-shapers: implement NL get operation Introduce the basic infrastructure to implement the net-shaper core functionality. Each network devices carries a net-shaper cache, the NL get() operation fetches the data from such cache. The cache is initially empty, will be fill by the set()/group() operation implemented later and is destroyed at device cleanup time. The net_shaper_fill_handle(), net_shaper_ctx_init(), and net_shaper_generic_pre() implementations handle generic index type attributes, despite the current caller always pass a constant value to avoid more noise in later patches using them with different attributes. Reviewed-by: Jakub Kicinski Reviewed-by: Jiri Pirko Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/ddd10fd645a9367803ad02fca4a5664ea5ace170.1728460186.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 21 ++++++++ include/net/net_shaper.h | 120 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 include/net/net_shaper.h (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3baf8e539b6f..e6b93d01e631 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1603,6 +1603,14 @@ struct net_device_ops { int (*ndo_hwtstamp_set)(struct net_device *dev, struct kernel_hwtstamp_config *kernel_config, struct netlink_ext_ack *extack); + +#if IS_ENABLED(CONFIG_NET_SHAPER) + /** + * @net_shaper_ops: Device shaping offload operations + * see include/net/net_shapers.h + */ + const struct net_shaper_ops *net_shaper_ops; +#endif }; /** @@ -2406,6 +2414,19 @@ struct net_device { u64 max_pacing_offload_horizon; + /** + * @lock: protects @net_shaper_hierarchy, feel free to use for other + * netdev-scope protection. Ordering: take after rtnl_lock. + */ + struct mutex lock; + +#if IS_ENABLED(CONFIG_NET_SHAPER) + /** + * @net_shaper_hierarchy: data tracking the current shaper status + * see include/net/net_shapers.h + */ + struct net_shaper_hierarchy *net_shaper_hierarchy; +#endif u8 priv[] ____cacheline_aligned __counted_by(priv_len); } ____cacheline_aligned; diff --git a/include/net/net_shaper.h b/include/net/net_shaper.h new file mode 100644 index 000000000000..5c3f49b52fe9 --- /dev/null +++ b/include/net/net_shaper.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _NET_SHAPER_H_ +#define _NET_SHAPER_H_ + +#include + +#include + +struct net_device; +struct devlink; +struct netlink_ext_ack; + +enum net_shaper_binding_type { + NET_SHAPER_BINDING_TYPE_NETDEV, + /* NET_SHAPER_BINDING_TYPE_DEVLINK_PORT */ +}; + +struct net_shaper_binding { + enum net_shaper_binding_type type; + union { + struct net_device *netdev; + struct devlink *devlink; + }; +}; + +struct net_shaper_handle { + enum net_shaper_scope scope; + u32 id; +}; + +/** + * struct net_shaper - represents a shaping node on the NIC H/W + * zeroed field are considered not set. + * @parent: Unique identifier for the shaper parent, usually implied + * @handle: Unique identifier for this shaper + * @metric: Specify if the rate limits refers to PPS or BPS + * @bw_min: Minimum guaranteed rate for this shaper + * @bw_max: Maximum peak rate allowed for this shaper + * @burst: Maximum burst for the peek rate of this shaper + * @priority: Scheduling priority for this shaper + * @weight: Scheduling weight for this shaper + */ +struct net_shaper { + struct net_shaper_handle parent; + struct net_shaper_handle handle; + enum net_shaper_metric metric; + u64 bw_min; + u64 bw_max; + u64 burst; + u32 priority; + u32 weight; + + /* private: */ + u32 leaves; /* accounted only for NODE scope */ + struct rcu_head rcu; +}; + +/** + * struct net_shaper_ops - Operations on device H/W shapers + * + * The operations applies to either net_device and devlink objects. + * The initial shaping configuration at device initialization is empty: + * does not constraint the rate in any way. + * The network core keeps track of the applied user-configuration in + * the net_device or devlink structure. + * The operations are serialized via a per device lock. + * + * Device not supporting any kind of nesting should not provide the + * group operation. + * + * Each shaper is uniquely identified within the device with a 'handle' + * comprising the shaper scope and a scope-specific id. + */ +struct net_shaper_ops { + /** + * @group: create the specified shapers scheduling group + * + * Nest the @leaves shapers identified under the * @node shaper. + * All the shapers belong to the device specified by @binding. + * The @leaves arrays size is specified by @leaves_count. + * Create either the @leaves and the @node shaper; or if they already + * exists, links them together in the desired way. + * @leaves scope must be NET_SHAPER_SCOPE_QUEUE. + */ + int (*group)(struct net_shaper_binding *binding, int leaves_count, + const struct net_shaper *leaves, + const struct net_shaper *node, + struct netlink_ext_ack *extack); + + /** + * @set: Updates the specified shaper + * + * Updates or creates the @shaper on the device specified by @binding. + */ + int (*set)(struct net_shaper_binding *binding, + const struct net_shaper *shaper, + struct netlink_ext_ack *extack); + + /** + * @delete: Removes the specified shaper + * + * Removes the shaper configuration as identified by the given @handle + * on the device specified by @binding, restoring the default behavior. + */ + int (*delete)(struct net_shaper_binding *binding, + const struct net_shaper_handle *handle, + struct netlink_ext_ack *extack); + + /** + * @capabilities: get the shaper features supported by the device + * + * Fills the bitmask @cap with the supported capabilities for the + * specified @scope and device specified by @binding. + */ + void (*capabilities)(struct net_shaper_binding *binding, + enum net_shaper_scope scope, unsigned long *cap); +}; + +#endif -- cgit v1.2.3 From 14bba9285aedefb99647d716b0f61bf32081e387 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 9 Oct 2024 10:09:54 +0200 Subject: netlink: spec: add shaper introspection support Allow the user-space to fine-grain query the shaping features supported by the NIC on each domain. Reviewed-by: Jiri Pirko Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/3ddd10e450e3fe7d4b944c0d0b886d4483529ee6.1728460186.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/net_shaper.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/net_shaper.h b/include/uapi/linux/net_shaper.h index 9e3fa63618ee..d8834b59f7d7 100644 --- a/include/uapi/linux/net_shaper.h +++ b/include/uapi/linux/net_shaper.h @@ -65,11 +65,28 @@ enum { NET_SHAPER_A_HANDLE_MAX = (__NET_SHAPER_A_HANDLE_MAX - 1) }; +enum { + NET_SHAPER_A_CAPS_IFINDEX = 1, + NET_SHAPER_A_CAPS_SCOPE, + NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS, + NET_SHAPER_A_CAPS_SUPPORT_METRIC_PPS, + NET_SHAPER_A_CAPS_SUPPORT_NESTING, + NET_SHAPER_A_CAPS_SUPPORT_BW_MIN, + NET_SHAPER_A_CAPS_SUPPORT_BW_MAX, + NET_SHAPER_A_CAPS_SUPPORT_BURST, + NET_SHAPER_A_CAPS_SUPPORT_PRIORITY, + NET_SHAPER_A_CAPS_SUPPORT_WEIGHT, + + __NET_SHAPER_A_CAPS_MAX, + NET_SHAPER_A_CAPS_MAX = (__NET_SHAPER_A_CAPS_MAX - 1) +}; + enum { NET_SHAPER_CMD_GET = 1, NET_SHAPER_CMD_SET, NET_SHAPER_CMD_DELETE, NET_SHAPER_CMD_GROUP, + NET_SHAPER_CMD_CAP_GET, __NET_SHAPER_CMD_MAX, NET_SHAPER_CMD_MAX = (__NET_SHAPER_CMD_MAX - 1) -- cgit v1.2.3 From 608a5c05c39b75fa2539ce9e521d289c5a5326f7 Mon Sep 17 00:00:00 2001 From: Wenjun Wu Date: Wed, 9 Oct 2024 10:09:58 +0200 Subject: virtchnl: support queue rate limit and quanta size configuration This patch adds new virtchnl opcodes and structures for rate limit and quanta size configuration, which include: 1. VIRTCHNL_OP_CONFIG_QUEUE_BW, to configure max bandwidth for each VF per queue. 2. VIRTCHNL_OP_CONFIG_QUANTA, to configure quanta size per queue. 3. VIRTCHNL_OP_GET_QOS_CAPS, VF queries current QoS configuration, such as enabled TCs, arbiter type, up2tc and bandwidth of VSI node. The configuration is previously set by DCB and PF, and now is the potential QoS capability of VF. VF can take it as reference to configure queue TC mapping. Reviewed-by: Jiri Pirko Signed-off-by: Wenjun Wu Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/839002f7bd6f63b985a060a51b079f6e6dbbe237.1728460186.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/avf/virtchnl.h | 119 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) (limited to 'include') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index f41395264dca..223e433c39fe 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -89,6 +89,9 @@ enum virtchnl_rx_hsplit { VIRTCHNL_RX_HSPLIT_SPLIT_SCTP = 8, }; +enum virtchnl_bw_limit_type { + VIRTCHNL_BW_SHAPER = 0, +}; /* END GENERIC DEFINES */ /* Opcodes for VF-PF communication. These are placed in the v_opcode field @@ -151,6 +154,11 @@ enum virtchnl_ops { VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 = 55, VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 = 56, VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57, + /* opcode 57 - 65 are reserved */ + VIRTCHNL_OP_GET_QOS_CAPS = 66, + /* opcode 68 through 111 are reserved */ + VIRTCHNL_OP_CONFIG_QUEUE_BW = 112, + VIRTCHNL_OP_CONFIG_QUANTA = 113, VIRTCHNL_OP_MAX, }; @@ -261,6 +269,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC BIT(26) #define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF BIT(27) #define VIRTCHNL_VF_OFFLOAD_FDIR_PF BIT(28) +#define VIRTCHNL_VF_OFFLOAD_QOS BIT(29) #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \ VIRTCHNL_VF_OFFLOAD_VLAN | \ @@ -1416,6 +1425,85 @@ struct virtchnl_fdir_del { VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); +struct virtchnl_shaper_bw { + /* Unit is Kbps */ + u32 committed; + u32 peak; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_shaper_bw); + +/* VIRTCHNL_OP_GET_QOS_CAPS + * VF sends this message to get its QoS Caps, such as + * TC number, Arbiter and Bandwidth. + */ +struct virtchnl_qos_cap_elem { + u8 tc_num; + u8 tc_prio; +#define VIRTCHNL_ABITER_STRICT 0 +#define VIRTCHNL_ABITER_ETS 2 + u8 arbiter; +#define VIRTCHNL_STRICT_WEIGHT 1 + u8 weight; + enum virtchnl_bw_limit_type type; + union { + struct virtchnl_shaper_bw shaper; + u8 pad2[32]; + }; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_qos_cap_elem); + +struct virtchnl_qos_cap_list { + u16 vsi_id; + u16 num_elem; + struct virtchnl_qos_cap_elem cap[]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_qos_cap_list); +#define virtchnl_qos_cap_list_LEGACY_SIZEOF 44 + +/* VIRTCHNL_OP_CONFIG_QUEUE_BW */ +struct virtchnl_queue_bw { + u16 queue_id; + u8 tc; + u8 pad; + struct virtchnl_shaper_bw shaper; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_bw); + +struct virtchnl_queues_bw_cfg { + u16 vsi_id; + u16 num_queues; + struct virtchnl_queue_bw cfg[]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_queues_bw_cfg); +#define virtchnl_queues_bw_cfg_LEGACY_SIZEOF 16 + +enum virtchnl_queue_type { + VIRTCHNL_QUEUE_TYPE_TX = 0, + VIRTCHNL_QUEUE_TYPE_RX = 1, +}; + +/* structure to specify a chunk of contiguous queues */ +struct virtchnl_queue_chunk { + /* see enum virtchnl_queue_type */ + s32 type; + u16 start_queue_id; + u16 num_queues; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_queue_chunk); + +struct virtchnl_quanta_cfg { + u16 quanta_size; + struct virtchnl_queue_chunk queue_select; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_quanta_cfg); + #define __vss_byone(p, member, count, old) \ (struct_size(p, member, count) + (old - 1 - struct_size(p, member, 0))) @@ -1438,6 +1526,8 @@ VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); __vss(virtchnl_vlan_filter_list_v2, __vss_byelem, p, m, c), \ __vss(virtchnl_tc_info, __vss_byelem, p, m, c), \ __vss(virtchnl_rdma_qvlist_info, __vss_byelem, p, m, c), \ + __vss(virtchnl_qos_cap_list, __vss_byelem, p, m, c), \ + __vss(virtchnl_queues_bw_cfg, __vss_byelem, p, m, c), \ __vss(virtchnl_rss_key, __vss_byone, p, m, c), \ __vss(virtchnl_rss_lut, __vss_byone, p, m, c)) @@ -1637,6 +1727,35 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2: valid_len = sizeof(struct virtchnl_vlan_setting); break; + case VIRTCHNL_OP_GET_QOS_CAPS: + break; + case VIRTCHNL_OP_CONFIG_QUEUE_BW: + valid_len = virtchnl_queues_bw_cfg_LEGACY_SIZEOF; + if (msglen >= valid_len) { + struct virtchnl_queues_bw_cfg *q_bw = + (struct virtchnl_queues_bw_cfg *)msg; + + valid_len = virtchnl_struct_size(q_bw, cfg, + q_bw->num_queues); + if (q_bw->num_queues == 0) { + err_msg_format = true; + break; + } + } + break; + case VIRTCHNL_OP_CONFIG_QUANTA: + valid_len = sizeof(struct virtchnl_quanta_cfg); + if (msglen >= valid_len) { + struct virtchnl_quanta_cfg *q_quanta = + (struct virtchnl_quanta_cfg *)msg; + + if (q_quanta->quanta_size == 0 || + q_quanta->queue_select.num_queues == 0) { + err_msg_format = true; + break; + } + } + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: -- cgit v1.2.3 From 5bd48a3a14df4b3ee1be0757efcc0f40d4f57b35 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 10 Oct 2024 04:56:52 +0100 Subject: bpf: fix argument type in bpf_loop documentation The `index` argument to bpf_loop() is threaded as an u64. This lead in a subtle verifier denial where clang cloned the argument in another register[1]. [1] https://github.com/systemd/systemd/pull/34650#issuecomment-2401092895 Signed-off-by: Matteo Croce Link: https://lore.kernel.org/r/20241010035652.17830-1-technoboy85@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8ab4d8184b9d..874af0186fe8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5371,7 +5371,7 @@ union bpf_attr { * Currently, the **flags** must be 0. Currently, nr_loops is * limited to 1 << 23 (~8 million) loops. * - * long (\*callback_fn)(u32 index, void \*ctx); + * long (\*callback_fn)(u64 index, void \*ctx); * * where **index** is the current index in the loop. The index * is zero-indexed. -- cgit v1.2.3 From 0a6c61bc9c636e9a32d9f5a4d6d3b031d08763ab Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Thu, 10 Oct 2024 23:59:09 +0900 Subject: fgraph: Simplify return address printing in function graph tracer Simplify return address printing in the function graph tracer by removing fgraph_extras. Since this feature is only used by the function graph tracer and the feature flags can directly accessible from the function graph tracer, fgraph_extras can be removed from the fgraph callback. Cc: Donglin Peng Link: https://lore.kernel.org/172857234900.270774.15378354017601069781.stgit@devnote2 Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2ac3b3b53cd0..4c7dd5e58c9f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1068,29 +1068,20 @@ struct ftrace_graph_ret { unsigned long long rettime; } __packed; -struct fgraph_extras; struct fgraph_ops; /* Type of the callback handlers for tracing function graph*/ typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *, struct fgraph_ops *); /* return */ typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *, - struct fgraph_ops *, - struct fgraph_extras *); /* entry */ + struct fgraph_ops *); /* entry */ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras); + struct fgraph_ops *gops); bool ftrace_pids_enabled(struct ftrace_ops *ops); #ifdef CONFIG_FUNCTION_GRAPH_TRACER -/* Used to convey some extra datas when creating a graph entry */ -struct fgraph_extras { - u32 flags; - unsigned long retaddr; -}; - struct fgraph_ops { trace_func_graph_ent_t entryfunc; trace_func_graph_ret_t retfunc; @@ -1131,13 +1122,12 @@ function_graph_enter(unsigned long ret, unsigned long func, struct ftrace_ret_stack * ftrace_graph_get_ret_stack(struct task_struct *task, int skip); +unsigned long ftrace_graph_top_ret_addr(struct task_struct *task); unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp); unsigned long *fgraph_get_task_var(struct fgraph_ops *gops); -u32 graph_tracer_flags_get(u32 flags); - /* * Sometimes we don't want to trace a function with the function * graph tracer but we want them to keep traced by the usual function -- cgit v1.2.3 From bafffd56c608106d11e7aec851f114dcd66b2091 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 10 Oct 2024 14:54:46 +0100 Subject: clocksource: Remove unused clocksource_change_rating clocksource_change_rating() has been unused since 2017's commit 63ed4e0c67df ("Drivers: hv: vmbus: Consolidate all Hyper-V specific clocksource code") Remove it. __clocksource_change_rating now only has one use which is ifdef'd. Move it into the ifdef'd section. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241010135446.213098-1-linux@treblig.org --- include/linux/clocksource.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index d35b677b08fe..ef1b16da6ad5 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -215,7 +215,6 @@ static inline s64 clocksource_cyc2ns(u64 cycles, u32 mult, u32 shift) extern int clocksource_unregister(struct clocksource*); extern void clocksource_touch_watchdog(void); -extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_suspend(void); extern void clocksource_resume(void); extern struct clocksource * __init clocksource_default_clock(void); -- cgit v1.2.3 From c6ca31981b545ad3081007b6aa88b6aab1b0cece Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Thu, 10 Oct 2024 12:33:01 -0700 Subject: bpf: Update bpf_override_return() comment The documentation says CONFIG_FUNCTION_ERROR_INJECTION is supported only on x86. This was presumably true at the time of writing, but it's now supported on many other architectures too. Drop this statement, since it's not correct anymore and it fits better in other documentation anyway. Signed-off-by: Martin Kelly Link: https://lore.kernel.org/r/20241010193301.995909-1-martin.kelly@crowdstrike.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 874af0186fe8..627c4195f04f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3103,10 +3103,6 @@ union bpf_attr { * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration * option, and in this case it only works on functions tagged with * **ALLOW_ERROR_INJECTION** in the kernel code. - * - * Also, the helper is only available for the architectures having - * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, - * x86 architecture is the only one to support this feature. * Return * 0 * -- cgit v1.2.3 From 445936f9e258eca624c8239056bd8cd6e853b3fd Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 23 Sep 2024 11:59:57 +0200 Subject: thermal: core: Add user thresholds support The user thresholds mechanism is a way to have the userspace to tell the thermal framework to send a notification when a temperature limit is crossed. There is no id, no hysteresis, just the temperature and the direction of the limit crossing. That means we can be notified when a threshold is crossed the way up only, or the way down only or both ways. That allows to create hysteresis values if it is needed. A threshold can be added, deleted or flushed. The latter means all thresholds belonging to a thermal zone will be deleted. When a threshold is added: - if the same threshold (temperature and direction) exists, an error is returned - if a threshold is specified with the same temperature but a different direction, the specified direction is added - if there is no threshold with the same temperature then it is created When a threshold is deleted: - if the same threshold (temperature and direction) exists, it is deleted - if a threshold is specified with the same temperature but a different direction, the specified direction is removed - if there is no threshold with the same temperature, then an error is returned When the threshold are flushed: - All thresholds related to a thermal zone are deleted When a threshold is crossed: - the userspace does not need to know which threshold(s) have been crossed, it will be notified with the current temperature and the previous temperature - if multiple thresholds have been crossed between two updates only one notification will be send to the userspace, it is pointless to send a notification per thresholds crossed as the userspace can handle that easily when it has the temperature delta information Signed-off-by: Daniel Lezcano Link: https://patch.msgid.link/20240923100005.2532430-2-daniel.lezcano@linaro.org [ rjw: Subject edit, use BIT(0) and BIT(1) in symbol definitions ] Signed-off-by: Rafael J. Wysocki --- include/linux/thermal.h | 3 +++ include/uapi/linux/thermal.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 25ea8fe2313e..bcaa92732e14 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -56,6 +56,9 @@ enum thermal_notify_event { THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the thermal zone */ THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight changed */ THERMAL_TZ_RESUME, /* Thermal zone is resuming after system sleep */ + THERMAL_TZ_ADD_THRESHOLD, /* Threshold added */ + THERMAL_TZ_DEL_THRESHOLD, /* Threshold deleted */ + THERMAL_TZ_FLUSH_THRESHOLDS, /* All thresholds deleted */ }; /** diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h index fc78bf3aead7..2e6f60a36173 100644 --- a/include/uapi/linux/thermal.h +++ b/include/uapi/linux/thermal.h @@ -3,6 +3,8 @@ #define _UAPI_LINUX_THERMAL_H #define THERMAL_NAME_LENGTH 20 +#define THERMAL_THRESHOLD_WAY_UP BIT(0) +#define THERMAL_THRESHOLD_WAY_DOWN BIT(1) enum thermal_device_mode { THERMAL_DEVICE_DISABLED = 0, -- cgit v1.2.3 From 9fb6fef0fb49124291837af1da5028f79d53f98e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 14 Jun 2024 13:06:03 +0300 Subject: resource: Add resource set range and size helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting the end address for a resource with a given size lacks a helper and is therefore coded manually unlike the getter side which has a helper for resource size calculation. Also, almost all callsites that calculate the end address for a resource also set the start address right before it like this: res->start = start_addr; res->end = res->start + size - 1; Add resource_set_range(res, start_addr, size) that sets the start address and calculates the end address to simplify this often repeated fragment. Also add resource_set_size() for the cases where setting the start address of the resource is not necessary but mention in its kerneldoc that resource_set_range() is preferred when setting both addresses. Link: https://lore.kernel.org/r/20240614100606.15830-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- include/linux/ioport.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 6e9fb667a1c5..5385349f0b8a 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -249,6 +249,38 @@ struct resource *lookup_resource(struct resource *root, resource_size_t start); int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size); resource_size_t resource_alignment(struct resource *res); + +/** + * resource_set_size - Calculate resource end address from size and start + * @res: Resource descriptor + * @size: Size of the resource + * + * Calculate the end address for @res based on @size. + * + * Note: The start address of @res must be set when calling this function. + * Prefer resource_set_range() if setting both the start address and @size. + */ +static inline void resource_set_size(struct resource *res, resource_size_t size) +{ + res->end = res->start + size - 1; +} + +/** + * resource_set_range - Set resource start and end addresses + * @res: Resource descriptor + * @start: Start address for the resource + * @size: Size of the resource + * + * Set @res start address and calculate the end address based on @size. + */ +static inline void resource_set_range(struct resource *res, + resource_size_t start, + resource_size_t size) +{ + res->start = start; + resource_set_size(res, size); +} + static inline resource_size_t resource_size(const struct resource *res) { return res->end - res->start + 1; -- cgit v1.2.3 From 7888af4166d4ab07ba51234be6ba332b7807e901 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 8 Oct 2024 19:05:28 -0400 Subject: ftrace: Make ftrace_regs abstract from direct use ftrace_regs was created to hold registers that store information to save function parameters, return value and stack. Since it is a subset of pt_regs, it should only be used by its accessor functions. But because pt_regs can easily be taken from ftrace_regs (on most archs), it is tempting to use it directly. But when running on other architectures, it may fail to build or worse, build but crash the kernel! Instead, make struct ftrace_regs an empty structure and have the architectures define __arch_ftrace_regs and all the accessor functions will typecast to it to get to the actual fields. This will help avoid usage of ftrace_regs directly. Link: https://lore.kernel.org/all/20241007171027.629bdafd@gandalf.local.home/ Cc: "linux-arch@vger.kernel.org" Cc: "x86@kernel.org" Cc: Mathieu Desnoyers Cc: Mark Rutland Cc: Will Deacon Cc: Huacai Chen Cc: WANG Xuerui Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Naveen N Rao Cc: Madhavan Srinivasan Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Link: https://lore.kernel.org/20241008230628.958778821@goodmis.org Acked-by: Catalin Marinas Signed-off-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) Acked-by: Heiko Carstens # s390 Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4c7dd5e58c9f..66f10291a0b2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -115,8 +115,6 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val extern int ftrace_enabled; -#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS - /** * ftrace_regs - ftrace partial/optimal register set * @@ -142,11 +140,28 @@ extern int ftrace_enabled; * * NOTE: user *must not* access regs directly, only do it via APIs, because * the member can be changed according to the architecture. + * This is why the structure is empty here, so that nothing accesses + * the ftrace_regs directly. */ struct ftrace_regs { + /* Nothing to see here, use the accessor functions! */ +}; + +#define ftrace_regs_size() sizeof(struct __arch_ftrace_regs) + +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS + +struct __arch_ftrace_regs { struct pt_regs regs; }; -#define arch_ftrace_get_regs(fregs) (&(fregs)->regs) + +struct ftrace_regs; +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) + +static inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) +{ + return &arch_ftrace_regs(fregs)->regs; +} /* * ftrace_regs_set_instruction_pointer() is to be defined by the architecture -- cgit v1.2.3 From 483c5c2bc6b1c0d17214e2672032def605ff2ba9 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Fri, 20 Sep 2024 13:34:23 +0800 Subject: serial: clean up uart_info Since commit ebd2c8f6d2ec ("serial: kill off uart_info") has removed uart_info, the uart_info declaration looks lonely, let it go. Signed-off-by: Yanteng Si Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240920053423.1373354-1-siyanteng@cqsoftware.com.cn Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/sa11x0-serial.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/platform_data/sa11x0-serial.h b/include/linux/platform_data/sa11x0-serial.h index 8b79ab08af45..a88096bc74e4 100644 --- a/include/linux/platform_data/sa11x0-serial.h +++ b/include/linux/platform_data/sa11x0-serial.h @@ -10,7 +10,6 @@ #define SA11X0_SERIAL_H struct uart_port; -struct uart_info; /* * This is a temporary structure for registering these -- cgit v1.2.3 From 7738a7ab9d12c5371ed97114ee2132d4512e9fd5 Mon Sep 17 00:00:00 2001 From: Parker Newman Date: Wed, 2 Oct 2024 11:12:33 -0400 Subject: misc: eeprom: eeprom_93cx6: Add quirk for extra read clock cycle Add a quirk similar to eeprom_93xx46 to add an extra clock cycle before reading data from the EEPROM. The 93Cx6 family of EEPROMs output a "dummy 0 bit" between the writing of the op-code/address from the host to the EEPROM and the reading of the actual data from the EEPROM. More info can be found on page 6 of the AT93C46 datasheet (linked below). Similar notes are found in other 93xx6 datasheets. In summary the read operation for a 93Cx6 EEPROM is: Write to EEPROM: 110[A5-A0] (9 bits) Read from EEPROM: 0[D15-D0] (17 bits) Where: 110 is the start bit and READ OpCode [A5-A0] is the address to read from 0 is a "dummy bit" preceding the actual data [D15-D0] is the actual data. Looking at the READ timing diagrams in the 93Cx6 datasheets the dummy bit should be clocked out on the last address bit clock cycle meaning it should be discarded naturally. However, depending on the hardware configuration sometimes this dummy bit is not discarded. This is the case with Exar PCI UARTs which require an extra clock cycle between sending the address and reading the data. Datasheet: https://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-5193-SEEPROM-AT93C46D-Datasheet.pdf Reviewed-by: Andy Shevchenko Signed-off-by: Parker Newman Link: https://lore.kernel.org/r/0f23973efefccd2544705a0480b4ad4c2353e407.1727880931.git.pnewman@connecttech.com Signed-off-by: Greg Kroah-Hartman --- include/linux/eeprom_93cx6.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/eeprom_93cx6.h b/include/linux/eeprom_93cx6.h index c860c72a921d..3a485cc0e0fa 100644 --- a/include/linux/eeprom_93cx6.h +++ b/include/linux/eeprom_93cx6.h @@ -11,6 +11,8 @@ Supported chipsets: 93c46, 93c56 and 93c66. */ +#include + /* * EEPROM operation defines. */ @@ -34,6 +36,7 @@ * @register_write(struct eeprom_93cx6 *eeprom): handler to * write to the eeprom register by using all reg_* fields. * @width: eeprom width, should be one of the PCI_EEPROM_WIDTH_* defines + * @quirks: eeprom or controller quirks * @drive_data: Set if we're driving the data line. * @reg_data_in: register field to indicate data input * @reg_data_out: register field to indicate data output @@ -50,6 +53,9 @@ struct eeprom_93cx6 { void (*register_write)(struct eeprom_93cx6 *eeprom); int width; + unsigned int quirks; +/* Some EEPROMs require an extra clock cycle before reading */ +#define PCI_EEPROM_QUIRK_EXTRA_READ_CYCLE BIT(0) char drive_data; char reg_data_in; @@ -71,3 +77,8 @@ extern void eeprom_93cx6_wren(struct eeprom_93cx6 *eeprom, bool enable); extern void eeprom_93cx6_write(struct eeprom_93cx6 *eeprom, u8 addr, u16 data); + +static inline bool has_quirk_extra_read_cycle(struct eeprom_93cx6 *eeprom) +{ + return eeprom->quirks & PCI_EEPROM_QUIRK_EXTRA_READ_CYCLE; +} -- cgit v1.2.3 From 11dad94b50263bbe87d015041c77be61c8c44161 Mon Sep 17 00:00:00 2001 From: Andrew Kreimer Date: Thu, 10 Oct 2024 12:13:55 +0300 Subject: phy: sun4i-usb: Fix a typo Fix a typo in comments: wether -> whether. Signed-off-by: Andrew Kreimer Acked-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20241010091355.8271-1-algonell@gmail.com Signed-off-by: Vinod Koul --- include/linux/phy/phy-sun4i-usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy/phy-sun4i-usb.h b/include/linux/phy/phy-sun4i-usb.h index 91eb755ee73b..f3e7b13608e4 100644 --- a/include/linux/phy/phy-sun4i-usb.h +++ b/include/linux/phy/phy-sun4i-usb.h @@ -11,7 +11,7 @@ /** * sun4i_usb_phy_set_squelch_detect() - Enable/disable squelch detect * @phy: reference to a sun4i usb phy - * @enabled: wether to enable or disable squelch detect + * @enabled: whether to enable or disable squelch detect */ void sun4i_usb_phy_set_squelch_detect(struct phy *phy, bool enabled); -- cgit v1.2.3 From e38501cee5364aeb3bd265b484a8e47baa6634aa Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Mon, 30 Sep 2024 21:53:03 +0200 Subject: accel/ivpu: Stop using hardcoded DRIVER_DATE Hardcoded driver date is useless, so use kernel version as a driver date to make identifying .ko file easier. Also allow to pass DRIVER_DATE on build time to allow versioning the driver in case it is built out of the tree. Reviewed-by: Karol Wachowski Link: https://patchwork.freedesktop.org/patch/msgid/20240930195322.461209-13-jacek.lawrynowicz@linux.intel.com Signed-off-by: Jacek Lawrynowicz --- include/uapi/drm/ivpu_accel.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 084fb529e1e9..234664d35250 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -12,9 +12,6 @@ extern "C" { #endif -#define DRM_IVPU_DRIVER_MAJOR 1 -#define DRM_IVPU_DRIVER_MINOR 0 - #define DRM_IVPU_GET_PARAM 0x00 #define DRM_IVPU_SET_PARAM 0x01 #define DRM_IVPU_BO_CREATE 0x02 -- cgit v1.2.3 From 65b5353193e5a8476814a184e8e1a2627d59f2b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Thu, 10 Oct 2024 14:45:45 +0200 Subject: drm/ttm: Fix incorrect use of kernel-doc format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a missing colon. Cc: dri-devel@lists.freedesktop.org Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/linux-next/20241010160942.192caf60@canb.auug.org.au/ Fixes: fc5d96670eb2 ("drm/ttm: Move swapped objects off the manager's LRU list") Signed-off-by: Thomas Hellström Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20241010124545.82023-1-thomas.hellstrom@linux.intel.com --- include/drm/ttm/ttm_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index 438358f72716..39b8636b1845 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -252,7 +252,7 @@ struct ttm_device { spinlock_t lru_lock; /** - * @unevictable Buffer objects which are pinned or swapped and as such + * @unevictable: Buffer objects which are pinned or swapped and as such * not on an LRU list. */ struct list_head unevictable; -- cgit v1.2.3 From d677aebd663ddc287f2b2bda098474694a0ca875 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2024 03:41:00 +0000 Subject: tcp: move sysctl_tcp_l3mdev_accept to netns_ipv4_read_rx sysctl_tcp_l3mdev_accept is read from TCP receive fast path from tcp_v6_early_demux(), __inet6_lookup_established, inet_request_bound_dev_if(). Move it to netns_ipv4_read_rx. Remove the '#ifdef CONFIG_NET_L3_MASTER_DEV' that was guarding its definition. Note this adds a hole of three bytes that could be filled later. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Cc: Wei Wang Cc: Coco Li Link: https://patch.msgid.link/20241010034100.320832-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/netns/ipv4.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 66a4cffc44ee..3b1de80b5c25 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -76,6 +76,8 @@ struct netns_ipv4 { __cacheline_group_begin(netns_ipv4_read_rx); u8 sysctl_ip_early_demux; u8 sysctl_tcp_early_demux; + u8 sysctl_tcp_l3mdev_accept; + /* 3 bytes hole, try to pack */ int sysctl_tcp_reordering; int sysctl_tcp_rmem[3]; __cacheline_group_end(netns_ipv4_read_rx); @@ -151,9 +153,6 @@ struct netns_ipv4 { u8 sysctl_fwmark_reflect; u8 sysctl_tcp_fwmark_accept; -#ifdef CONFIG_NET_L3_MASTER_DEV - u8 sysctl_tcp_l3mdev_accept; -#endif u8 sysctl_tcp_mtu_probing; int sysctl_tcp_mtu_probe_floor; int sysctl_tcp_base_mss; -- cgit v1.2.3 From ed870e35db660724ff0d815d9a3ef9a6247ffbab Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:09 -0700 Subject: lsm: add the lsm_prop data structure When more than one security module is exporting data to audit and networking sub-systems a single 32 bit integer is no longer sufficient to represent the data. Add a structure to be used instead. The lsm_prop structure definition is intended to keep the LSM specific information private to the individual security modules. The module specific information is included in a new set of header files under include/lsm. Each security module is allowed to define the information included for its use in the lsm_prop. SELinux includes a u32 secid. Smack includes a pointer into its global label list. The conditional compilation based on feature inclusion is contained in the include/lsm files. Cc: apparmor@lists.ubuntu.com Cc: bpf@vger.kernel.org Cc: selinux@vger.kernel.org Cc: linux-security-module@vger.kernel.org Suggested-by: Paul Moore Signed-off-by: Casey Schaufler Acked-by: John Johansen [PM: added include/linux/lsm/ to MAINTAINERS, subj tweak] Signed-off-by: Paul Moore --- include/linux/lsm/apparmor.h | 17 +++++++++++++++++ include/linux/lsm/bpf.h | 16 ++++++++++++++++ include/linux/lsm/selinux.h | 16 ++++++++++++++++ include/linux/lsm/smack.h | 17 +++++++++++++++++ include/linux/security.h | 20 ++++++++++++++++++++ 5 files changed, 86 insertions(+) create mode 100644 include/linux/lsm/apparmor.h create mode 100644 include/linux/lsm/bpf.h create mode 100644 include/linux/lsm/selinux.h create mode 100644 include/linux/lsm/smack.h (limited to 'include') diff --git a/include/linux/lsm/apparmor.h b/include/linux/lsm/apparmor.h new file mode 100644 index 000000000000..612cbfacb072 --- /dev/null +++ b/include/linux/lsm/apparmor.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linux Security Module interface to other subsystems. + * AppArmor presents single pointer to an aa_label structure. + */ +#ifndef __LINUX_LSM_APPARMOR_H +#define __LINUX_LSM_APPARMOR_H + +struct aa_label; + +struct lsm_prop_apparmor { +#ifdef CONFIG_SECURITY_APPARMOR + struct aa_label *label; +#endif +}; + +#endif /* ! __LINUX_LSM_APPARMOR_H */ diff --git a/include/linux/lsm/bpf.h b/include/linux/lsm/bpf.h new file mode 100644 index 000000000000..8106e206fcef --- /dev/null +++ b/include/linux/lsm/bpf.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linux Security Module interface to other subsystems. + * BPF may present a single u32 value. + */ +#ifndef __LINUX_LSM_BPF_H +#define __LINUX_LSM_BPF_H +#include + +struct lsm_prop_bpf { +#ifdef CONFIG_BPF_LSM + u32 secid; +#endif +}; + +#endif /* ! __LINUX_LSM_BPF_H */ diff --git a/include/linux/lsm/selinux.h b/include/linux/lsm/selinux.h new file mode 100644 index 000000000000..9455a6b5b910 --- /dev/null +++ b/include/linux/lsm/selinux.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linux Security Module interface to other subsystems. + * SELinux presents a single u32 value which is known as a secid. + */ +#ifndef __LINUX_LSM_SELINUX_H +#define __LINUX_LSM_SELINUX_H +#include + +struct lsm_prop_selinux { +#ifdef CONFIG_SECURITY_SELINUX + u32 secid; +#endif +}; + +#endif /* ! __LINUX_LSM_SELINUX_H */ diff --git a/include/linux/lsm/smack.h b/include/linux/lsm/smack.h new file mode 100644 index 000000000000..ff730dd7a734 --- /dev/null +++ b/include/linux/lsm/smack.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linux Security Module interface to other subsystems. + * Smack presents a pointer into the global Smack label list. + */ +#ifndef __LINUX_LSM_SMACK_H +#define __LINUX_LSM_SMACK_H + +struct smack_known; + +struct lsm_prop_smack { +#ifdef CONFIG_SECURITY_SMACK + struct smack_known *skp; +#endif +}; + +#endif /* ! __LINUX_LSM_SMACK_H */ diff --git a/include/linux/security.h b/include/linux/security.h index b86ec2afc691..555249a8d121 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -34,6 +34,10 @@ #include #include #include +#include +#include +#include +#include struct linux_binprm; struct cred; @@ -152,6 +156,22 @@ enum lockdown_reason { LOCKDOWN_CONFIDENTIALITY_MAX, }; +/* scaffolding */ +struct lsm_prop_scaffold { + u32 secid; +}; + +/* + * Data exported by the security modules + */ +struct lsm_prop { + struct lsm_prop_selinux selinux; + struct lsm_prop_smack smack; + struct lsm_prop_apparmor apparmor; + struct lsm_prop_bpf bpf; + struct lsm_prop_scaffold scaffold; +}; + extern const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1]; extern u32 lsm_active_cnt; extern const struct lsm_id *lsm_idlist[]; -- cgit v1.2.3 From 870b7fdc660b38c4e1bd8bf48e62aa352ddf8f42 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:10 -0700 Subject: lsm: use lsm_prop in security_audit_rule_match Change the secid parameter of security_audit_rule_match to a lsm_prop structure pointer. Pass the entry from the lsm_prop structure for the approprite slot to the LSM hook. Change the users of security_audit_rule_match to use the lsm_prop instead of a u32. The scaffolding function lsmprop_init() fills the structure with the value of the old secid, ensuring that it is available to the appropriate module hook. The sources of the secid, security_task_getsecid() and security_inode_getsecid(), will be converted to use the lsm_prop structure later in the series. At that point the use of lsmprop_init() is dropped. Signed-off-by: Casey Schaufler [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 3 ++- include/linux/security.h | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 9eca013aa5e1..ea7f17e37756 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -416,7 +416,8 @@ LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring, LSM_HOOK(int, 0, audit_rule_init, u32 field, u32 op, char *rulestr, void **lsmrule, gfp_t gfp) LSM_HOOK(int, 0, audit_rule_known, struct audit_krule *krule) -LSM_HOOK(int, 0, audit_rule_match, u32 secid, u32 field, u32 op, void *lsmrule) +LSM_HOOK(int, 0, audit_rule_match, struct lsm_prop *prop, u32 field, u32 op, + void *lsmrule) LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) #endif /* CONFIG_AUDIT */ diff --git a/include/linux/security.h b/include/linux/security.h index 555249a8d121..a4f020491e7c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2115,7 +2115,8 @@ static inline void security_key_post_create_or_update(struct key *keyring, int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule, gfp_t gfp); int security_audit_rule_known(struct audit_krule *krule); -int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule); +int security_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op, + void *lsmrule); void security_audit_rule_free(void *lsmrule); #else @@ -2131,8 +2132,8 @@ static inline int security_audit_rule_known(struct audit_krule *krule) return 0; } -static inline int security_audit_rule_match(u32 secid, u32 field, u32 op, - void *lsmrule) +static inline int security_audit_rule_match(struct lsm_prop *prop, u32 field, + u32 op, void *lsmrule) { return 0; } -- cgit v1.2.3 From 6f2f724f0e116d9ea960ff3dd645add12e60e176 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:11 -0700 Subject: lsm: add lsmprop_to_secctx hook Add a new hook security_lsmprop_to_secctx() and its LSM specific implementations. The LSM specific code will use the lsm_prop element allocated for that module. This allows for the possibility that more than one module may be called upon to translate a secid to a string, as can occur in the audit code. Signed-off-by: Casey Schaufler [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 ++ include/linux/security.h | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index ea7f17e37756..ed6ea0b1ec57 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -294,6 +294,8 @@ LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size) LSM_HOOK(int, 0, ismaclabel, const char *name) LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, char **secdata, u32 *seclen) +LSM_HOOK(int, -EOPNOTSUPP, lsmprop_to_secctx, struct lsm_prop *prop, + char **secdata, u32 *seclen) LSM_HOOK(int, 0, secctx_to_secid, const char *secdata, u32 seclen, u32 *secid) LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) diff --git a/include/linux/security.h b/include/linux/security.h index a4f020491e7c..f1c68e38b15d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -535,6 +535,7 @@ int security_setprocattr(int lsmid, const char *name, void *value, size_t size); int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_ismaclabel(const char *name); int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); +int security_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, u32 *seclen); int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid); void security_release_secctx(char *secdata, u32 seclen); void security_inode_invalidate_secctx(struct inode *inode); @@ -1488,7 +1489,14 @@ static inline int security_ismaclabel(const char *name) return 0; } -static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) +static inline int security_secid_to_secctx(u32 secid, char **secdata, + u32 *seclen) +{ + return -EOPNOTSUPP; +} + +static inline int security_lsmprop_to_secctx(struct lsm_prop *prop, + char **secdata, u32 *seclen) { return -EOPNOTSUPP; } -- cgit v1.2.3 From 7183abccd8ac2c486363e267b5d84032818eb725 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:12 -0700 Subject: audit: maintain an lsm_prop in audit_context Replace the secid value stored in struct audit_context with a struct lsm_prop. Change the code that uses this value to accommodate the change. security_audit_rule_match() expects a lsm_prop, so existing scaffolding can be removed. A call to security_secid_to_secctx() is changed to security_lsmprop_to_secctx(). The call to security_ipc_getsecid() is scaffolded. A new function lsmprop_is_set() is introduced to identify whether an lsm_prop contains a non-zero value. Signed-off-by: Casey Schaufler [PM: subject line tweak, fix lsmprop_is_set() typo] Signed-off-by: Paul Moore --- include/linux/security.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index f1c68e38b15d..c029bfe2c5bb 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -291,6 +291,19 @@ static inline const char *kernel_load_data_id_str(enum kernel_load_data_id id) #ifdef CONFIG_SECURITY +/** + * lsmprop_is_set - report if there is a value in the lsm_prop + * @prop: Pointer to the exported LSM data + * + * Returns true if there is a value set, false otherwise + */ +static inline bool lsmprop_is_set(struct lsm_prop *prop) +{ + const struct lsm_prop empty = {}; + + return !!memcmp(prop, &empty, sizeof(*prop)); +} + int call_blocking_lsm_notifier(enum lsm_event event, void *data); int register_blocking_lsm_notifier(struct notifier_block *nb); int unregister_blocking_lsm_notifier(struct notifier_block *nb); @@ -552,6 +565,17 @@ int security_bdev_setintegrity(struct block_device *bdev, size_t size); #else /* CONFIG_SECURITY */ +/** + * lsmprop_is_set - report if there is a value in the lsm_prop + * @prop: Pointer to the exported LSM data + * + * Returns true if there is a value set, false otherwise + */ +static inline bool lsmprop_is_set(struct lsm_prop *prop) +{ + return false; +} + static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data) { return 0; -- cgit v1.2.3 From f4602f163c98bc93c118e196466c1c98186adb67 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:13 -0700 Subject: lsm: use lsm_prop in security_ipc_getsecid There may be more than one LSM that provides IPC data for auditing. Change security_ipc_getsecid() to fill in a lsm_prop structure instead of the u32 secid. Change the name to security_ipc_getlsmprop() to reflect the change. Cc: audit@vger.kernel.org Cc: linux-security-module@vger.kernel.org Cc: selinux@vger.kernel.org Signed-off-by: Casey Schaufler [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 4 ++-- include/linux/security.h | 18 +++++++++++++++--- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index ed6ea0b1ec57..6ef2a345ea03 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -256,8 +256,8 @@ LSM_HOOK(void, LSM_RET_VOID, task_to_inode, struct task_struct *p, struct inode *inode) LSM_HOOK(int, 0, userns_create, const struct cred *cred) LSM_HOOK(int, 0, ipc_permission, struct kern_ipc_perm *ipcp, short flag) -LSM_HOOK(void, LSM_RET_VOID, ipc_getsecid, struct kern_ipc_perm *ipcp, - u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, ipc_getlsmprop, struct kern_ipc_perm *ipcp, + struct lsm_prop *prop) LSM_HOOK(int, 0, msg_msg_alloc_security, struct msg_msg *msg) LSM_HOOK(void, LSM_RET_VOID, msg_msg_free_security, struct msg_msg *msg) LSM_HOOK(int, 0, msg_queue_alloc_security, struct kern_ipc_perm *perm) diff --git a/include/linux/security.h b/include/linux/security.h index c029bfe2c5bb..15aef5f68e77 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -289,6 +289,17 @@ static inline const char *kernel_load_data_id_str(enum kernel_load_data_id id) return kernel_load_data_str[id]; } +/** + * lsmprop_init - initialize a lsm_prop structure + * @prop: Pointer to the data to initialize + * + * Set all secid for all modules to the specified value. + */ +static inline void lsmprop_init(struct lsm_prop *prop) +{ + memset(prop, 0, sizeof(*prop)); +} + #ifdef CONFIG_SECURITY /** @@ -515,7 +526,7 @@ int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, void security_task_to_inode(struct task_struct *p, struct inode *inode); int security_create_user_ns(const struct cred *cred); int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag); -void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid); +void security_ipc_getlsmprop(struct kern_ipc_perm *ipcp, struct lsm_prop *prop); int security_msg_msg_alloc(struct msg_msg *msg); void security_msg_msg_free(struct msg_msg *msg); int security_msg_queue_alloc(struct kern_ipc_perm *msq); @@ -1377,9 +1388,10 @@ static inline int security_ipc_permission(struct kern_ipc_perm *ipcp, return 0; } -static inline void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid) +static inline void security_ipc_getlsmprop(struct kern_ipc_perm *ipcp, + struct lsm_prop *prop) { - *secid = 0; + lsmprop_init(prop); } static inline int security_msg_msg_alloc(struct msg_msg *msg) -- cgit v1.2.3 From 37f670aacd481128ad9a940ac2d3372aecd92824 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:15 -0700 Subject: lsm: use lsm_prop in security_current_getsecid Change the security_current_getsecid_subj() and security_task_getsecid_obj() interfaces to fill in a lsm_prop structure instead of a u32 secid. Audit interfaces will need to collect all possible security data for possible reporting. Cc: linux-integrity@vger.kernel.org Cc: audit@vger.kernel.org Cc: selinux@vger.kernel.org Signed-off-by: Casey Schaufler [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 6 +++--- include/linux/security.h | 13 +++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 6ef2a345ea03..8a90fd9ff3c8 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -235,9 +235,9 @@ LSM_HOOK(int, 0, task_fix_setgroups, struct cred *new, const struct cred * old) LSM_HOOK(int, 0, task_setpgid, struct task_struct *p, pid_t pgid) LSM_HOOK(int, 0, task_getpgid, struct task_struct *p) LSM_HOOK(int, 0, task_getsid, struct task_struct *p) -LSM_HOOK(void, LSM_RET_VOID, current_getsecid_subj, u32 *secid) -LSM_HOOK(void, LSM_RET_VOID, task_getsecid_obj, - struct task_struct *p, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, current_getlsmprop_subj, struct lsm_prop *prop) +LSM_HOOK(void, LSM_RET_VOID, task_getlsmprop_obj, + struct task_struct *p, struct lsm_prop *prop) LSM_HOOK(int, 0, task_setnice, struct task_struct *p, int nice) LSM_HOOK(int, 0, task_setioprio, struct task_struct *p, int ioprio) LSM_HOOK(int, 0, task_getioprio, struct task_struct *p) diff --git a/include/linux/security.h b/include/linux/security.h index 15aef5f68e77..9bc8153f4e8b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -507,8 +507,8 @@ int security_task_fix_setgroups(struct cred *new, const struct cred *old); int security_task_setpgid(struct task_struct *p, pid_t pgid); int security_task_getpgid(struct task_struct *p); int security_task_getsid(struct task_struct *p); -void security_current_getsecid_subj(u32 *secid); -void security_task_getsecid_obj(struct task_struct *p, u32 *secid); +void security_current_getlsmprop_subj(struct lsm_prop *prop); +void security_task_getlsmprop_obj(struct task_struct *p, struct lsm_prop *prop); int security_task_setnice(struct task_struct *p, int nice); int security_task_setioprio(struct task_struct *p, int ioprio); int security_task_getioprio(struct task_struct *p); @@ -1305,14 +1305,15 @@ static inline int security_task_getsid(struct task_struct *p) return 0; } -static inline void security_current_getsecid_subj(u32 *secid) +static inline void security_current_getlsmprop_subj(struct lsm_prop *prop) { - *secid = 0; + lsmprop_init(prop); } -static inline void security_task_getsecid_obj(struct task_struct *p, u32 *secid) +static inline void security_task_getlsmprop_obj(struct task_struct *p, + struct lsm_prop *prop) { - *secid = 0; + lsmprop_init(prop); } static inline int security_task_setnice(struct task_struct *p, int nice) -- cgit v1.2.3 From 07f9d2c1132c9b838538b606dfcdab2506cd2ae4 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:16 -0700 Subject: lsm: use lsm_prop in security_inode_getsecid Change the security_inode_getsecid() interface to fill in a lsm_prop structure instead of a u32 secid. This allows for its callers to gather data from all registered LSMs. Data is provided for IMA and audit. Change the name to security_inode_getlsmprop(). Cc: linux-integrity@vger.kernel.org Cc: selinux@vger.kernel.org Signed-off-by: Casey Schaufler [PM: subj line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 3 ++- include/linux/security.h | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 8a90fd9ff3c8..23ad7e4f8c67 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -176,7 +176,8 @@ LSM_HOOK(int, -EOPNOTSUPP, inode_setsecurity, struct inode *inode, const char *name, const void *value, size_t size, int flags) LSM_HOOK(int, 0, inode_listsecurity, struct inode *inode, char *buffer, size_t buffer_size) -LSM_HOOK(void, LSM_RET_VOID, inode_getsecid, struct inode *inode, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, inode_getlsmprop, struct inode *inode, + struct lsm_prop *prop) LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new) LSM_HOOK(int, -EOPNOTSUPP, inode_copy_up_xattr, struct dentry *src, const char *name) diff --git a/include/linux/security.h b/include/linux/security.h index 9bc8153f4e8b..2b19ef5d799c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -452,7 +452,7 @@ int security_inode_getsecurity(struct mnt_idmap *idmap, void **buffer, bool alloc); int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags); int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size); -void security_inode_getsecid(struct inode *inode, u32 *secid); +void security_inode_getlsmprop(struct inode *inode, struct lsm_prop *prop); int security_inode_copy_up(struct dentry *src, struct cred **new); int security_inode_copy_up_xattr(struct dentry *src, const char *name); int security_inode_setintegrity(const struct inode *inode, @@ -1076,9 +1076,10 @@ static inline int security_inode_listsecurity(struct inode *inode, char *buffer, return 0; } -static inline void security_inode_getsecid(struct inode *inode, u32 *secid) +static inline void security_inode_getlsmprop(struct inode *inode, + struct lsm_prop *prop) { - *secid = 0; + lsmprop_init(prop); } static inline int security_inode_copy_up(struct dentry *src, struct cred **new) -- cgit v1.2.3 From b0654ca42998440df42ba2ccc3b7dbe3bf5b7bb5 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:18 -0700 Subject: lsm: create new security_cred_getlsmprop LSM hook Create a new LSM hook security_cred_getlsmprop() which, like security_cred_getsecid(), fetches LSM specific attributes from the cred structure. The associated data elements in the audit sub-system are changed from a secid to a lsm_prop to accommodate multiple possible LSM audit users. Cc: linux-integrity@vger.kernel.org Cc: audit@vger.kernel.org Cc: selinux@vger.kernel.org Signed-off-by: Casey Schaufler [PM: subj line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 ++ include/linux/security.h | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 23ad7e4f8c67..eb2937599cb0 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -218,6 +218,8 @@ LSM_HOOK(int, 0, cred_prepare, struct cred *new, const struct cred *old, LSM_HOOK(void, LSM_RET_VOID, cred_transfer, struct cred *new, const struct cred *old) LSM_HOOK(void, LSM_RET_VOID, cred_getsecid, const struct cred *c, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, cred_getlsmprop, const struct cred *c, + struct lsm_prop *prop) LSM_HOOK(int, 0, kernel_act_as, struct cred *new, u32 secid) LSM_HOOK(int, 0, kernel_create_files_as, struct cred *new, struct inode *inode) LSM_HOOK(int, 0, kernel_module_request, char *kmod_name) diff --git a/include/linux/security.h b/include/linux/security.h index 2b19ef5d799c..acd2e5d1b0ff 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -488,6 +488,7 @@ void security_cred_free(struct cred *cred); int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp); void security_transfer_creds(struct cred *new, const struct cred *old); void security_cred_getsecid(const struct cred *c, u32 *secid); +void security_cred_getlsmprop(const struct cred *c, struct lsm_prop *prop); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); int security_kernel_module_request(char *kmod_name); @@ -1229,6 +1230,10 @@ static inline void security_cred_getsecid(const struct cred *c, u32 *secid) *secid = 0; } +static inline void security_cred_getlsmprop(const struct cred *c, + struct lsm_prop *prop) +{ } + static inline int security_kernel_act_as(struct cred *cred, u32 secid) { return 0; -- cgit v1.2.3 From 05a344e54d0b4892736526e4a309851da8ee9c89 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:20 -0700 Subject: netlabel,smack: use lsm_prop for audit data Replace the secid in the netlbl_audit structure with an lsm_prop. Remove scaffolding that was required when the value was a secid. Signed-off-by: Casey Schaufler [PM: fix the subject line] Signed-off-by: Paul Moore --- include/net/netlabel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 529160f76cac..8de8344ee93c 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -97,7 +97,7 @@ struct calipso_doi; /* NetLabel audit information */ struct netlbl_audit { - u32 secid; + struct lsm_prop prop; kuid_t loginuid; unsigned int sessionid; }; -- cgit v1.2.3 From 8afd8c8faa24249e48f5007aee46209299377588 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:21 -0700 Subject: lsm: remove lsm_prop scaffolding Remove the scaffold member from the lsm_prop. Remove the remaining places it is being set. Signed-off-by: Casey Schaufler [PM: subj line tweak] Signed-off-by: Paul Moore --- include/linux/security.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index acd2e5d1b0ff..fd690fa73162 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -156,11 +156,6 @@ enum lockdown_reason { LOCKDOWN_CONFIDENTIALITY_MAX, }; -/* scaffolding */ -struct lsm_prop_scaffold { - u32 secid; -}; - /* * Data exported by the security modules */ @@ -169,7 +164,6 @@ struct lsm_prop { struct lsm_prop_smack smack; struct lsm_prop_apparmor apparmor; struct lsm_prop_bpf bpf; - struct lsm_prop_scaffold scaffold; }; extern const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1]; -- cgit v1.2.3 From 7e6c0cb33f7c2aa78b20724239bd7bda3a882652 Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Thu, 10 Oct 2024 15:43:02 -0700 Subject: drm/i915/xe3lpd: reuse xe2lpd definition xe3_lpd display is functionally identical to xe2_lpd for now so reuse the device description. A separate xe3 definition will be added in the future if/when new feature flags are required. Signed-off-by: Clint Taylor Signed-off-by: Matt Atwood Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20241010224311.50133-2-matthew.s.atwood@intel.com --- include/drm/intel/i915_pciids.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h index 02156c6f79b6..6b92f8c3731b 100644 --- a/include/drm/intel/i915_pciids.h +++ b/include/drm/intel/i915_pciids.h @@ -794,4 +794,16 @@ MACRO__(0xE20D, ## __VA_ARGS__), \ MACRO__(0xE212, ## __VA_ARGS__) +/* PTL */ +#define INTEL_PTL_IDS(MACRO__, ...) \ + MACRO__(0xB080, ## __VA_ARGS__), \ + MACRO__(0xB081, ## __VA_ARGS__), \ + MACRO__(0xB082, ## __VA_ARGS__), \ + MACRO__(0xB090, ## __VA_ARGS__), \ + MACRO__(0xB091, ## __VA_ARGS__), \ + MACRO__(0xB092, ## __VA_ARGS__), \ + MACRO__(0xB0A0, ## __VA_ARGS__), \ + MACRO__(0xB0A1, ## __VA_ARGS__), \ + MACRO__(0xB0A2, ## __VA_ARGS__) + #endif /* _I915_PCIIDS_H */ -- cgit v1.2.3 From a716ff52bebfe806fbcf5227cee4c7bda4e6724b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2024 18:44:01 +0000 Subject: fib: rules: use READ_ONCE()/WRITE_ONCE() on ops->fib_rules_seq Using RTNL to protect ops->fib_rules_seq reads seems a big hammer. Writes are protected by RTNL. We can use READ_ONCE() on readers. Constify 'struct net' argument of fib_rules_seq_read() and lookup_rules_ops(). Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: David Ahern Link: https://patch.msgid.link/20241009184405.3752829-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/fib_rules.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index d17855c52ef9..04383d90a1e3 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -176,7 +176,7 @@ int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table); bool fib_rule_matchall(const struct fib_rule *rule); int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, struct netlink_ext_ack *extack); -unsigned int fib_rules_seq_read(struct net *net, int family); +unsigned int fib_rules_seq_read(const struct net *net, int family); int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack); -- cgit v1.2.3 From 16207384d29287a19f81436e1953b41946aa8258 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2024 18:44:02 +0000 Subject: ipv4: use READ_ONCE()/WRITE_ONCE() on net->ipv4.fib_seq Using RTNL to protect ops->fib_rules_seq reads seems a big hammer. Writes are protected by RTNL. We can use READ_ONCE() when reading it. Constify 'struct net' argument of fib4_rules_seq_read() Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: David Ahern Link: https://patch.msgid.link/20241009184405.3752829-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip_fib.h | 4 ++-- include/net/netns/ipv4.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 06130933542d..b6e44f4eaa4c 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -347,7 +347,7 @@ static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb, return 0; } -static inline unsigned int fib4_rules_seq_read(struct net *net) +static inline unsigned int fib4_rules_seq_read(const struct net *net) { return 0; } @@ -411,7 +411,7 @@ static inline bool fib4_has_custom_rules(const struct net *net) bool fib4_rule_default(const struct fib_rule *rule); int fib4_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); -unsigned int fib4_rules_seq_read(struct net *net); +unsigned int fib4_rules_seq_read(const struct net *net); static inline bool fib4_rules_early_flow_dissect(struct net *net, struct sk_buff *skb, diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 3b1de80b5c25..3c014170e001 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -262,7 +262,7 @@ struct netns_ipv4 { #endif struct fib_notifier_ops *notifier_ops; - unsigned int fib_seq; /* protected by rtnl_mutex */ + unsigned int fib_seq; /* writes protected by rtnl_mutex */ struct fib_notifier_ops *ipmr_notifier_ops; unsigned int ipmr_seq; /* protected by rtnl_mutex */ -- cgit v1.2.3 From e60ea45447768c48309b944596a8a34f6bae50e2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2024 18:44:03 +0000 Subject: ipv6: use READ_ONCE()/WRITE_ONCE() on fib6_table->fib_seq Using RTNL to protect ops->fib_rules_seq reads seems a big hammer. Writes are protected by RTNL. We can use READ_ONCE() when reading it. Constify 'struct net' argument of fib6_tables_seq_read() and fib6_rules_seq_read(). Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: David Ahern Link: https://patch.msgid.link/20241009184405.3752829-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip6_fib.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 6cb867ce4878..7c87873ae211 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -394,7 +394,7 @@ struct fib6_table { struct fib6_node tb6_root; struct inet_peer_base tb6_peers; unsigned int flags; - unsigned int fib_seq; + unsigned int fib_seq; /* writes protected by rtnl_mutex */ struct hlist_head tb6_gc_hlist; /* GC candidates */ #define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; @@ -563,7 +563,7 @@ int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, int __net_init fib6_notifier_init(struct net *net); void __net_exit fib6_notifier_exit(struct net *net); -unsigned int fib6_tables_seq_read(struct net *net); +unsigned int fib6_tables_seq_read(const struct net *net); int fib6_tables_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); @@ -632,7 +632,7 @@ void fib6_rules_cleanup(void); bool fib6_rule_default(const struct fib_rule *rule); int fib6_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); -unsigned int fib6_rules_seq_read(struct net *net); +unsigned int fib6_rules_seq_read(const struct net *net); static inline bool fib6_rules_early_flow_dissect(struct net *net, struct sk_buff *skb, @@ -676,7 +676,7 @@ static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb, { return 0; } -static inline unsigned int fib6_rules_seq_read(struct net *net) +static inline unsigned int fib6_rules_seq_read(const struct net *net) { return 0; } -- cgit v1.2.3 From 2698acd6ea4770809af7e65bb8b3250e0a3a807e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2024 18:44:05 +0000 Subject: net: do not acquire rtnl in fib_seq_sum() After we made sure no fib_seq_read() handlers needs RTNL anymore, we can remove RTNL from fib_seq_sum(). Note that after RTNL was dropped, fib_seq_sum() result was possibly outdated anyway. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: David Ahern Link: https://patch.msgid.link/20241009184405.3752829-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/fib_notifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h index 6d59221ff05a..48aad6128fea 100644 --- a/include/net/fib_notifier.h +++ b/include/net/fib_notifier.h @@ -28,7 +28,7 @@ enum fib_event_type { struct fib_notifier_ops { int family; struct list_head list; - unsigned int (*fib_seq_read)(struct net *net); + unsigned int (*fib_seq_read)(const struct net *net); int (*fib_dump)(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); struct module *owner; -- cgit v1.2.3 From ccb32f2357c08f64947e0fc32f9a9551ae69c6b3 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 28 Aug 2024 16:24:07 +0200 Subject: media: v4l2-core: add v4l2_debugfs_root() This new function returns the dentry of the top-level debugfs "v4l2" directory. If it does not exist yet, then it is created first. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-dev.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h index d82dfdbf6e58..1b6222fab24e 100644 --- a/include/media/v4l2-dev.h +++ b/include/media/v4l2-dev.h @@ -62,6 +62,7 @@ struct v4l2_ioctl_callbacks; struct video_device; struct v4l2_device; struct v4l2_ctrl_handler; +struct dentry; /** * enum v4l2_video_device_flags - Flags used by &struct video_device @@ -539,6 +540,20 @@ static inline int video_is_registered(struct video_device *vdev) return test_bit(V4L2_FL_REGISTERED, &vdev->flags); } +/** + * v4l2_debugfs_root - returns the dentry of the top-level "v4l2" debugfs dir + * + * If this directory does not yet exist, then it will be created. + */ +#ifdef CONFIG_DEBUG_FS +struct dentry *v4l2_debugfs_root(void); +#else +static inline struct dentry *v4l2_debugfs_root(void) +{ + return NULL; +} +#endif + #if defined(CONFIG_MEDIA_CONTROLLER) /** -- cgit v1.2.3 From 01c76fc995cdb1e701fa721c5b373777f0469b37 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 28 Aug 2024 16:24:08 +0200 Subject: media: v4l2-core: add v4l2_debugfs_if_alloc/free() Add new helpers to export received or transmitted HDMI InfoFrames to debugfs. This complements similar code in drm where the transmitted HDMI infoframes are exported to debugfs. The same names have been used as in drm, so this is consistent. The exported infoframes can be parsed with the edid-decode utility. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-dv-timings.h | 48 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-dv-timings.h b/include/media/v4l2-dv-timings.h index 8fa963326bf6..13830411bd6c 100644 --- a/include/media/v4l2-dv-timings.h +++ b/include/media/v4l2-dv-timings.h @@ -8,6 +8,7 @@ #ifndef __V4L2_DV_TIMINGS_H #define __V4L2_DV_TIMINGS_H +#include #include /** @@ -251,4 +252,51 @@ void v4l2_set_edid_phys_addr(u8 *edid, unsigned int size, u16 phys_addr); u16 v4l2_phys_addr_for_input(u16 phys_addr, u8 input); int v4l2_phys_addr_validate(u16 phys_addr, u16 *parent, u16 *port); +/* Add support for exporting InfoFrames to debugfs */ + +/* + * HDMI InfoFrames start with a 3 byte header, then a checksum, + * followed by the actual IF payload. + * + * The payload length is limited to 30 bytes according to the HDMI spec, + * but since the length is encoded in 5 bits, it can be 31 bytes theoretically. + * So set the max length as 31 + 3 (header) + 1 (checksum) = 35. + */ +#define V4L2_DEBUGFS_IF_MAX_LEN (35) + +#define V4L2_DEBUGFS_IF_AVI BIT(0) +#define V4L2_DEBUGFS_IF_AUDIO BIT(1) +#define V4L2_DEBUGFS_IF_SPD BIT(2) +#define V4L2_DEBUGFS_IF_HDMI BIT(3) + +typedef ssize_t (*v4l2_debugfs_if_read_t)(u32 type, void *priv, + struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos); + +struct v4l2_debugfs_if { + struct dentry *if_dir; + void *priv; + + v4l2_debugfs_if_read_t if_read; +}; + +#ifdef CONFIG_DEBUG_FS +struct v4l2_debugfs_if *v4l2_debugfs_if_alloc(struct dentry *root, u32 if_types, + void *priv, + v4l2_debugfs_if_read_t if_read); +void v4l2_debugfs_if_free(struct v4l2_debugfs_if *infoframes); +#else +static inline +struct v4l2_debugfs_if *v4l2_debugfs_if_alloc(struct dentry *root, u32 if_types, + void *priv, + v4l2_debugfs_if_read_t if_read) +{ + return NULL; +} + +static inline void v4l2_debugfs_if_free(struct v4l2_debugfs_if *infoframes) +{ +} +#endif + #endif -- cgit v1.2.3 From 8fa714ca334e0880665a14fed13b16e3e01a67b2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 10 Oct 2024 21:15:35 +0300 Subject: iio: Convert unsigned to unsigned int Simple type conversion with no functional change implied. Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20241010181535.3083262-1-andriy.shevchenko@linux.intel.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio-opaque.h | 2 +- include/linux/iio/iio.h | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index 5aec3945555b..a89e7e43e441 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -70,7 +70,7 @@ struct iio_dev_opaque { #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_dentry; - unsigned cached_reg_addr; + unsigned int cached_reg_addr; char read_buf[20]; unsigned int read_buf_len; #endif diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 18779b631e90..3a9b57187a95 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -282,11 +282,11 @@ struct iio_chan_spec { const struct iio_chan_spec_ext_info *ext_info; const char *extend_name; const char *datasheet_name; - unsigned modified:1; - unsigned indexed:1; - unsigned output:1; - unsigned differential:1; - unsigned has_ext_scan_type:1; + unsigned int modified:1; + unsigned int indexed:1; + unsigned int output:1; + unsigned int differential:1; + unsigned int has_ext_scan_type:1; }; @@ -541,13 +541,13 @@ struct iio_info { int (*update_scan_mode)(struct iio_dev *indio_dev, const unsigned long *scan_mask); int (*debugfs_reg_access)(struct iio_dev *indio_dev, - unsigned reg, unsigned writeval, - unsigned *readval); + unsigned int reg, unsigned int writeval, + unsigned int *readval); int (*fwnode_xlate)(struct iio_dev *indio_dev, const struct fwnode_reference_args *iiospec); - int (*hwfifo_set_watermark)(struct iio_dev *indio_dev, unsigned val); + int (*hwfifo_set_watermark)(struct iio_dev *indio_dev, unsigned int val); int (*hwfifo_flush_to_buffer)(struct iio_dev *indio_dev, - unsigned count); + unsigned int count); }; /** @@ -609,7 +609,7 @@ struct iio_dev { int scan_bytes; const unsigned long *available_scan_masks; - unsigned __private masklength; + unsigned int __private masklength; const unsigned long *active_scan_mask; bool scan_timestamp; struct iio_trigger *trig; -- cgit v1.2.3 From 06f5531958dd5decaabb21c6fa1da3dcaf8dfc24 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Mon, 26 Aug 2024 17:24:09 +0000 Subject: media: videodev2: Add flag to unconditionally enumerate pixel formats When the index is ORed with V4L2_FMTDESC_FLAG_ENUM_ALL the driver clears the flag and enumerate all the possible formats, ignoring any limitations from the current configuration. Drivers which do not support this flag yet always return an EINVAL. Signed-off-by: Benjamin Gaignard Signed-off-by: Hans Verkuil [hverkuil: improved doc when the new flag is not supported by the driver] --- include/uapi/linux/videodev2.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 21a8aa575ea3..ded023edac70 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -907,6 +907,9 @@ struct v4l2_fmtdesc { #define V4L2_FMT_FLAG_CSC_QUANTIZATION 0x0100 #define V4L2_FMT_FLAG_META_LINE_BASED 0x0200 +/* Format description flag, to be ORed with the index */ +#define V4L2_FMTDESC_FLAG_ENUM_ALL 0x80000000 + /* Frame Size and frame rate enumeration */ /* * F R A M E S I Z E E N U M E R A T I O N -- cgit v1.2.3 From 454bbde8f0d465e93e5a3a4003ac6c7e62fa4473 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 9 Oct 2024 10:28:19 +0800 Subject: net: skb: add pskb_network_may_pull_reason() helper Introduce the function pskb_network_may_pull_reason() and make pskb_network_may_pull() a simple inline call to it. The drop reasons of it just come from pskb_may_pull_reason. Signed-off-by: Menglong Dong Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 39f1d16f3628..48f1e0fa2a13 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3130,9 +3130,15 @@ static inline int skb_inner_network_offset(const struct sk_buff *skb) return skb_inner_network_header(skb) - skb->data; } +static inline enum skb_drop_reason +pskb_network_may_pull_reason(struct sk_buff *skb, unsigned int len) +{ + return pskb_may_pull_reason(skb, skb_network_offset(skb) + len); +} + static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) { - return pskb_may_pull(skb, skb_network_offset(skb) + len); + return pskb_network_may_pull_reason(skb, len) == SKB_NOT_DROPPED_YET; } /* -- cgit v1.2.3 From 7f20dbd7de7b9b2804e6bf54b0c22f2bc447cd64 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 9 Oct 2024 10:28:20 +0800 Subject: net: tunnel: add pskb_inet_may_pull_reason() helper Introduce the function pskb_inet_may_pull_reason() and make pskb_inet_may_pull a simple inline call to it. The drop reasons of it just come from pskb_may_pull_reason(). Signed-off-by: Menglong Dong Reviewed-by: Simon Horman Reviewed-by: Kalesh AP Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 6194fbb564c6..7fc2f7bf837a 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -439,7 +439,8 @@ int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, int ip_tunnel_encap_setup(struct ip_tunnel *t, struct ip_tunnel_encap *ipencap); -static inline bool pskb_inet_may_pull(struct sk_buff *skb) +static inline enum skb_drop_reason +pskb_inet_may_pull_reason(struct sk_buff *skb) { int nhlen; @@ -456,7 +457,12 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb) nhlen = 0; } - return pskb_network_may_pull(skb, nhlen); + return pskb_network_may_pull_reason(skb, nhlen); +} + +static inline bool pskb_inet_may_pull(struct sk_buff *skb) +{ + return pskb_inet_may_pull_reason(skb) == SKB_NOT_DROPPED_YET; } /* Variant of pskb_inet_may_pull(). -- cgit v1.2.3 From 9990ddf47d4168088e2246c3d418bf526e40830d Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 9 Oct 2024 10:28:21 +0800 Subject: net: tunnel: make skb_vlan_inet_prepare() return drop reasons Make skb_vlan_inet_prepare return the skb drop reasons, which is just what pskb_may_pull_reason() returns. Meanwhile, adjust all the call of it. Signed-off-by: Menglong Dong Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 7fc2f7bf837a..4e4f9e24c9c1 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -467,11 +467,12 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb) /* Variant of pskb_inet_may_pull(). */ -static inline bool skb_vlan_inet_prepare(struct sk_buff *skb, - bool inner_proto_inherit) +static inline enum skb_drop_reason +skb_vlan_inet_prepare(struct sk_buff *skb, bool inner_proto_inherit) { int nhlen = 0, maclen = inner_proto_inherit ? 0 : ETH_HLEN; __be16 type = skb->protocol; + enum skb_drop_reason reason; /* Essentially this is skb_protocol(skb, true) * And we get MAC len. @@ -492,11 +493,13 @@ static inline bool skb_vlan_inet_prepare(struct sk_buff *skb, /* For ETH_P_IPV6/ETH_P_IP we make sure to pull * a base network header in skb->head. */ - if (!pskb_may_pull(skb, maclen + nhlen)) - return false; + reason = pskb_may_pull_reason(skb, maclen + nhlen); + if (reason) + return reason; skb_set_network_header(skb, maclen); - return true; + + return SKB_NOT_DROPPED_YET; } static inline int ip_encap_hlen(struct ip_tunnel_encap *e) -- cgit v1.2.3 From 4c06d9daf8e6215447ca8a2ddd59fa09862c9bae Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 9 Oct 2024 10:28:22 +0800 Subject: net: vxlan: add skb drop reasons to vxlan_rcv() Introduce skb drop reasons to the function vxlan_rcv(). Following new drop reasons are added: SKB_DROP_REASON_VXLAN_INVALID_HDR SKB_DROP_REASON_VXLAN_VNI_NOT_FOUND SKB_DROP_REASON_IP_TUNNEL_ECN Signed-off-by: Menglong Dong Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/dropreason-core.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 4748680e8c88..98259d2b3e92 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -92,6 +92,9 @@ FN(PACKET_SOCK_ERROR) \ FN(TC_CHAIN_NOTFOUND) \ FN(TC_RECLASSIFY_LOOP) \ + FN(VXLAN_INVALID_HDR) \ + FN(VXLAN_VNI_NOT_FOUND) \ + FN(IP_TUNNEL_ECN) \ FNe(MAX) /** @@ -418,6 +421,19 @@ enum skb_drop_reason { * iterations. */ SKB_DROP_REASON_TC_RECLASSIFY_LOOP, + /** + * @SKB_DROP_REASON_VXLAN_INVALID_HDR: VXLAN header is invalid. E.g.: + * 1) reserved fields are not zero + * 2) "I" flag is not set + */ + SKB_DROP_REASON_VXLAN_INVALID_HDR, + /** @SKB_DROP_REASON_VXLAN_VNI_NOT_FOUND: no VXLAN device found for VNI */ + SKB_DROP_REASON_VXLAN_VNI_NOT_FOUND, + /** + * @SKB_DROP_REASON_IP_TUNNEL_ECN: skb is dropped according to + * RFC 6040 4.2, see __INET_ECN_decapsulate() for detail. + */ + SKB_DROP_REASON_IP_TUNNEL_ECN, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen -- cgit v1.2.3 From 289fd4e75219a96f77c5d679166035cd5118d139 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 9 Oct 2024 10:28:24 +0800 Subject: net: vxlan: make vxlan_snoop() return drop reasons Change the return type of vxlan_snoop() from bool to enum skb_drop_reason. In this commit, two drop reasons are introduced: SKB_DROP_REASON_MAC_INVALID_SOURCE SKB_DROP_REASON_VXLAN_ENTRY_EXISTS Signed-off-by: Menglong Dong Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/dropreason-core.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 98259d2b3e92..1cb8d7c953be 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -94,6 +94,8 @@ FN(TC_RECLASSIFY_LOOP) \ FN(VXLAN_INVALID_HDR) \ FN(VXLAN_VNI_NOT_FOUND) \ + FN(MAC_INVALID_SOURCE) \ + FN(VXLAN_ENTRY_EXISTS) \ FN(IP_TUNNEL_ECN) \ FNe(MAX) @@ -429,6 +431,13 @@ enum skb_drop_reason { SKB_DROP_REASON_VXLAN_INVALID_HDR, /** @SKB_DROP_REASON_VXLAN_VNI_NOT_FOUND: no VXLAN device found for VNI */ SKB_DROP_REASON_VXLAN_VNI_NOT_FOUND, + /** @SKB_DROP_REASON_MAC_INVALID_SOURCE: source mac is invalid */ + SKB_DROP_REASON_MAC_INVALID_SOURCE, + /** + * @SKB_DROP_REASON_VXLAN_ENTRY_EXISTS: trying to migrate a static + * entry or an entry pointing to a nexthop. + */ + SKB_DROP_REASON_VXLAN_ENTRY_EXISTS, /** * @SKB_DROP_REASON_IP_TUNNEL_ECN: skb is dropped according to * RFC 6040 4.2, see __INET_ECN_decapsulate() for detail. -- cgit v1.2.3 From d209706f562ee4fa81bdf24cf6b679c3222aa06c Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 9 Oct 2024 10:28:25 +0800 Subject: net: vxlan: make vxlan_set_mac() return drop reasons Change the return type of vxlan_set_mac() from bool to enum skb_drop_reason. In this commit, the drop reason "SKB_DROP_REASON_LOCAL_MAC" is introduced for the case that the source mac of the packet is a local mac. Signed-off-by: Menglong Dong Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/dropreason-core.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 1cb8d7c953be..fbf92d442c1b 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -97,6 +97,7 @@ FN(MAC_INVALID_SOURCE) \ FN(VXLAN_ENTRY_EXISTS) \ FN(IP_TUNNEL_ECN) \ + FN(LOCAL_MAC) \ FNe(MAX) /** @@ -443,6 +444,11 @@ enum skb_drop_reason { * RFC 6040 4.2, see __INET_ECN_decapsulate() for detail. */ SKB_DROP_REASON_IP_TUNNEL_ECN, + /** + * @SKB_DROP_REASON_LOCAL_MAC: the source MAC address is equal to + * the MAC address of the local netdev. + */ + SKB_DROP_REASON_LOCAL_MAC, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen -- cgit v1.2.3 From b71a576e452b800efeac49ecca116d954601d911 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 9 Oct 2024 10:28:26 +0800 Subject: net: vxlan: use kfree_skb_reason() in vxlan_xmit() Replace kfree_skb() with kfree_skb_reason() in vxlan_xmit(). Following new skb drop reasons are introduced for vxlan: /* no remote found for xmit */ SKB_DROP_REASON_VXLAN_NO_REMOTE /* packet without necessary metadata reached a device which is * in "external" mode */ SKB_DROP_REASON_TUNNEL_TXINFO Signed-off-by: Menglong Dong Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/dropreason-core.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index fbf92d442c1b..d59bb96c5a02 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -96,7 +96,9 @@ FN(VXLAN_VNI_NOT_FOUND) \ FN(MAC_INVALID_SOURCE) \ FN(VXLAN_ENTRY_EXISTS) \ + FN(VXLAN_NO_REMOTE) \ FN(IP_TUNNEL_ECN) \ + FN(TUNNEL_TXINFO) \ FN(LOCAL_MAC) \ FNe(MAX) @@ -439,11 +441,18 @@ enum skb_drop_reason { * entry or an entry pointing to a nexthop. */ SKB_DROP_REASON_VXLAN_ENTRY_EXISTS, + /** @SKB_DROP_REASON_VXLAN_NO_REMOTE: no remote found for xmit */ + SKB_DROP_REASON_VXLAN_NO_REMOTE, /** * @SKB_DROP_REASON_IP_TUNNEL_ECN: skb is dropped according to * RFC 6040 4.2, see __INET_ECN_decapsulate() for detail. */ SKB_DROP_REASON_IP_TUNNEL_ECN, + /** + * @SKB_DROP_REASON_TUNNEL_TXINFO: packet without necessary metadata + * reached a device which is in "external" mode. + */ + SKB_DROP_REASON_TUNNEL_TXINFO, /** * @SKB_DROP_REASON_LOCAL_MAC: the source MAC address is equal to * the MAC address of the local netdev. -- cgit v1.2.3 From 7948483001039c00dcff4b94c181d7e14693a38c Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Fri, 11 Oct 2024 21:12:52 +0200 Subject: misc: keba: Add SPI controller device Add support for the SPI controller auxiliary device. This enables access to the SPI flash of the FPGA and some other SPI devices. The actual list of SPI devices is detected by reading some bits out of the previously registered I2C EEPROM. Signed-off-by: Gerhard Engleder Link: https://lore.kernel.org/r/20241011191257.19702-4-gerhard@engleder-embedded.com Signed-off-by: Greg Kroah-Hartman --- include/linux/misc/keba.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/misc/keba.h b/include/linux/misc/keba.h index 323b31a847c5..1bd5409c6f6f 100644 --- a/include/linux/misc/keba.h +++ b/include/linux/misc/keba.h @@ -7,6 +7,7 @@ #include struct i2c_board_info; +struct spi_board_info; /** * struct keba_i2c_auxdev - KEBA I2C auxiliary device @@ -22,4 +23,18 @@ struct keba_i2c_auxdev { struct i2c_board_info *info; }; +/** + * struct keba_spi_auxdev - KEBA SPI auxiliary device + * @auxdev: auxiliary device object + * @io: address range of SPI controller IO memory + * @info_size: number of SPI devices to be probed + * @info: SPI devices to be probed + */ +struct keba_spi_auxdev { + struct auxiliary_device auxdev; + struct resource io; + int info_size; + struct spi_board_info *info; +}; + #endif /* _LINUX_MISC_KEBA_H */ -- cgit v1.2.3 From f965d315bcbd65adfe5e3c161e46b5dc0a463f68 Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Fri, 11 Oct 2024 21:12:55 +0200 Subject: misc: keba: Add fan device Add support for the fan auxiliary device. This enables monitoring of the fan. Signed-off-by: Gerhard Engleder Link: https://lore.kernel.org/r/20241011191257.19702-7-gerhard@engleder-embedded.com Signed-off-by: Greg Kroah-Hartman --- include/linux/misc/keba.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/misc/keba.h b/include/linux/misc/keba.h index 1bd5409c6f6f..451777acc262 100644 --- a/include/linux/misc/keba.h +++ b/include/linux/misc/keba.h @@ -37,4 +37,14 @@ struct keba_spi_auxdev { struct spi_board_info *info; }; +/** + * struct keba_fan_auxdev - KEBA fan auxiliary device + * @auxdev: auxiliary device object + * @io: address range of fan controller IO memory + */ +struct keba_fan_auxdev { + struct auxiliary_device auxdev; + struct resource io; +}; + #endif /* _LINUX_MISC_KEBA_H */ -- cgit v1.2.3 From ca7b844b91920573835ad11daaa30630ce112fe1 Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Fri, 11 Oct 2024 21:12:56 +0200 Subject: misc: keba: Add battery device Add support for the battery auxiliary device. This enables monitoring of the battery. Signed-off-by: Gerhard Engleder Link: https://lore.kernel.org/r/20241011191257.19702-8-gerhard@engleder-embedded.com Signed-off-by: Greg Kroah-Hartman --- include/linux/misc/keba.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/misc/keba.h b/include/linux/misc/keba.h index 451777acc262..ca52716f8437 100644 --- a/include/linux/misc/keba.h +++ b/include/linux/misc/keba.h @@ -47,4 +47,14 @@ struct keba_fan_auxdev { struct resource io; }; +/** + * struct keba_batt_auxdev - KEBA battery auxiliary device + * @auxdev: auxiliary device object + * @io: address range of battery controller IO memory + */ +struct keba_batt_auxdev { + struct auxiliary_device auxdev; + struct resource io; +}; + #endif /* _LINUX_MISC_KEBA_H */ -- cgit v1.2.3 From a27b406a49225a17849c86221a32f2d598702719 Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Fri, 11 Oct 2024 21:12:57 +0200 Subject: misc: keba: Add UART devices Add support for the UART auxiliary devices. This enables access to up to 3 different UARTs, which are implemented in the FPGA. Signed-off-by: Gerhard Engleder Link: https://lore.kernel.org/r/20241011191257.19702-9-gerhard@engleder-embedded.com Signed-off-by: Greg Kroah-Hartman --- include/linux/misc/keba.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/misc/keba.h b/include/linux/misc/keba.h index ca52716f8437..a81d6fa70851 100644 --- a/include/linux/misc/keba.h +++ b/include/linux/misc/keba.h @@ -57,4 +57,16 @@ struct keba_batt_auxdev { struct resource io; }; +/** + * struct keba_uart_auxdev - KEBA UART auxiliary device + * @auxdev: auxiliary device object + * @io: address range of UART controller IO memory + * @irq: number of UART controller interrupt + */ +struct keba_uart_auxdev { + struct auxiliary_device auxdev; + struct resource io; + unsigned int irq; +}; + #endif /* _LINUX_MISC_KEBA_H */ -- cgit v1.2.3 From cec78a59abc9b1a06f742cb96479fc0bb1fe4e90 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Tue, 17 Sep 2024 18:42:56 +0800 Subject: list: Remove duplicated and unused macro list_for_each_reverse Remove macro list_for_each_reverse due to below reasons: - it is same as list_for_each_prev. - it is not used by current kernel tree. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20240917-fix_list-v2-1-d2914665e89f@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/list.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index 5f4b0a39cf46..29a375889fb8 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -686,14 +686,6 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_for_each(pos, head) \ for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) -/** - * list_for_each_reverse - iterate backwards over a list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - */ -#define list_for_each_reverse(pos, head) \ - for (pos = (head)->prev; pos != (head); pos = pos->prev) - /** * list_for_each_rcu - Iterate over a list in an RCU-safe fashion * @pos: the &struct list_head to use as a loop cursor. -- cgit v1.2.3 From 0ebe74c53b8b62bde7b02415d28e75aa25d6c2e6 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 29 Sep 2024 15:11:12 +0100 Subject: drivers/base: Remove unused auxiliary_find_device auxiliary_find_device has been unused since commit 1c5de097bea3 ("net/mlx5: Fix mlx5_get_next_dev() peer device matching") which was the only use since it was originally added. Remove it. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/20240929141112.69824-1-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- include/linux/auxiliary_bus.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index 31762324bcc9..65dd7f154374 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -269,8 +269,4 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv); #define module_auxiliary_driver(__auxiliary_driver) \ module_driver(__auxiliary_driver, auxiliary_driver_register, auxiliary_driver_unregister) -struct auxiliary_device *auxiliary_find_device(struct device *start, - const void *data, - device_match_t match); - #endif /* _AUXILIARY_BUS_H_ */ -- cgit v1.2.3 From cd068d51594d9635bf6688fc78717572b78bce6a Mon Sep 17 00:00:00 2001 From: Keita Aihara Date: Fri, 13 Sep 2024 18:44:17 +0900 Subject: mmc: core: Add SD card quirk for broken poweroff notification GIGASTONE Gaming Plus microSD cards manufactured on 02/2022 report that they support poweroff notification and cache, but they are not working correctly. Flush Cache bit never gets cleared in sd_flush_cache() and Poweroff Notification Ready bit also never gets set to 1 within 1 second from the end of busy of CMD49 in sd_poweroff_notify(). This leads to I/O error and runtime PM error state. I observed that the same card manufactured on 01/2024 works as expected. This problem seems similar to the Kingston cards fixed with commit c467c8f08185 ("mmc: Add MMC_QUIRK_BROKEN_SD_CACHE for Kingston Canvas Go Plus from 11/2019") and should be handled using quirks. CID for the problematic card is here. 12345641535443002000000145016200 Manufacturer ID is 0x12 and defined as CID_MANFID_GIGASTONE as of now, but would like comments on what naming is appropriate because MID list is not public and not sure it's right. Signed-off-by: Keita Aihara Link: https://lore.kernel.org/r/20240913094417.GA4191647@sony.com Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index f34407cc2788..543446392776 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -294,6 +294,7 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */ #define MMC_QUIRK_BROKEN_SD_CACHE (1<<15) /* Disable broken SD cache support */ #define MMC_QUIRK_BROKEN_CACHE_FLUSH (1<<16) /* Don't flush cache until the write has occurred */ +#define MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY (1<<17) /* Disable broken SD poweroff notify support */ bool written_flag; /* Indicates eMMC has been written since power on */ bool reenable_cmdq; /* Re-enable Command Queue */ -- cgit v1.2.3 From fce2ce78af1e14dc1316aaddb5b3308be05cf452 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Sun, 6 Oct 2024 08:11:39 +0300 Subject: mmc: sd: SDUC Support Recognition Ultra Capacity SD cards (SDUC) was already introduced in SD7.0. Those cards support capacity larger than 2TB and up to including 128TB. ACMD41 was extended to support the host-card handshake during initialization. The card expects that the HCS & HO2T bits to be set in the command argument, and sets the applicable bits in the R3 returned response. On the contrary, if a SDUC card is inserted to a non-supporting host, it will never respond to this ACMD41 until eventually, the host will timed out and give up. Also, add SD CSD version 3.0 - designated for SDUC, and properly parse the csd register as the c_size field got expanded to 28 bits. Do not enable SDUC for now - leave it to the last patch in the series. Tested-by: Ricky WU Reviewed-by: Adrian Hunter Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20241006051148.160278-2-avri.altman@wdc.com Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 2 +- include/linux/mmc/sd.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 543446392776..eb67d3d5ff5b 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -35,7 +35,7 @@ struct mmc_csd { unsigned int wp_grp_size; unsigned int read_blkbits; unsigned int write_blkbits; - unsigned int capacity; + sector_t capacity; unsigned int read_partial:1, read_misalign:1, write_partial:1, diff --git a/include/linux/mmc/sd.h b/include/linux/mmc/sd.h index 6727576a8755..865cc0ca8543 100644 --- a/include/linux/mmc/sd.h +++ b/include/linux/mmc/sd.h @@ -36,6 +36,7 @@ /* OCR bit definitions */ #define SD_OCR_S18R (1 << 24) /* 1.8V switching request */ #define SD_ROCR_S18A SD_OCR_S18R /* 1.8V switching accepted by card */ +#define SD_OCR_2T (1 << 27) /* HO2T/CO2T - SDUC support */ #define SD_OCR_XPC (1 << 28) /* SDXC power control */ #define SD_OCR_CCS (1 << 30) /* Card Capacity Status */ -- cgit v1.2.3 From 375b535941bea65b37451f0fd398e28bf4f3bdc3 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Sun, 6 Oct 2024 08:11:40 +0300 Subject: mmc: sd: Add Extension memory addressing SDUC memory addressing spans beyond 2TB and up to 128TB. Therefore, 38 bits are required to access the entire memory space of all sectors. Those extra 6 bits are to be carried by CMD22 prior of sending read/write/erase commands: CMD17, CMD18, CMD24, CMD25, CMD32, and CMD33. CMD22 will carry the higher order 6 bits, and must precedes any of the above commands even if it targets sector < 2TB. No error related to address or length is indicated in CMD22 but rather in the read/write command itself. Tested-by: Ricky WU Reviewed-by: Adrian Hunter Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20241006051148.160278-3-avri.altman@wdc.com Signed-off-by: Ulf Hansson --- include/linux/mmc/sd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mmc/sd.h b/include/linux/mmc/sd.h index 865cc0ca8543..af5fc70e09a2 100644 --- a/include/linux/mmc/sd.h +++ b/include/linux/mmc/sd.h @@ -15,6 +15,9 @@ #define SD_SEND_IF_COND 8 /* bcr [11:0] See below R7 */ #define SD_SWITCH_VOLTAGE 11 /* ac R1 */ +/* Class 2 */ +#define SD_ADDR_EXT 22 /* ac [5:0] R1 */ + /* class 10 */ #define SD_SWITCH 6 /* adtc [31:0] See below R1 */ -- cgit v1.2.3 From 403a0293f1c230524e0185b31f69c02a6aed12c7 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Sun, 6 Oct 2024 08:11:42 +0300 Subject: mmc: core: Add open-ended Ext memory addressing For open-ended read/write - just send CMD22 before issuing the command. Reviewed-by: Adrian Hunter Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20241006051148.160278-5-avri.altman@wdc.com Signed-off-by: Ulf Hansson --- include/linux/mmc/core.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index f0ac2e469b32..a890a71288ef 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -96,6 +96,10 @@ struct mmc_command { unsigned int busy_timeout; /* busy detect timeout in ms */ struct mmc_data *data; /* data segment associated with cmd */ struct mmc_request *mrq; /* associated request */ + + /* for SDUC */ + bool has_ext_addr; + u8 ext_addr; }; struct mmc_data { -- cgit v1.2.3 From 79daeb241db7901e4bd53cce9ab046f376a63a4c Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 13 Sep 2024 18:28:16 +0800 Subject: mmc: core: Prepare to support SD UHS-II cards The SD UHS-II interface was introduced to the SD spec v4.00 several years ago. The interface is fundamentally different from an electrical and a protocol point of view, comparing to the legacy SD interface. However, the legacy SD protocol is supported through a specific transport layer (SD-TRAN) defined in the UHS-II addendum of the spec. This allows the SD card to be managed in a very similar way as a legacy SD card, hence a lot of code can be re-used to support these new types of cards through the mmc subsystem. Moreover, an SD card that supports the UHS-II interface shall also be backwards compatible with the legacy SD interface, which allows a UHS-II card to be inserted into a legacy slot. As a matter of fact, this is already supported by mmc subsystem as of today. To prepare to add support for UHS-II, this change puts the basic foundation in the mmc core in place, allowing it to be more easily reviewed before subsequent changes implements the actual support. Basically, the approach here adds a new UHS-II bus_ops type and adds a separate initialization path for the UHS-II card. The intent is to avoid us from sprinkling the legacy initialization path, but also to simplify implementation of the UHS-II specific bits. At this point, there is only one new host ops added to manage the various ios settings needed for UHS-II. Additional host ops that are needed, are being added from subsequent changes. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20240913102836.6144-3-victorshihgli@gmail.com --- include/linux/mmc/card.h | 7 +++++++ include/linux/mmc/host.h | 23 +++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index eb67d3d5ff5b..0bb1ad1ea4dc 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -209,6 +209,11 @@ struct sd_ext_reg { #define SD_EXT_PERF_CMD_QUEUE (1<<4) }; +struct sd_uhs2_config { + u32 node_id; + /* TODO: Extend with more register configs. */ +}; + struct sdio_cccr { unsigned int sdio_vsn; unsigned int sd_vsn; @@ -320,6 +325,8 @@ struct mmc_card { struct sd_ext_reg ext_power; /* SD extension reg for PM */ struct sd_ext_reg ext_perf; /* SD extension reg for PERF */ + struct sd_uhs2_config uhs2_config; /* SD UHS-II config */ + unsigned int sdio_funcs; /* number of SDIO functions */ atomic_t sdio_funcs_probed; /* number of probed SDIO funcs */ struct sdio_cccr cccr; /* common card info */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 8fc2b328ec4d..e1d380de3aaa 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -64,6 +64,10 @@ struct mmc_ios { #define MMC_TIMING_MMC_HS400 10 #define MMC_TIMING_SD_EXP 11 #define MMC_TIMING_SD_EXP_1_2V 12 +#define MMC_TIMING_UHS2_SPEED_A 13 +#define MMC_TIMING_UHS2_SPEED_A_HD 14 +#define MMC_TIMING_UHS2_SPEED_B 15 +#define MMC_TIMING_UHS2_SPEED_B_HD 16 unsigned char signal_voltage; /* signalling voltage (1.8V or 3.3V) */ @@ -92,6 +96,14 @@ struct mmc_clk_phase_map { struct mmc_clk_phase phase[MMC_NUM_CLK_PHASES]; }; +struct sd_uhs2_caps { + /* TODO: Add UHS-II capabilities for the host. */ +}; + +enum sd_uhs2_operation { + UHS2_SET_IOS, +}; + struct mmc_host; enum mmc_err_stat { @@ -219,6 +231,14 @@ struct mmc_host_ops { /* Initialize an SD express card, mandatory for MMC_CAP2_SD_EXP. */ int (*init_sd_express)(struct mmc_host *host, struct mmc_ios *ios); + + /* + * The uhs2_control callback is used to execute SD UHS-II specific + * operations. It's mandatory to implement for hosts that supports the + * SD UHS-II interface (MMC_CAP2_SD_UHS2). Expected return values are a + * negative errno in case of a failure or zero for success. + */ + int (*uhs2_control)(struct mmc_host *host, enum sd_uhs2_operation op); }; struct mmc_cqe_ops { @@ -379,6 +399,7 @@ struct mmc_host { MMC_CAP2_HS200_1_2V_SDR) #define MMC_CAP2_SD_EXP (1 << 7) /* SD express via PCIe */ #define MMC_CAP2_SD_EXP_1_2V (1 << 8) /* SD express 1.2V */ +#define MMC_CAP2_SD_UHS2 (1 << 9) /* SD UHS-II support */ #define MMC_CAP2_CD_ACTIVE_HIGH (1 << 10) /* Card-detect signal active high */ #define MMC_CAP2_RO_ACTIVE_HIGH (1 << 11) /* Write-protect signal active high */ #define MMC_CAP2_NO_PRESCAN_POWERUP (1 << 14) /* Don't power up before scan */ @@ -405,6 +426,8 @@ struct mmc_host { #endif #define MMC_CAP2_ALT_GPT_TEGRA (1 << 28) /* Host with eMMC that has GPT entry at a non-standard location */ + struct sd_uhs2_caps uhs2_caps; /* Host UHS-II capabilities */ + int fixed_drv_type; /* fixed driver type for non-removable media */ mmc_pm_flag_t pm_caps; /* supported pm features */ -- cgit v1.2.3 From 153196d550c747367bdbec5cd545a572c5310451 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 13 Sep 2024 18:28:17 +0800 Subject: mmc: core: Announce successful insertion of an SD UHS-II card To inform the users about SD UHS-II cards, let's extend the print at card insertion with a "UHS-II" substring. Within this change, it seems reasonable to convert from using "ultra high speed" into "UHS-I speed", for the UHS-I type, as it should makes it more clear. Note that, the new print for UHS-II cards doesn't include the actual selected speed mode. Instead, this is going to be added from subsequent change. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20240913102836.6144-4-victorshihgli@gmail.com --- include/linux/mmc/host.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index e1d380de3aaa..9a148280bf42 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -638,6 +638,14 @@ static inline int mmc_card_uhs(struct mmc_card *card) card->host->ios.timing <= MMC_TIMING_UHS_DDR50; } +static inline bool mmc_card_uhs2(struct mmc_host *host) +{ + return host->ios.timing == MMC_TIMING_UHS2_SPEED_A || + host->ios.timing == MMC_TIMING_UHS2_SPEED_A_HD || + host->ios.timing == MMC_TIMING_UHS2_SPEED_B || + host->ios.timing == MMC_TIMING_UHS2_SPEED_B_HD; +} + void mmc_retune_timer_stop(struct mmc_host *host); static inline void mmc_retune_needed(struct mmc_host *host) -- cgit v1.2.3 From a56ffd3a83ed2e10e0d9e0b199547bfa0d206aac Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 13 Sep 2024 18:28:18 +0800 Subject: mmc: core: Extend support for mmc regulators with a vqmmc2 To allow an additional external regulator to be controlled by an mmc host driver, let's add support for a vqmmc2 regulator to the mmc core. For an SD UHS-II interface the vqmmc2 regulator may correspond to the so called vdd2 supply, as described by the SD spec. Initially, only 1.8V is needed, hence limit the new helper function, mmc_regulator_set_vqmmc2() to this too. Note that, to allow for flexibility mmc host drivers need to manage the enable/disable of the vqmmc2 regulator themselves, while the regulator is looked up through the common mmc_regulator_get_supply(). Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20240913102836.6144-5-victorshihgli@gmail.com --- include/linux/mmc/host.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 9a148280bf42..a7f790c75bc8 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -75,6 +75,9 @@ struct mmc_ios { #define MMC_SIGNAL_VOLTAGE_180 1 #define MMC_SIGNAL_VOLTAGE_120 2 + unsigned char vqmmc2_voltage; +#define MMC_VQMMC2_VOLTAGE_180 0 + unsigned char drv_type; /* driver type (A, B, C, D) */ #define MMC_SET_DRIVER_TYPE_B 0 @@ -308,6 +311,7 @@ struct mmc_pwrseq; struct mmc_supply { struct regulator *vmmc; /* Card power supply */ struct regulator *vqmmc; /* Optional Vccq supply */ + struct regulator *vqmmc2; /* Optional supply for phy */ }; struct mmc_ctx { @@ -590,6 +594,7 @@ int mmc_regulator_set_ocr(struct mmc_host *mmc, struct regulator *supply, unsigned short vdd_bit); int mmc_regulator_set_vqmmc(struct mmc_host *mmc, struct mmc_ios *ios); +int mmc_regulator_set_vqmmc2(struct mmc_host *mmc, struct mmc_ios *ios); #else static inline int mmc_regulator_set_ocr(struct mmc_host *mmc, struct regulator *supply, @@ -603,6 +608,12 @@ static inline int mmc_regulator_set_vqmmc(struct mmc_host *mmc, { return -EINVAL; } + +static inline int mmc_regulator_set_vqmmc2(struct mmc_host *mmc, + struct mmc_ios *ios) +{ + return -EINVAL; +} #endif int mmc_regulator_get_supply(struct mmc_host *mmc); -- cgit v1.2.3 From a9a75f9dc23c1562dcb261c0a8f3d6fc70d246cd Mon Sep 17 00:00:00 2001 From: Victor Shih Date: Fri, 13 Sep 2024 18:28:19 +0800 Subject: mmc: core: Add definitions for SD UHS-II cards Add UHS-II specific data structures for commands and defines for registers, as described in Part 1 UHS-II Addendum Version 1.01. UHS-II related definitions are listed below: 1. UHS-II card capability: sd_uhs2_caps{} 2. UHS-II configuration: sd_uhs2_config{} 3. UHS-II register I/O address and register field definitions: sd_uhs2.h Signed-off-by: Jason Lai Signed-off-by: Victor Shih Link: https://lore.kernel.org/r/20240913102836.6144-6-victorshihgli@gmail.com Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 31 +++++- include/linux/mmc/host.h | 25 ++++- include/linux/mmc/sd_uhs2.h | 240 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 294 insertions(+), 2 deletions(-) create mode 100644 include/linux/mmc/sd_uhs2.h (limited to 'include') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 0bb1ad1ea4dc..526fce581657 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -188,6 +188,12 @@ struct sd_switch_caps { #define SD_MAX_CURRENT_400 (1 << SD_SET_CURRENT_LIMIT_400) #define SD_MAX_CURRENT_600 (1 << SD_SET_CURRENT_LIMIT_600) #define SD_MAX_CURRENT_800 (1 << SD_SET_CURRENT_LIMIT_800) + +#define SD4_SET_POWER_LIMIT_0_72W 0 +#define SD4_SET_POWER_LIMIT_1_44W 1 +#define SD4_SET_POWER_LIMIT_2_16W 2 +#define SD4_SET_POWER_LIMIT_2_88W 3 +#define SD4_SET_POWER_LIMIT_1_80W 4 }; struct sd_ext_reg { @@ -211,7 +217,30 @@ struct sd_ext_reg { struct sd_uhs2_config { u32 node_id; - /* TODO: Extend with more register configs. */ + + u32 n_fcu; + u32 maxblk_len; + u8 n_lanes; + u8 dadr_len; + u8 app_type; + u8 phy_minor_rev; + u8 phy_major_rev; + u8 can_hibernate; + u8 n_lss_sync; + u8 n_lss_dir; + u8 link_minor_rev; + u8 link_major_rev; + u8 dev_type; + u8 n_data_gap; + + u32 n_fcu_set; + u32 maxblk_len_set; + u8 n_lanes_set; + u8 speed_range_set; + u8 n_lss_sync_set; + u8 n_lss_dir_set; + u8 n_data_gap_set; + u8 max_retry_set; }; struct sdio_cccr { diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index a7f790c75bc8..0980d06ed419 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -17,6 +17,7 @@ #include #include #include +#include struct mmc_ios { unsigned int clock; /* clock rate */ @@ -100,7 +101,29 @@ struct mmc_clk_phase_map { }; struct sd_uhs2_caps { - /* TODO: Add UHS-II capabilities for the host. */ + u32 dap; + u32 gap; + u32 group_desc; + u32 maxblk_len; + u32 n_fcu; + u8 n_lanes; + u8 addr64; + u8 card_type; + u8 phy_rev; + u8 speed_range; + u8 n_lss_sync; + u8 n_lss_dir; + u8 link_rev; + u8 host_type; + u8 n_data_gap; + + u32 maxblk_len_set; + u32 n_fcu_set; + u8 n_lanes_set; + u8 n_lss_sync_set; + u8 n_lss_dir_set; + u8 n_data_gap_set; + u8 max_retry_set; }; enum sd_uhs2_operation { diff --git a/include/linux/mmc/sd_uhs2.h b/include/linux/mmc/sd_uhs2.h new file mode 100644 index 000000000000..7abe9bd870c7 --- /dev/null +++ b/include/linux/mmc/sd_uhs2.h @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Header file for UHS-II packets, Host Controller registers and I/O + * accessors. + * + * Copyright (C) 2014 Intel Corp, All Rights Reserved. + */ +#ifndef LINUX_MMC_UHS2_H +#define LINUX_MMC_UHS2_H + +/* LINK Layer definition */ +/* + * UHS2 Header: + * Refer to UHS-II Addendum Version 1.02 Figure 5-2, the format of CCMD Header is described below: + * bit [3:0] : DID(Destination ID = Node ID of UHS2 card) + * bit [6:4] : TYP(Packet Type) + * 000b: CCMD(Control command packet) + * 001b: DCMD(Data command packet) + * 010b: RES(Response packet) + * 011b: DATA(Data payload packet) + * 111b: MSG(Message packet) + * Others: Reserved + * bit [7] : NP(Native Packet) + * bit [10:8] : TID(Transaction ID) + * bit [11] : Reserved + * bit [15:12]: SID(Source ID 0: Node ID of Host) + * + * Broadcast CCMD issued by Host is represented as DID=SID=0. + */ +/* + * UHS2 Argument: + * Refer to UHS-II Addendum Version 1.02 Figure 6-5, the format of CCMD Argument is described below: + * bit [3:0] : MSB of IOADR + * bit [5:4] : PLEN(Payload Length) + * 00b: 0 byte + * 01b: 4 bytes + * 10b: 8 bytes + * 11b: 16 bytes + * bit [6] : Reserved + * bit [7] : R/W(Read/Write) + * 0: Control read command + * 1: Control write command + * bit [15:8] : LSB of IOADR + * + * I/O Address specifies the address of register in UHS-II I/O space accessed by CCMD. + * The unit of I/O Address is 4 Bytes. It is transmitted in MSB first, LSB last. + */ +#define UHS2_NATIVE_PACKET_POS 7 +#define UHS2_NATIVE_PACKET (1 << UHS2_NATIVE_PACKET_POS) + +#define UHS2_PACKET_TYPE_POS 4 +#define UHS2_PACKET_TYPE_CCMD (0 << UHS2_PACKET_TYPE_POS) +#define UHS2_PACKET_TYPE_DCMD (1 << UHS2_PACKET_TYPE_POS) +#define UHS2_PACKET_TYPE_RES (2 << UHS2_PACKET_TYPE_POS) +#define UHS2_PACKET_TYPE_DATA (3 << UHS2_PACKET_TYPE_POS) +#define UHS2_PACKET_TYPE_MSG (7 << UHS2_PACKET_TYPE_POS) + +#define UHS2_DEST_ID_MASK 0x0F +#define UHS2_DEST_ID 0x1 + +#define UHS2_SRC_ID_POS 12 +#define UHS2_SRC_ID_MASK 0xF000 + +#define UHS2_TRANS_ID_POS 8 +#define UHS2_TRANS_ID_MASK 0x0700 + +/* UHS2 MSG */ +#define UHS2_MSG_CTG_POS 5 +#define UHS2_MSG_CTG_LMSG 0x00 +#define UHS2_MSG_CTG_INT 0x60 +#define UHS2_MSG_CTG_AMSG 0x80 + +#define UHS2_MSG_CTG_FCREQ 0x00 +#define UHS2_MSG_CTG_FCRDY 0x01 +#define UHS2_MSG_CTG_STAT 0x02 + +#define UHS2_MSG_CODE_POS 8 +#define UHS2_MSG_CODE_FC_UNRECOVER_ERR 0x8 +#define UHS2_MSG_CODE_STAT_UNRECOVER_ERR 0x8 +#define UHS2_MSG_CODE_STAT_RECOVER_ERR 0x1 + +/* TRANS Layer definition */ + +/* Native packets*/ +#define UHS2_NATIVE_CMD_RW_POS 7 +#define UHS2_NATIVE_CMD_WRITE (1 << UHS2_NATIVE_CMD_RW_POS) +#define UHS2_NATIVE_CMD_READ (0 << UHS2_NATIVE_CMD_RW_POS) + +#define UHS2_NATIVE_CMD_PLEN_POS 4 +#define UHS2_NATIVE_CMD_PLEN_4B (1 << UHS2_NATIVE_CMD_PLEN_POS) +#define UHS2_NATIVE_CMD_PLEN_8B (2 << UHS2_NATIVE_CMD_PLEN_POS) +#define UHS2_NATIVE_CMD_PLEN_16B (3 << UHS2_NATIVE_CMD_PLEN_POS) + +#define UHS2_NATIVE_CCMD_GET_MIOADR_MASK 0xF00 +#define UHS2_NATIVE_CCMD_MIOADR_MASK 0x0F + +#define UHS2_NATIVE_CCMD_LIOADR_POS 8 +#define UHS2_NATIVE_CCMD_GET_LIOADR_MASK 0x0FF + +#define UHS2_CCMD_DEV_INIT_COMPLETE_FLAG BIT(11) +#define UHS2_DEV_INIT_PAYLOAD_LEN 1 +#define UHS2_DEV_INIT_RESP_LEN 6 +#define UHS2_DEV_ENUM_PAYLOAD_LEN 1 +#define UHS2_DEV_ENUM_RESP_LEN 8 +#define UHS2_CFG_WRITE_PAYLOAD_LEN 2 +#define UHS2_CFG_WRITE_PHY_SET_RESP_LEN 4 +#define UHS2_CFG_WRITE_GENERIC_SET_RESP_LEN 5 +#define UHS2_GO_DORMANT_PAYLOAD_LEN 1 + +/* + * UHS2 Argument: + * Refer to UHS-II Addendum Version 1.02 Figure 6-8, the format of DCMD Argument is described below: + * bit [3:0] : Reserved + * bit [6:3] : TMODE(Transfer Mode) + * bit 3: DAM(Data Access Mode) + * bit 4: TLUM(TLEN Unit Mode) + * bit 5: LM(Length Mode) + * bit 6: DM(Duplex Mode) + * bit [7] : R/W(Read/Write) + * 0: Control read command + * 1: Control write command + * bit [15:8] : Reserved + * + * I/O Address specifies the address of register in UHS-II I/O space accessed by CCMD. + * The unit of I/O Address is 4 Bytes. It is transmitted in MSB first, LSB last. + */ +#define UHS2_DCMD_DM_POS 6 +#define UHS2_DCMD_2L_HD_MODE (1 << UHS2_DCMD_DM_POS) +#define UHS2_DCMD_LM_POS 5 +#define UHS2_DCMD_LM_TLEN_EXIST (1 << UHS2_DCMD_LM_POS) +#define UHS2_DCMD_TLUM_POS 4 +#define UHS2_DCMD_TLUM_BYTE_MODE (1 << UHS2_DCMD_TLUM_POS) +#define UHS2_NATIVE_DCMD_DAM_POS 3 +#define UHS2_NATIVE_DCMD_DAM_IO (1 << UHS2_NATIVE_DCMD_DAM_POS) + +#define UHS2_RES_NACK_POS 7 +#define UHS2_RES_NACK_MASK (0x1 << UHS2_RES_NACK_POS) + +#define UHS2_RES_ECODE_POS 4 +#define UHS2_RES_ECODE_MASK 0x7 +#define UHS2_RES_ECODE_COND 1 +#define UHS2_RES_ECODE_ARG 2 +#define UHS2_RES_ECODE_GEN 3 + +/* IOADR of device registers */ +#define UHS2_IOADR_GENERIC_CAPS 0x00 +#define UHS2_IOADR_PHY_CAPS 0x02 +#define UHS2_IOADR_LINK_CAPS 0x04 +#define UHS2_IOADR_RSV_CAPS 0x06 +#define UHS2_IOADR_GENERIC_SETTINGS 0x08 +#define UHS2_IOADR_PHY_SETTINGS 0x0A +#define UHS2_IOADR_LINK_SETTINGS 0x0C +#define UHS2_IOADR_PRESET 0x40 + +/* SD application packets */ +#define UHS2_SD_CMD_INDEX_POS 8 + +#define UHS2_SD_CMD_APP_POS 14 +#define UHS2_SD_CMD_APP (1 << UHS2_SD_CMD_APP_POS) + +/* UHS-II Device Registers */ +#define UHS2_DEV_CONFIG_REG 0x000 + +/* General Caps and Settings registers */ +#define UHS2_DEV_CONFIG_GEN_CAPS (UHS2_DEV_CONFIG_REG + 0x000) +#define UHS2_DEV_CONFIG_N_LANES_POS 8 +#define UHS2_DEV_CONFIG_N_LANES_MASK 0x3F +#define UHS2_DEV_CONFIG_2L_HD_FD 0x1 +#define UHS2_DEV_CONFIG_2D1U_FD 0x2 +#define UHS2_DEV_CONFIG_1D2U_FD 0x4 +#define UHS2_DEV_CONFIG_2D2U_FD 0x8 +#define UHS2_DEV_CONFIG_DADR_POS 14 +#define UHS2_DEV_CONFIG_DADR_MASK 0x1 +#define UHS2_DEV_CONFIG_APP_POS 16 +#define UHS2_DEV_CONFIG_APP_MASK 0xFF +#define UHS2_DEV_CONFIG_APP_SD_MEM 0x1 + +#define UHS2_DEV_CONFIG_GEN_SET (UHS2_DEV_CONFIG_REG + 0x008) +#define UHS2_DEV_CONFIG_GEN_SET_N_LANES_POS 8 +#define UHS2_DEV_CONFIG_GEN_SET_2L_FD_HD 0x0 +#define UHS2_DEV_CONFIG_GEN_SET_2D1U_FD 0x2 +#define UHS2_DEV_CONFIG_GEN_SET_1D2U_FD 0x3 +#define UHS2_DEV_CONFIG_GEN_SET_2D2U_FD 0x4 +#define UHS2_DEV_CONFIG_GEN_SET_CFG_COMPLETE BIT(31) + +/* PHY Caps and Settings registers */ +#define UHS2_DEV_CONFIG_PHY_CAPS (UHS2_DEV_CONFIG_REG + 0x002) +#define UHS2_DEV_CONFIG_PHY_MINOR_MASK 0xF +#define UHS2_DEV_CONFIG_PHY_MAJOR_POS 4 +#define UHS2_DEV_CONFIG_PHY_MAJOR_MASK 0x3 +#define UHS2_DEV_CONFIG_CAN_HIBER_POS 15 +#define UHS2_DEV_CONFIG_CAN_HIBER_MASK 0x1 +#define UHS2_DEV_CONFIG_PHY_CAPS1 (UHS2_DEV_CONFIG_REG + 0x003) +#define UHS2_DEV_CONFIG_N_LSS_SYN_MASK 0xF +#define UHS2_DEV_CONFIG_N_LSS_DIR_POS 4 +#define UHS2_DEV_CONFIG_N_LSS_DIR_MASK 0xF + +#define UHS2_DEV_CONFIG_PHY_SET (UHS2_DEV_CONFIG_REG + 0x00A) +#define UHS2_DEV_CONFIG_PHY_SET_SPEED_POS 6 +#define UHS2_DEV_CONFIG_PHY_SET_SPEED_A 0x0 +#define UHS2_DEV_CONFIG_PHY_SET_SPEED_B 0x1 + +/* LINK-TRAN Caps and Settings registers */ +#define UHS2_DEV_CONFIG_LINK_TRAN_CAPS (UHS2_DEV_CONFIG_REG + 0x004) +#define UHS2_DEV_CONFIG_LT_MINOR_MASK 0xF +#define UHS2_DEV_CONFIG_LT_MAJOR_POS 4 +#define UHS2_DEV_CONFIG_LT_MAJOR_MASK 0x3 +#define UHS2_DEV_CONFIG_N_FCU_POS 8 +#define UHS2_DEV_CONFIG_N_FCU_MASK 0xFF +#define UHS2_DEV_CONFIG_DEV_TYPE_POS 16 +#define UHS2_DEV_CONFIG_DEV_TYPE_MASK 0x7 +#define UHS2_DEV_CONFIG_MAX_BLK_LEN_POS 20 +#define UHS2_DEV_CONFIG_MAX_BLK_LEN_MASK 0xFFF +#define UHS2_DEV_CONFIG_LINK_TRAN_CAPS1 (UHS2_DEV_CONFIG_REG + 0x005) +#define UHS2_DEV_CONFIG_N_DATA_GAP_MASK 0xFF + +#define UHS2_DEV_CONFIG_LINK_TRAN_SET (UHS2_DEV_CONFIG_REG + 0x00C) +#define UHS2_DEV_CONFIG_LT_SET_MAX_BLK_LEN 0x200 +#define UHS2_DEV_CONFIG_LT_SET_MAX_RETRY_POS 16 + +/* Preset register */ +#define UHS2_DEV_CONFIG_PRESET (UHS2_DEV_CONFIG_REG + 0x040) + +#define UHS2_DEV_INT_REG 0x100 + +#define UHS2_DEV_STATUS_REG 0x180 + +#define UHS2_DEV_CMD_REG 0x200 +#define UHS2_DEV_CMD_FULL_RESET (UHS2_DEV_CMD_REG + 0x000) +#define UHS2_DEV_CMD_GO_DORMANT_STATE (UHS2_DEV_CMD_REG + 0x001) +#define UHS2_DEV_CMD_DORMANT_HIBER BIT(7) +#define UHS2_DEV_CMD_DEVICE_INIT (UHS2_DEV_CMD_REG + 0x002) +#define UHS2_DEV_INIT_COMPLETE_FLAG BIT(11) +#define UHS2_DEV_CMD_ENUMERATE (UHS2_DEV_CMD_REG + 0x003) +#define UHS2_DEV_CMD_TRANS_ABORT (UHS2_DEV_CMD_REG + 0x004) + +#define UHS2_RCLK_MAX 52000000 +#define UHS2_RCLK_MIN 26000000 + +#endif /* LINUX_MMC_UHS2_H */ -- cgit v1.2.3 From 5bb798cfdfd00182065cf55c1ff4b2c08d3be13f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 28 Sep 2024 18:20:56 +0200 Subject: memstick: Constify struct memstick_device_id 'struct memstick_device_id' are not modified in these drivers. Constifying this structure moves some data to a read-only section, so increases overall security. Update memstick_dev_match(), memstick_bus_match() and struct memstick_driver accordingly. On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 74055 3455 88 77598 12f1e drivers/memstick/core/ms_block.o After: ===== text data bss dec hex filename 74087 3423 88 77598 12f1e drivers/memstick/core/ms_block.o Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/6509d6f6ed64193f04e747a98ccea7492c976ca8.1727540434.git.christophe.jaillet@wanadoo.fr Signed-off-by: Ulf Hansson --- include/linux/memstick.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memstick.h b/include/linux/memstick.h index ebf73d4ee969..107bdcbedf79 100644 --- a/include/linux/memstick.h +++ b/include/linux/memstick.h @@ -293,7 +293,7 @@ struct memstick_host { }; struct memstick_driver { - struct memstick_device_id *id_table; + const struct memstick_device_id *id_table; int (*probe)(struct memstick_dev *card); void (*remove)(struct memstick_dev *card); int (*suspend)(struct memstick_dev *card, -- cgit v1.2.3 From 7e019dcc470f27066c98697e43d930df8d54bd9c Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 9 Oct 2024 09:50:07 -0400 Subject: sched: Improve cache locality of RSEQ concurrency IDs for intermittent workloads commit 223baf9d17f25 ("sched: Fix performance regression introduced by mm_cid") introduced a per-mm/cpu current concurrency id (mm_cid), which keeps a reference to the concurrency id allocated for each CPU. This reference expires shortly after a 100ms delay. These per-CPU references keep the per-mm-cid data cache-local in situations where threads are running at least once on each CPU within each 100ms window, thus keeping the per-cpu reference alive. However, intermittent workloads behaving in bursts spaced by more than 100ms on each CPU exhibit bad cache locality and degraded performance compared to purely per-cpu data indexing, because concurrency IDs are allocated over various CPUs and cores, therefore losing cache locality of the associated data. Introduce the following changes to improve per-mm-cid cache locality: - Add a "recent_cid" field to the per-mm/cpu mm_cid structure to keep track of which mm_cid value was last used, and use it as a hint to attempt re-allocating the same concurrency ID the next time this mm/cpu needs to allocate a concurrency ID, - Add a per-mm CPUs allowed mask, which keeps track of the union of CPUs allowed for all threads belonging to this mm. This cpumask is only set during the lifetime of the mm, never cleared, so it represents the union of all the CPUs allowed since the beginning of the mm lifetime (note that the mm_cpumask() is really arch-specific and tailored to the TLB flush needs, and is thus _not_ a viable approach for this), - Add a per-mm nr_cpus_allowed to keep track of the weight of the per-mm CPUs allowed mask (for fast access), - Add a per-mm max_nr_cid to keep track of the highest number of concurrency IDs allocated for the mm. This is used for expanding the concurrency ID allocation within the upper bound defined by: min(mm->nr_cpus_allowed, mm->mm_users) When the next unused CID value reaches this threshold, stop trying to expand the cid allocation and use the first available cid value instead. Spreading allocation to use all the cid values within the range [ 0, min(mm->nr_cpus_allowed, mm->mm_users) - 1 ] improves cache locality while preserving mm_cid compactness within the expected user limits, - In __mm_cid_try_get, only return cid values within the range [ 0, mm->nr_cpus_allowed ] rather than [ 0, nr_cpu_ids ]. This prevents allocating cids above the number of allowed cpus in rare scenarios where cid allocation races with a concurrent remote-clear of the per-mm/cpu cid. This improvement is made possible by the addition of the per-mm CPUs allowed mask, - In sched_mm_cid_migrate_to, use mm->nr_cpus_allowed rather than t->nr_cpus_allowed. This criterion was really meant to compare the number of mm->mm_users to the number of CPUs allowed for the entire mm. Therefore, the prior comparison worked fine when all threads shared the same CPUs allowed mask, but not so much in scenarios where those threads have different masks (e.g. each thread pinned to a single CPU). This improvement is made possible by the addition of the per-mm CPUs allowed mask. * Benchmarks Each thread increments 16kB worth of 8-bit integers in bursts, with a configurable delay between each thread's execution. Each thread run one after the other (no threads run concurrently). The order of thread execution in the sequence is random. The thread execution sequence begins again after all threads have executed. The 16kB areas are allocated with rseq_mempool and indexed by either cpu_id, mm_cid (not cache-local), or cache-local mm_cid. Each thread is pinned to its own core. Testing configurations: 8-core/1-L3: Use 8 cores within a single L3 24-core/24-L3: Use 24 cores, 1 core per L3 192-core/24-L3: Use 192 cores (all cores in the system) 384-thread/24-L3: Use 384 HW threads (all HW threads in the system) Intermittent workload delays between threads: 200ms, 10ms. Hardware: CPU(s): 384 On-line CPU(s) list: 0-383 Vendor ID: AuthenticAMD Model name: AMD EPYC 9654 96-Core Processor Thread(s) per core: 2 Core(s) per socket: 96 Socket(s): 2 Caches (sum of all): L1d: 6 MiB (192 instances) L1i: 6 MiB (192 instances) L2: 192 MiB (192 instances) L3: 768 MiB (24 instances) Each result is an average of 5 test runs. The cache-local speedup is calculated as: (cache-local mm_cid) / (mm_cid). Intermittent workload delay: 200ms per-cpu mm_cid cache-local mm_cid cache-local speedup (ns) (ns) (ns) 8-core/1-L3 1374 19289 1336 14.4x 24-core/24-L3 2423 26721 1594 16.7x 192-core/24-L3 2291 15826 2153 7.3x 384-thread/24-L3 1874 13234 1907 6.9x Intermittent workload delay: 10ms per-cpu mm_cid cache-local mm_cid cache-local speedup (ns) (ns) (ns) 8-core/1-L3 662 756 686 1.1x 24-core/24-L3 1378 3648 1035 3.5x 192-core/24-L3 1439 10833 1482 7.3x 384-thread/24-L3 1503 10570 1556 6.8x [ This deprecates the prior "sched: NUMA-aware per-memory-map concurrency IDs" patch series with a simpler and more general approach. ] [ This patch applies on top of v6.12-rc1. ] Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Acked-by: Marco Elver Link: https://lore.kernel.org/lkml/20240823185946.418340-1-mathieu.desnoyers@efficios.com/ --- include/linux/mm_types.h | 72 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 63 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6e3bdf8e38bc..381d22eba088 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -782,6 +782,7 @@ struct vm_area_struct { struct mm_cid { u64 time; int cid; + int recent_cid; }; #endif @@ -852,6 +853,27 @@ struct mm_struct { * When the next mm_cid scan is due (in jiffies). */ unsigned long mm_cid_next_scan; + /** + * @nr_cpus_allowed: Number of CPUs allowed for mm. + * + * Number of CPUs allowed in the union of all mm's + * threads allowed CPUs. + */ + unsigned int nr_cpus_allowed; + /** + * @max_nr_cid: Maximum number of concurrency IDs allocated. + * + * Track the highest number of concurrency IDs allocated for the + * mm. + */ + atomic_t max_nr_cid; + /** + * @cpus_allowed_lock: Lock protecting mm cpus_allowed. + * + * Provide mutual exclusion for mm cpus_allowed and + * mm nr_cpus_allowed updates. + */ + raw_spinlock_t cpus_allowed_lock; #endif #ifdef CONFIG_MMU atomic_long_t pgtables_bytes; /* size of all page tables */ @@ -1170,18 +1192,30 @@ static inline int mm_cid_clear_lazy_put(int cid) return cid & ~MM_CID_LAZY_PUT; } +/* + * mm_cpus_allowed: Union of all mm's threads allowed CPUs. + */ +static inline cpumask_t *mm_cpus_allowed(struct mm_struct *mm) +{ + unsigned long bitmap = (unsigned long)mm; + + bitmap += offsetof(struct mm_struct, cpu_bitmap); + /* Skip cpu_bitmap */ + bitmap += cpumask_size(); + return (struct cpumask *)bitmap; +} + /* Accessor for struct mm_struct's cidmask. */ static inline cpumask_t *mm_cidmask(struct mm_struct *mm) { - unsigned long cid_bitmap = (unsigned long)mm; + unsigned long cid_bitmap = (unsigned long)mm_cpus_allowed(mm); - cid_bitmap += offsetof(struct mm_struct, cpu_bitmap); - /* Skip cpu_bitmap */ + /* Skip mm_cpus_allowed */ cid_bitmap += cpumask_size(); return (struct cpumask *)cid_bitmap; } -static inline void mm_init_cid(struct mm_struct *mm) +static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p) { int i; @@ -1189,17 +1223,22 @@ static inline void mm_init_cid(struct mm_struct *mm) struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, i); pcpu_cid->cid = MM_CID_UNSET; + pcpu_cid->recent_cid = MM_CID_UNSET; pcpu_cid->time = 0; } + mm->nr_cpus_allowed = p->nr_cpus_allowed; + atomic_set(&mm->max_nr_cid, 0); + raw_spin_lock_init(&mm->cpus_allowed_lock); + cpumask_copy(mm_cpus_allowed(mm), &p->cpus_mask); cpumask_clear(mm_cidmask(mm)); } -static inline int mm_alloc_cid_noprof(struct mm_struct *mm) +static inline int mm_alloc_cid_noprof(struct mm_struct *mm, struct task_struct *p) { mm->pcpu_cid = alloc_percpu_noprof(struct mm_cid); if (!mm->pcpu_cid) return -ENOMEM; - mm_init_cid(mm); + mm_init_cid(mm, p); return 0; } #define mm_alloc_cid(...) alloc_hooks(mm_alloc_cid_noprof(__VA_ARGS__)) @@ -1212,16 +1251,31 @@ static inline void mm_destroy_cid(struct mm_struct *mm) static inline unsigned int mm_cid_size(void) { - return cpumask_size(); + return 2 * cpumask_size(); /* mm_cpus_allowed(), mm_cidmask(). */ +} + +static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask) +{ + struct cpumask *mm_allowed = mm_cpus_allowed(mm); + + if (!mm) + return; + /* The mm_cpus_allowed is the union of each thread allowed CPUs masks. */ + raw_spin_lock(&mm->cpus_allowed_lock); + cpumask_or(mm_allowed, mm_allowed, cpumask); + WRITE_ONCE(mm->nr_cpus_allowed, cpumask_weight(mm_allowed)); + raw_spin_unlock(&mm->cpus_allowed_lock); } #else /* CONFIG_SCHED_MM_CID */ -static inline void mm_init_cid(struct mm_struct *mm) { } -static inline int mm_alloc_cid(struct mm_struct *mm) { return 0; } +static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p) { } +static inline int mm_alloc_cid(struct mm_struct *mm, struct task_struct *p) { return 0; } static inline void mm_destroy_cid(struct mm_struct *mm) { } + static inline unsigned int mm_cid_size(void) { return 0; } +static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask) { } #endif /* CONFIG_SCHED_MM_CID */ struct mmu_gather; -- cgit v1.2.3 From 689274a56c0c088796d359f6c6267323931a2429 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 30 Sep 2024 15:03:26 +0200 Subject: drm: Remove DRM aperture helpers The DRM aperture helpers are wrappers around video helpers from . There are no callers of these functions. Remove them entirely. Signed-off-by: Thomas Zimmermann Cc: Jonathan Corbet Acked-by: Javier Martinez Canillas Acked-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20240930130921.689876-29-tzimmermann@suse.de --- include/drm/drm_aperture.h | 38 -------------------------------------- 1 file changed, 38 deletions(-) delete mode 100644 include/drm/drm_aperture.h (limited to 'include') diff --git a/include/drm/drm_aperture.h b/include/drm/drm_aperture.h deleted file mode 100644 index cbe33b49fd5d..000000000000 --- a/include/drm/drm_aperture.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: MIT */ - -#ifndef _DRM_APERTURE_H_ -#define _DRM_APERTURE_H_ - -#include - -struct drm_device; -struct drm_driver; -struct pci_dev; - -int devm_aperture_acquire_from_firmware(struct drm_device *dev, resource_size_t base, - resource_size_t size); - -int drm_aperture_remove_conflicting_framebuffers(resource_size_t base, resource_size_t size, - const struct drm_driver *req_driver); - -int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, - const struct drm_driver *req_driver); - -/** - * drm_aperture_remove_framebuffers - remove all existing framebuffers - * @req_driver: requesting DRM driver - * - * This function removes all graphics device drivers. Use this function on systems - * that can have their framebuffer located anywhere in memory. - * - * Returns: - * 0 on success, or a negative errno code otherwise - */ -static inline int -drm_aperture_remove_framebuffers(const struct drm_driver *req_driver) -{ - return drm_aperture_remove_conflicting_framebuffers(0, (resource_size_t)-1, - req_driver); -} - -#endif -- cgit v1.2.3 From c2f8fde8689272a55b9319b69dfe7e8f0e2e9dfe Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 14 Oct 2024 11:40:56 +0200 Subject: fs: add helper to use mount option as path or fd Allow filesystems to use a mount option either as a file or path. Link: https://lore.kernel.org/r/20241014-work-overlayfs-v3-1-32b3fed1286e@kernel.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- include/linux/fs_parser.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index 6cf713a7e6c6..3cef566088fc 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -28,7 +28,8 @@ typedef int fs_param_type(struct p_log *, */ fs_param_type fs_param_is_bool, fs_param_is_u32, fs_param_is_s32, fs_param_is_u64, fs_param_is_enum, fs_param_is_string, fs_param_is_blob, fs_param_is_blockdev, - fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid; + fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid, + fs_param_is_file_or_string; /* * Specification of the type of value a parameter wants. @@ -133,6 +134,8 @@ static inline bool fs_validate_description(const char *name, #define fsparam_bdev(NAME, OPT) __fsparam(fs_param_is_blockdev, NAME, OPT, 0, NULL) #define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0, NULL) #define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL) +#define fsparam_file_or_string(NAME, OPT) \ + __fsparam(fs_param_is_file_or_string, NAME, OPT, 0, NULL) #define fsparam_uid(NAME, OPT) __fsparam(fs_param_is_uid, NAME, OPT, 0, NULL) #define fsparam_gid(NAME, OPT) __fsparam(fs_param_is_gid, NAME, OPT, 0, NULL) -- cgit v1.2.3 From b692bf9a7543af7ad11a59d182a3757578f0ba53 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 7 Oct 2024 14:24:53 +0200 Subject: xsk: Get rid of xdp_buff_xsk::xskb_list_node Let's bring xdp_buff_xsk back to occupying 2 cachelines by removing xskb_list_node - for the purpose of gathering the xskb frags free_list_node can be used, head of the list (xsk_buff_pool::xskb_list) stays as-is, just reuse the node ptr. It is safe to do as a single xdp_buff_xsk can never reside in two pool's lists simultaneously. Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20241007122458.282590-2-maciej.fijalkowski@intel.com --- include/net/xdp_sock_drv.h | 14 +++++++------- include/net/xsk_buff_pool.h | 1 - 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 0a5dca2b2b3f..360bc1244c6a 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -126,8 +126,8 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) if (likely(!xdp_buff_has_frags(xdp))) goto out; - list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) { - list_del(&pos->xskb_list_node); + list_for_each_entry_safe(pos, tmp, xskb_list, free_list_node) { + list_del(&pos->free_list_node); xp_free(pos); } @@ -140,7 +140,7 @@ static inline void xsk_buff_add_frag(struct xdp_buff *xdp) { struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp); - list_add_tail(&frag->xskb_list_node, &frag->pool->xskb_list); + list_add_tail(&frag->free_list_node, &frag->pool->xskb_list); } static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) @@ -150,9 +150,9 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) struct xdp_buff_xsk *frag; frag = list_first_entry_or_null(&xskb->pool->xskb_list, - struct xdp_buff_xsk, xskb_list_node); + struct xdp_buff_xsk, free_list_node); if (frag) { - list_del(&frag->xskb_list_node); + list_del(&frag->free_list_node); ret = &frag->xdp; } @@ -163,7 +163,7 @@ static inline void xsk_buff_del_tail(struct xdp_buff *tail) { struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp); - list_del(&xskb->xskb_list_node); + list_del(&xskb->free_list_node); } static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) @@ -172,7 +172,7 @@ static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) struct xdp_buff_xsk *frag; frag = list_last_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk, - xskb_list_node); + free_list_node); return &frag->xdp; } diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index bacb33f1e3e5..aa7f1d0b3a5e 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -30,7 +30,6 @@ struct xdp_buff_xsk { struct xsk_buff_pool *pool; u64 orig_addr; struct list_head free_list_node; - struct list_head xskb_list_node; }; #define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb)) -- cgit v1.2.3 From 30ec2c1baaead43903ad63ff8e3083949059083c Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 7 Oct 2024 14:24:54 +0200 Subject: xsk: s/free_list_node/list_node/ Now that free_list_node's purpose is two-folded, make it just a 'list_node'. Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20241007122458.282590-3-maciej.fijalkowski@intel.com --- include/net/xdp_sock_drv.h | 14 +++++++------- include/net/xsk_buff_pool.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 360bc1244c6a..40085afd9160 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -126,8 +126,8 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) if (likely(!xdp_buff_has_frags(xdp))) goto out; - list_for_each_entry_safe(pos, tmp, xskb_list, free_list_node) { - list_del(&pos->free_list_node); + list_for_each_entry_safe(pos, tmp, xskb_list, list_node) { + list_del(&pos->list_node); xp_free(pos); } @@ -140,7 +140,7 @@ static inline void xsk_buff_add_frag(struct xdp_buff *xdp) { struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp); - list_add_tail(&frag->free_list_node, &frag->pool->xskb_list); + list_add_tail(&frag->list_node, &frag->pool->xskb_list); } static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) @@ -150,9 +150,9 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) struct xdp_buff_xsk *frag; frag = list_first_entry_or_null(&xskb->pool->xskb_list, - struct xdp_buff_xsk, free_list_node); + struct xdp_buff_xsk, list_node); if (frag) { - list_del(&frag->free_list_node); + list_del(&frag->list_node); ret = &frag->xdp; } @@ -163,7 +163,7 @@ static inline void xsk_buff_del_tail(struct xdp_buff *tail) { struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp); - list_del(&xskb->free_list_node); + list_del(&xskb->list_node); } static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) @@ -172,7 +172,7 @@ static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) struct xdp_buff_xsk *frag; frag = list_last_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk, - free_list_node); + list_node); return &frag->xdp; } diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index aa7f1d0b3a5e..af8b6f776f86 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -29,7 +29,7 @@ struct xdp_buff_xsk { dma_addr_t frame_dma; struct xsk_buff_pool *pool; u64 orig_addr; - struct list_head free_list_node; + struct list_head list_node; }; #define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb)) -- cgit v1.2.3 From bea14124bacbe5c9366381e62635eed28ac892ae Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 7 Oct 2024 14:24:55 +0200 Subject: xsk: Get rid of xdp_buff_xsk::orig_addr Continue the process of dieting xdp_buff_xsk by removing orig_addr member. It can be calculated from xdp->data_hard_start where it was previously used, so it is not anything that has to be carried around in struct used widely in hot path. This has been used for initializing xdp_buff_xsk::frame_dma during pool setup and as a shortcut in xp_get_handle() to retrieve address provided to xsk Rx queue. Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20241007122458.282590-4-maciej.fijalkowski@intel.com --- include/net/xsk_buff_pool.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index af8b6f776f86..468a23b1b4c5 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -28,7 +28,6 @@ struct xdp_buff_xsk { dma_addr_t dma; dma_addr_t frame_dma; struct xsk_buff_pool *pool; - u64 orig_addr; struct list_head list_node; }; @@ -119,7 +118,6 @@ void xp_free(struct xdp_buff_xsk *xskb); static inline void xp_init_xskb_addr(struct xdp_buff_xsk *xskb, struct xsk_buff_pool *pool, u64 addr) { - xskb->orig_addr = addr; xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom; } @@ -221,14 +219,19 @@ static inline void xp_release(struct xdp_buff_xsk *xskb) xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb; } -static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb) +static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb, + struct xsk_buff_pool *pool) { - u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start; + u64 orig_addr = xskb->xdp.data - pool->addrs; + u64 offset; - offset += xskb->pool->headroom; - if (!xskb->pool->unaligned) - return xskb->orig_addr + offset; - return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); + if (!pool->unaligned) + return orig_addr; + + offset = xskb->xdp.data - xskb->xdp.data_hard_start; + orig_addr -= offset; + offset += pool->headroom; + return orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); } static inline bool xp_tx_metadata_enabled(const struct xsk_buff_pool *pool) -- cgit v1.2.3 From 6e126872191df946a6fe01b79273119d32d96711 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 7 Oct 2024 14:24:56 +0200 Subject: xsk: Carry a copy of xdp_zc_max_segs within xsk_buff_pool This so we avoid dereferencing struct net_device within hot path. Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20241007122458.282590-5-maciej.fijalkowski@intel.com --- include/net/xsk_buff_pool.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 468a23b1b4c5..bb03cee716b3 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -76,6 +76,7 @@ struct xsk_buff_pool { u32 chunk_size; u32 chunk_shift; u32 frame_len; + u32 xdp_zc_max_segs; u8 tx_metadata_len; /* inherited from umem */ u8 cached_need_wakeup; bool uses_need_wakeup; -- cgit v1.2.3 From 6e3ea06240adfef7b46e2338dd824541c31de06d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 7 Oct 2024 18:03:23 +0300 Subject: dmaengine: acpi: Drop unused devm_acpi_dma_controller_free() After introduction a few years ago the devm_acpi_dma_controller_free() was never used. Drop it. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20241007150436.2183575-2-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/acpi_dma.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/acpi_dma.h b/include/linux/acpi_dma.h index 72cedb916a9c..3ef1ec7a04cb 100644 --- a/include/linux/acpi_dma.h +++ b/include/linux/acpi_dma.h @@ -65,7 +65,6 @@ int devm_acpi_dma_controller_register(struct device *dev, struct dma_chan *(*acpi_dma_xlate) (struct acpi_dma_spec *, struct acpi_dma *), void *data); -void devm_acpi_dma_controller_free(struct device *dev); struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev, size_t index); @@ -94,9 +93,6 @@ static inline int devm_acpi_dma_controller_register(struct device *dev, { return -ENODEV; } -static inline void devm_acpi_dma_controller_free(struct device *dev) -{ -} static inline struct dma_chan *acpi_dma_request_slave_chan_by_index( struct device *dev, size_t index) -- cgit v1.2.3 From 662f045332addc961940e48eb920caa954abbf09 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 7 Oct 2024 18:03:25 +0300 Subject: dmaengine: acpi: Clean up headers There is a few things done: - include only the headers we are direct user of - when pointer is in use, provide a forward declaration - add missing headers - sort alphabetically Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20241007150436.2183575-4-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/acpi_dma.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/acpi_dma.h b/include/linux/acpi_dma.h index 3ef1ec7a04cb..e748b2877602 100644 --- a/include/linux/acpi_dma.h +++ b/include/linux/acpi_dma.h @@ -11,10 +11,11 @@ #ifndef __LINUX_ACPI_DMA_H #define __LINUX_ACPI_DMA_H -#include -#include #include #include +#include + +struct device; /** * struct acpi_dma_spec - slave device DMA resources -- cgit v1.2.3 From 6ba55951e70bf7a8d89c03aa5612d2e0c81ded6d Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 10 Oct 2024 11:27:15 -0500 Subject: logic_pio: Constify fwnode_handle The fwnode_handle passed into find_io_range_by_fwnode() and logic_pio_trans_hwaddr() are not modified, so make them const. Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241010-dt-const-v1-2-87a51f558425@kernel.org Signed-off-by: Rob Herring (Arm) --- include/linux/logic_pio.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/logic_pio.h b/include/linux/logic_pio.h index babf4e3c28ba..8f1a9408302f 100644 --- a/include/linux/logic_pio.h +++ b/include/linux/logic_pio.h @@ -17,7 +17,7 @@ enum { struct logic_pio_hwaddr { struct list_head list; - struct fwnode_handle *fwnode; + const struct fwnode_handle *fwnode; resource_size_t hw_start; resource_size_t io_start; resource_size_t size; /* range size populated */ @@ -110,8 +110,8 @@ void logic_outsl(unsigned long addr, const void *buffer, unsigned int count); #endif /* CONFIG_INDIRECT_PIO */ #define MMIO_UPPER_LIMIT (IO_SPACE_LIMIT - PIO_INDIRECT_SIZE) -struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode); -unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode, +struct logic_pio_hwaddr *find_io_range_by_fwnode(const struct fwnode_handle *fwnode); +unsigned long logic_pio_trans_hwaddr(const struct fwnode_handle *fwnode, resource_size_t hw_addr, resource_size_t size); int logic_pio_register_range(struct logic_pio_hwaddr *newrange); void logic_pio_unregister_range(struct logic_pio_hwaddr *range); -- cgit v1.2.3 From 78e2baf3d96edd21c6f26d8afc0e68d02ec2c51c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2024 17:48:13 +0000 Subject: net: add TIME_WAIT logic to sk_to_full_sk() TCP will soon attach TIME_WAIT sockets to some ACK and RST. Make sure sk_to_full_sk() detects this and does not return a non full socket. v3: also changed sk_const_to_full_sk() Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Martin KaFai Lau Reviewed-by: Brian Vazquez Link: https://patch.msgid.link/20241010174817.1543642-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/bpf-cgroup.h | 2 +- include/net/inet_sock.h | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index ce91d9b2acb9..f0f219271daf 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -209,7 +209,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk) { \ typeof(sk) __sk = sk_to_full_sk(sk); \ - if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) && \ + if (__sk && __sk == skb_to_full_sk(skb) && \ cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS)) \ __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ CGROUP_INET_EGRESS); \ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index f01dd273bea6..56d8bc5593d3 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -321,8 +321,10 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet) static inline struct sock *sk_to_full_sk(struct sock *sk) { #ifdef CONFIG_INET - if (sk && sk->sk_state == TCP_NEW_SYN_RECV) + if (sk && READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV) sk = inet_reqsk(sk)->rsk_listener; + if (sk && READ_ONCE(sk->sk_state) == TCP_TIME_WAIT) + sk = NULL; #endif return sk; } @@ -331,8 +333,10 @@ static inline struct sock *sk_to_full_sk(struct sock *sk) static inline const struct sock *sk_const_to_full_sk(const struct sock *sk) { #ifdef CONFIG_INET - if (sk && sk->sk_state == TCP_NEW_SYN_RECV) + if (sk && READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV) sk = ((const struct request_sock *)sk)->rsk_listener; + if (sk && READ_ONCE(sk->sk_state) == TCP_TIME_WAIT) + sk = NULL; #endif return sk; } -- cgit v1.2.3 From bc43a3c83cad46a27d6e3bf869acdd926bbe79ad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2024 17:48:14 +0000 Subject: net_sched: sch_fq: prepare for TIME_WAIT sockets TCP stack is not attaching skb to TIME_WAIT sockets yet, but we would like to allow this in the future. Add sk_listener_or_tw() helper to detect the three states that FQ needs to take care. Like NEW_SYN_RECV, TIME_WAIT are not full sockets and do not contain sk->sk_pacing_status, sk->sk_pacing_rate. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Brian Vazquez Link: https://patch.msgid.link/20241010174817.1543642-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 6da420ab1ee1..2f200d91ef11 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2802,6 +2802,16 @@ static inline bool sk_listener(const struct sock *sk) return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV); } +/* This helper checks if a socket is a LISTEN or NEW_SYN_RECV or TIME_WAIT + * TCP SYNACK messages can be attached to LISTEN or NEW_SYN_RECV (depending on SYNCOOKIE) + * TCP RST and ACK can be attached to TIME_WAIT. + */ +static inline bool sk_listener_or_tw(const struct sock *sk) +{ + return (1 << READ_ONCE(sk->sk_state)) & + (TCPF_LISTEN | TCPF_NEW_SYN_RECV | TCPF_TIME_WAIT); +} + void sock_enable_timestamp(struct sock *sk, enum sock_flags flag); int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, int type); -- cgit v1.2.3 From 5ced52fa8f0dc23adb067f7b8a009a5ee051efb7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2024 17:48:15 +0000 Subject: net: add skb_set_owner_edemux() helper This can be used to attach a socket to an skb, taking a reference on sk->sk_refcnt. This helper might be a NOP if sk->sk_refcnt is zero. Use it from tcp_make_synack(). Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Brian Vazquez Link: https://patch.msgid.link/20241010174817.1543642-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 2f200d91ef11..bf7fa3db10ae 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1760,6 +1760,15 @@ void sock_efree(struct sk_buff *skb); #ifdef CONFIG_INET void sock_edemux(struct sk_buff *skb); void sock_pfree(struct sk_buff *skb); + +static inline void skb_set_owner_edemux(struct sk_buff *skb, struct sock *sk) +{ + skb_orphan(skb); + if (refcount_inc_not_zero(&sk->sk_refcnt)) { + skb->sk = sk; + skb->destructor = sock_edemux; + } +} #else #define sock_edemux sock_efree #endif -- cgit v1.2.3 From 79636038d37e7bd4d078238f2a3f002cab4423bc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2024 17:48:17 +0000 Subject: ipv4: tcp: give socket pointer to control skbs ip_send_unicast_reply() send orphaned 'control packets'. These are RST packets and also ACK packets sent from TIME_WAIT. Some eBPF programs would prefer to have a meaningful skb->sk pointer as much as possible. This means that TCP can now attach TIME_WAIT sockets to outgoing skbs. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Brian Vazquez Link: https://patch.msgid.link/20241010174817.1543642-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index bab084df1567..4be0a6a603b2 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -288,7 +288,8 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; } -void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, +void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk, + struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, -- cgit v1.2.3 From f15e3b3ddb9fab1c1731b6154e2cd6573fb54c4d Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 11 Oct 2024 18:44:56 +0000 Subject: net: napi: Make napi_defer_hard_irqs per-NAPI Add defer_hard_irqs to napi_struct in preparation for per-NAPI settings. The existing sysfs parameter is respected; writes to sysfs will write to all NAPI structs for the device and the net_device defer_hard_irq field. Reads from sysfs show the net_device field. The ability to set defer_hard_irqs on specific NAPI instances will be added in a later commit, via netdev-genl. Signed-off-by: Joe Damato Reviewed-by: Eric Dumazet Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20241011184527.16393-2-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e6b93d01e631..2e7bc23660ec 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -373,6 +373,7 @@ struct napi_struct { unsigned int napi_id; struct hrtimer timer; struct task_struct *thread; + u32 defer_hard_irqs; /* control-path-only fields follow */ struct list_head dev_list; struct hlist_node napi_hash_node; @@ -2085,7 +2086,6 @@ struct net_device { unsigned int real_num_rx_queues; struct netdev_rx_queue *_rx; unsigned long gro_flush_timeout; - u32 napi_defer_hard_irqs; unsigned int gro_max_size; unsigned int gro_ipv4_max_size; rx_handler_func_t __rcu *rx_handler; @@ -2413,6 +2413,7 @@ struct net_device { struct dim_irq_moder *irq_moder; u64 max_pacing_offload_horizon; + u32 napi_defer_hard_irqs; /** * @lock: protects @net_shaper_hierarchy, feel free to use for other -- cgit v1.2.3 From 516010460011ae74ac3b7383cf90ed27e2711cd6 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 11 Oct 2024 18:44:57 +0000 Subject: netdev-genl: Dump napi_defer_hard_irqs Support dumping defer_hard_irqs for a NAPI ID. Signed-off-by: Joe Damato Reviewed-by: Eric Dumazet Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20241011184527.16393-3-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/netdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 7c308f04e7a0..13dc0b027e86 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -122,6 +122,7 @@ enum { NETDEV_A_NAPI_ID, NETDEV_A_NAPI_IRQ, NETDEV_A_NAPI_PID, + NETDEV_A_NAPI_DEFER_HARD_IRQS, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) -- cgit v1.2.3 From acb8d4ed5661d05f794ef2ce34fd11e699e9ca32 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 11 Oct 2024 18:44:58 +0000 Subject: net: napi: Make gro_flush_timeout per-NAPI Allow per-NAPI gro_flush_timeout setting. The existing sysfs parameter is respected; writes to sysfs will write to all NAPI structs for the device and the net_device gro_flush_timeout field. Reads from sysfs will read from the net_device field. The ability to set gro_flush_timeout on specific NAPI instances will be added in a later commit, via netdev-genl. Signed-off-by: Joe Damato Reviewed-by: Eric Dumazet Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20241011184527.16393-4-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2e7bc23660ec..93241d4de437 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -373,6 +373,7 @@ struct napi_struct { unsigned int napi_id; struct hrtimer timer; struct task_struct *thread; + unsigned long gro_flush_timeout; u32 defer_hard_irqs; /* control-path-only fields follow */ struct list_head dev_list; @@ -2085,7 +2086,6 @@ struct net_device { int ifindex; unsigned int real_num_rx_queues; struct netdev_rx_queue *_rx; - unsigned long gro_flush_timeout; unsigned int gro_max_size; unsigned int gro_ipv4_max_size; rx_handler_func_t __rcu *rx_handler; @@ -2413,6 +2413,7 @@ struct net_device { struct dim_irq_moder *irq_moder; u64 max_pacing_offload_horizon; + unsigned long gro_flush_timeout; u32 napi_defer_hard_irqs; /** -- cgit v1.2.3 From 0137891e74576f77a7901718dc0ce08ca074ae74 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 11 Oct 2024 18:44:59 +0000 Subject: netdev-genl: Dump gro_flush_timeout Support dumping gro_flush_timeout for a NAPI ID. Signed-off-by: Joe Damato Reviewed-by: Jakub Kicinski Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241011184527.16393-5-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/netdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 13dc0b027e86..cacd33359c76 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -123,6 +123,7 @@ enum { NETDEV_A_NAPI_IRQ, NETDEV_A_NAPI_PID, NETDEV_A_NAPI_DEFER_HARD_IRQS, + NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) -- cgit v1.2.3 From 86e25f40aa1e9e54e081e55016f65b5c92523989 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 11 Oct 2024 18:45:00 +0000 Subject: net: napi: Add napi_config Add a persistent NAPI config area for NAPI configuration to the core. Drivers opt-in to setting the persistent config for a NAPI by passing an index when calling netif_napi_add_config. napi_config is allocated in alloc_netdev_mqs, freed in free_netdev (after the NAPIs are deleted). Drivers which call netif_napi_add_config will have persistent per-NAPI settings: NAPI IDs, gro_flush_timeout, and defer_hard_irq settings. Per-NAPI settings are saved in napi_disable and restored in napi_enable. Co-developed-by: Martin Karsten Signed-off-by: Martin Karsten Signed-off-by: Joe Damato Reviewed-by: Jakub Kicinski Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241011184527.16393-6-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 93241d4de437..8feaca12655e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -342,6 +342,15 @@ struct gro_list { */ #define GRO_HASH_BUCKETS 8 +/* + * Structure for per-NAPI config + */ +struct napi_config { + u64 gro_flush_timeout; + u32 defer_hard_irqs; + unsigned int napi_id; +}; + /* * Structure for NAPI scheduling similar to tasklet but with weighting */ @@ -379,6 +388,8 @@ struct napi_struct { struct list_head dev_list; struct hlist_node napi_hash_node; int irq; + int index; + struct napi_config *config; }; enum { @@ -1868,9 +1879,6 @@ enum netdev_reg_state { * allocated at register_netdev() time * @real_num_rx_queues: Number of RX queues currently active in device * @xdp_prog: XDP sockets filter program pointer - * @gro_flush_timeout: timeout for GRO layer in NAPI - * @napi_defer_hard_irqs: If not zero, provides a counter that would - * allow to avoid NIC hard IRQ, on busy queues. * * @rx_handler: handler for received packets * @rx_handler_data: XXX: need comments on this one @@ -2020,6 +2028,11 @@ enum netdev_reg_state { * where the clock is recovered. * * @max_pacing_offload_horizon: max EDT offload horizon in nsec. + * @napi_config: An array of napi_config structures containing per-NAPI + * settings. + * @gro_flush_timeout: timeout for GRO layer in NAPI + * @napi_defer_hard_irqs: If not zero, provides a counter that would + * allow to avoid NIC hard IRQ, on busy queues. * * FIXME: cleanup struct net_device such that network protocol info * moves out. @@ -2413,6 +2426,7 @@ struct net_device { struct dim_irq_moder *irq_moder; u64 max_pacing_offload_horizon; + struct napi_config *napi_config; unsigned long gro_flush_timeout; u32 napi_defer_hard_irqs; @@ -2678,6 +2692,22 @@ netif_napi_add_tx_weight(struct net_device *dev, netif_napi_add_weight(dev, napi, poll, weight); } +/** + * netif_napi_add_config - initialize a NAPI context with persistent config + * @dev: network device + * @napi: NAPI context + * @poll: polling function + * @index: the NAPI index + */ +static inline void +netif_napi_add_config(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int index) +{ + napi->index = index; + napi->config = &dev->napi_config[index]; + netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT); +} + /** * netif_napi_add_tx() - initialize a NAPI context to be used for Tx only * @dev: network device -- cgit v1.2.3 From 1287c1ae0fc227e5acef11a539eb4e75646e31c7 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 11 Oct 2024 18:45:01 +0000 Subject: netdev-genl: Support setting per-NAPI config values Add support to set per-NAPI defer_hard_irqs and gro_flush_timeout. Signed-off-by: Joe Damato Reviewed-by: Jakub Kicinski Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241011184527.16393-7-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/netdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index cacd33359c76..e3ebb49f60d2 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -201,6 +201,7 @@ enum { NETDEV_CMD_NAPI_GET, NETDEV_CMD_QSTATS_GET, NETDEV_CMD_BIND_RX, + NETDEV_CMD_NAPI_SET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- cgit v1.2.3 From 4971266e1595f76be3f844c834c1f9357a97dbde Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 10 Oct 2024 16:25:03 -0700 Subject: bpf: Add kmem_cache iterator The new "kmem_cache" iterator will traverse the list of slab caches and call attached BPF programs for each entry. It should check the argument (ctx.s) if it's NULL before using it. Now the iteration grabs the slab_mutex only if it traverse the list and releases the mutex when it runs the BPF program. The kmem_cache entry is protected by a refcount during the execution. Signed-off-by: Namhyung Kim Acked-by: Vlastimil Babka #slab Link: https://lore.kernel.org/r/20241010232505.1339892-2-namhyung@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/btf_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index c0e3e1426a82..139bdececdcf 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -283,5 +283,6 @@ extern u32 btf_tracing_ids[]; extern u32 bpf_cgroup_btf_id[]; extern u32 bpf_local_storage_map_btf_id[]; extern u32 btf_bpf_map_id[]; +extern u32 bpf_kmem_cache_btf_id[]; #endif -- cgit v1.2.3 From 6632863226d88383c9e2bedfeeb928ac7f8232b9 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 9 Oct 2024 13:18:08 +0800 Subject: iommu: Remove iommu_present() The last callsite of iommu_present() is removed by commit <45c690aea8ee> ("drm/tegra: Use iommu_paging_domain_alloc()"). Remove it to avoid dead code. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Yi Liu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20241009051808.29455-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bd722f473635..62d1b85c80d3 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -785,7 +785,6 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) } extern int bus_iommu_probe(const struct bus_type *bus); -extern bool iommu_present(const struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); extern bool iommu_group_has_isolated_msi(struct iommu_group *group); extern struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus); @@ -1081,11 +1080,6 @@ struct iommu_iotlb_gather {}; struct iommu_dirty_bitmap {}; struct iommu_dirty_ops {}; -static inline bool iommu_present(const struct bus_type *bus) -{ - return false; -} - static inline bool device_iommu_capable(struct device *dev, enum iommu_cap cap) { return false; -- cgit v1.2.3 From a274465cc3bef2dfd9c9ea5100848dda0a8641e1 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Thu, 10 Oct 2024 13:54:19 +0100 Subject: net: phy: support 'active-high' property for PHY LEDs In addition to 'active-low' and 'inactive-high-impedance' also support 'active-high' property for PHY LED pin configuration. As only either 'active-high' or 'active-low' can be set at the same time, WARN and return an error in case both are set. Signed-off-by: Daniel Golle Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/91598487773d768f254d5faf06cf65b13e972f0e.1728558223.git.daniel@makrotopia.org Signed-off-by: Paolo Abeni --- include/linux/phy.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index ff762a3d8270..bf0eb4e5d35c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -877,8 +877,9 @@ struct phy_plca_status { /* Modes for PHY LED configuration */ enum phy_led_modes { - PHY_LED_ACTIVE_LOW = 0, - PHY_LED_INACTIVE_HIGH_IMPEDANCE = 1, + PHY_LED_ACTIVE_HIGH = 0, + PHY_LED_ACTIVE_LOW = 1, + PHY_LED_INACTIVE_HIGH_IMPEDANCE = 2, /* keep it last */ __PHY_LED_MODES_NUM, -- cgit v1.2.3 From f68303cf1cf2fb96e20df5499c194f1fe5bab9e2 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 10 Oct 2024 11:27:14 -0500 Subject: PCI: Constify pci_register_io_range() fwnode_handle pci_register_io_range() does not modify the passed in fwnode_handle, so make it const. Acked-by: Bjorn Helgaas Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241010-dt-const-v1-1-87a51f558425@kernel.org Signed-off-by: Rob Herring (Arm) --- include/linux/pci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..733ff6570e2d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1556,7 +1556,7 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus, void *alignf_data); -int pci_register_io_range(struct fwnode_handle *fwnode, phys_addr_t addr, +int pci_register_io_range(const struct fwnode_handle *fwnode, phys_addr_t addr, resource_size_t size); unsigned long pci_address_to_pio(phys_addr_t addr); phys_addr_t pci_pio_to_address(unsigned long pio); @@ -2019,7 +2019,7 @@ static inline int pci_request_regions(struct pci_dev *dev, const char *res_name) { return -EIO; } static inline void pci_release_regions(struct pci_dev *dev) { } -static inline int pci_register_io_range(struct fwnode_handle *fwnode, +static inline int pci_register_io_range(const struct fwnode_handle *fwnode, phys_addr_t addr, resource_size_t size) { return -EINVAL; } -- cgit v1.2.3 From ec8c2329da1a8695b3fc80ba67cad9dc75d9a3ec Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 10 Oct 2024 11:27:16 -0500 Subject: of: Constify struct device_node function arguments Functions which don't change the refcount or otherwise modify struct device_node can make struct device_node const. Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241010-dt-const-v1-3-87a51f558425@kernel.org Signed-off-by: Rob Herring (Arm) --- include/linux/of.h | 14 +++++++------- include/linux/of_address.h | 4 ++-- include/linux/of_irq.h | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/of.h b/include/linux/of.h index 85b60ac9eec5..7875b308f13c 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -357,7 +357,7 @@ extern struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); extern struct device_node *of_cpu_device_node_get(int cpu); extern int of_cpu_node_to_id(struct device_node *np); extern struct device_node *of_get_next_cpu_node(struct device_node *prev); -extern struct device_node *of_get_cpu_state_node(struct device_node *cpu_node, +extern struct device_node *of_get_cpu_state_node(const struct device_node *cpu_node, int index); extern u64 of_get_cpu_hwid(struct device_node *cpun, unsigned int thread); @@ -395,7 +395,7 @@ extern int of_phandle_iterator_args(struct of_phandle_iterator *it, int size); extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); -extern int of_alias_get_id(struct device_node *np, const char *stem); +extern int of_alias_get_id(const struct device_node *np, const char *stem); extern int of_alias_get_highest_id(const char *stem); bool of_machine_compatible_match(const char *const *compats); @@ -446,9 +446,9 @@ const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur, */ const char *of_prop_next_string(struct property *prop, const char *cur); -bool of_console_check(struct device_node *dn, char *name, int index); +bool of_console_check(const struct device_node *dn, char *name, int index); -int of_map_id(struct device_node *np, u32 id, +int of_map_id(const struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out); @@ -871,7 +871,7 @@ static inline void of_property_clear_flag(struct property *p, unsigned long flag { } -static inline int of_map_id(struct device_node *np, u32 id, +static inline int of_map_id(const struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out) { @@ -1734,7 +1734,7 @@ struct of_overlay_notify_data { #ifdef CONFIG_OF_OVERLAY int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size, - int *ovcs_id, struct device_node *target_base); + int *ovcs_id, const struct device_node *target_base); int of_overlay_remove(int *ovcs_id); int of_overlay_remove_all(void); @@ -1744,7 +1744,7 @@ int of_overlay_notifier_unregister(struct notifier_block *nb); #else static inline int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size, - int *ovcs_id, struct device_node *target_base) + int *ovcs_id, const struct device_node *target_base) { return -ENOTSUPP; } diff --git a/include/linux/of_address.h b/include/linux/of_address.h index 26a19daf0d09..bd46dbcc6e88 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -83,8 +83,8 @@ extern struct of_pci_range *of_pci_range_parser_one( struct of_pci_range *range); extern int of_pci_address_to_resource(struct device_node *dev, int bar, struct resource *r); -extern int of_pci_range_to_resource(struct of_pci_range *range, - struct device_node *np, +extern int of_pci_range_to_resource(const struct of_pci_range *range, + const struct device_node *np, struct resource *res); extern int of_range_to_resource(struct device_node *np, int index, struct resource *res); diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index d6d3eae2f145..6337ad4e5fe8 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -48,12 +48,12 @@ extern int of_irq_to_resource_table(struct device_node *dev, struct resource *res, int nr_irqs); extern struct device_node *of_irq_find_parent(struct device_node *child); extern struct irq_domain *of_msi_get_domain(struct device *dev, - struct device_node *np, + const struct device_node *np, enum irq_domain_bus_token token); extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 id, u32 bus_token); -extern void of_msi_configure(struct device *dev, struct device_node *np); +extern void of_msi_configure(struct device *dev, const struct device_node *np); u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in); #else static inline void of_irq_init(const struct of_device_id *matches) -- cgit v1.2.3 From 9c63fea9acd077701fd67bbc21b1e77d6b98b7e0 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 10 Oct 2024 11:27:17 -0500 Subject: of: Constify struct property pointers Most accesses to struct property do not modify it, so constify struct property pointers where ever possible in the DT core code. Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241010-dt-const-v1-4-87a51f558425@kernel.org Signed-off-by: Rob Herring (Arm) --- include/linux/of.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/of.h b/include/linux/of.h index 7875b308f13c..086a60f3b8a6 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -435,7 +435,7 @@ extern int of_detach_node(struct device_node *); * of_property_for_each_u32(np, "propname", u) * printk("U32 value: %x\n", u); */ -const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur, +const __be32 *of_prop_next_u32(const struct property *prop, const __be32 *cur, u32 *pu); /* * struct property *prop; @@ -444,7 +444,7 @@ const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur, * of_property_for_each_string(np, "propname", prop, s) * printk("String value: %s\n", s); */ -const char *of_prop_next_string(struct property *prop, const char *cur); +const char *of_prop_next_string(const struct property *prop, const char *cur); bool of_console_check(const struct device_node *dn, char *name, int index); @@ -826,13 +826,13 @@ static inline bool of_console_check(const struct device_node *dn, const char *na return false; } -static inline const __be32 *of_prop_next_u32(struct property *prop, +static inline const __be32 *of_prop_next_u32(const struct property *prop, const __be32 *cur, u32 *pu) { return NULL; } -static inline const char *of_prop_next_string(struct property *prop, +static inline const char *of_prop_next_string(const struct property *prop, const char *cur) { return NULL; @@ -899,7 +899,7 @@ static inline const void *of_device_get_match_data(const struct device *dev) #define of_node_cmp(s1, s2) strcasecmp((s1), (s2)) #endif -static inline int of_prop_val_eq(struct property *p1, struct property *p2) +static inline int of_prop_val_eq(const struct property *p1, const struct property *p2) { return p1->length == p2->length && !memcmp(p1->value, p2->value, (size_t)p1->length); @@ -1252,7 +1252,7 @@ static inline int of_property_read_string_index(const struct device_node *np, static inline bool of_property_read_bool(const struct device_node *np, const char *propname) { - struct property *prop = of_find_property(np, propname, NULL); + const struct property *prop = of_find_property(np, propname, NULL); return prop ? true : false; } @@ -1430,7 +1430,7 @@ static inline int of_property_read_s32(const struct device_node *np, err = of_phandle_iterator_next(it)) #define of_property_for_each_u32(np, propname, u) \ - for (struct {struct property *prop; const __be32 *item; } _it = \ + for (struct {const struct property *prop; const __be32 *item; } _it = \ {of_find_property(np, propname, NULL), \ of_prop_next_u32(_it.prop, NULL, &u)}; \ _it.item; \ -- cgit v1.2.3 From d79616b04f0e08178ceb716a5d2ef60ab723d532 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 10 Oct 2024 11:27:20 -0500 Subject: of/address: Constify of_busses[] array and pointers The of_busses array is fixed, so it and all struct of_bus pointers can be const. Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241010-dt-const-v1-7-87a51f558425@kernel.org Signed-off-by: Rob Herring (Arm) --- include/linux/of_address.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/of_address.h b/include/linux/of_address.h index bd46dbcc6e88..9e034363788a 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -10,7 +10,7 @@ struct of_bus; struct of_pci_range_parser { struct device_node *node; - struct of_bus *bus; + const struct of_bus *bus; const __be32 *range; const __be32 *end; int na; -- cgit v1.2.3 From 74fe1ad4132234f04fcc75e16600449496a67b5b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 7 Oct 2024 18:50:14 +0200 Subject: debugobjects: Prepare for batching Move the debug_obj::object pointer into a union and add a pointer to the last node in a batch. That allows to implement batch processing efficiently by utilizing the stack property of hlist: When the first object of a batch is added to the list, then the batch pointer is set to the hlist node of the object itself. Any subsequent add retrieves the pointer to the last node from the first object in the list and uses that for storing the last node pointer in the newly added object. Add the pointer to the data structure and ensure that all relevant pool sizes are strictly batch sized. The actual batching implementation follows in subsequent changes. Signed-off-by: Thomas Gleixner Reviewed-by: Zhen Lei Link: https://lore.kernel.org/all/20241007164914.139204961@linutronix.de --- include/linux/debugobjects.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h index 32444686b6ff..8b95545e7924 100644 --- a/include/linux/debugobjects.h +++ b/include/linux/debugobjects.h @@ -23,13 +23,17 @@ struct debug_obj_descr; * @state: tracked object state * @astate: current active state * @object: pointer to the real object + * @batch_last: pointer to the last hlist node in a batch * @descr: pointer to an object type specific debug description structure */ struct debug_obj { - struct hlist_node node; - enum debug_obj_state state; - unsigned int astate; - void *object; + struct hlist_node node; + enum debug_obj_state state; + unsigned int astate; + union { + void *object; + struct hlist_node *batch_last; + }; const struct debug_obj_descr *descr; }; -- cgit v1.2.3 From e4cf33ca48128d580e25ebe779b7ba7b4b4cf733 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Oct 2024 20:21:14 -0400 Subject: ftrace: Consolidate ftrace_regs accessor functions for archs using pt_regs Most architectures use pt_regs within ftrace_regs making a lot of the accessor functions just calls to the pt_regs internally. Instead of duplication this effort, use a HAVE_ARCH_FTRACE_REGS for architectures that have their own ftrace_regs that is not based on pt_regs and will define all the accessor functions, and for the architectures that just use pt_regs, it will leave it undefined, and the default accessor functions will be used. Note, this will also make it easier to add new accessor functions to ftrace_regs as it will mean having to touch less architectures. Cc: Cc: "x86@kernel.org" Cc: Mathieu Desnoyers Cc: Mark Rutland Cc: Will Deacon Cc: Huacai Chen Cc: WANG Xuerui Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Naveen N Rao Cc: Madhavan Srinivasan Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Link: https://lore.kernel.org/20241010202114.2289f6fd@gandalf.local.home Acked-by: Masami Hiramatsu (Google) Acked-by: Heiko Carstens # s390 Acked-by: Catalin Marinas Acked-by: Michael Ellerman # powerpc Suggested-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 32 +++++++------------------------- include/linux/ftrace_regs.h | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 25 deletions(-) create mode 100644 include/linux/ftrace_regs.h (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 66f10291a0b2..aa9ddd1e4bb6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -113,6 +113,8 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val #ifdef CONFIG_FUNCTION_TRACER +#include + extern int ftrace_enabled; /** @@ -150,14 +152,11 @@ struct ftrace_regs { #define ftrace_regs_size() sizeof(struct __arch_ftrace_regs) #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS - -struct __arch_ftrace_regs { - struct pt_regs regs; -}; - -struct ftrace_regs; -#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) - +/* + * Architectures that define HAVE_DYNAMIC_FTRACE_WITH_ARGS must define their own + * arch_ftrace_get_regs() where it only returns pt_regs *if* it is fully + * populated. It should return NULL otherwise. + */ static inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { return &arch_ftrace_regs(fregs)->regs; @@ -191,23 +190,6 @@ static __always_inline bool ftrace_regs_has_args(struct ftrace_regs *fregs) return ftrace_get_regs(fregs) != NULL; } -#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS -#define ftrace_regs_get_instruction_pointer(fregs) \ - instruction_pointer(ftrace_get_regs(fregs)) -#define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(ftrace_get_regs(fregs), n) -#define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(ftrace_get_regs(fregs)) -#define ftrace_regs_return_value(fregs) \ - regs_return_value(ftrace_get_regs(fregs)) -#define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(ftrace_get_regs(fregs), ret) -#define ftrace_override_function_with_return(fregs) \ - override_function_with_return(ftrace_get_regs(fregs)) -#define ftrace_regs_query_register_offset(name) \ - regs_query_register_offset(name) -#endif - typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h new file mode 100644 index 000000000000..dea6a0851b74 --- /dev/null +++ b/include/linux/ftrace_regs.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_FTRACE_REGS_H +#define _LINUX_FTRACE_REGS_H + +/* + * For archs that just copy pt_regs in ftrace regs, it can use this default. + * If an architecture does not use pt_regs, it must define all the below + * accessor functions. + */ +#ifndef HAVE_ARCH_FTRACE_REGS +struct __arch_ftrace_regs { + struct pt_regs regs; +}; + +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) + +struct ftrace_regs; + +#define ftrace_regs_get_instruction_pointer(fregs) \ + instruction_pointer(arch_ftrace_get_regs(fregs)) +#define ftrace_regs_get_argument(fregs, n) \ + regs_get_kernel_argument(arch_ftrace_get_regs(fregs), n) +#define ftrace_regs_get_stack_pointer(fregs) \ + kernel_stack_pointer(arch_ftrace_get_regs(fregs)) +#define ftrace_regs_return_value(fregs) \ + regs_return_value(arch_ftrace_get_regs(fregs)) +#define ftrace_regs_set_return_value(fregs, ret) \ + regs_set_return_value(arch_ftrace_get_regs(fregs), ret) +#define ftrace_override_function_with_return(fregs) \ + override_function_with_return(arch_ftrace_get_regs(fregs)) +#define ftrace_regs_query_register_offset(name) \ + regs_query_register_offset(name) + +#endif /* HAVE_ARCH_FTRACE_REGS */ + +#endif /* _LINUX_FTRACE_REGS_H */ -- cgit v1.2.3 From 39c089a01a7e431383710a566864644cbbc0f8fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 10 Oct 2024 17:44:44 +0200 Subject: vdso: Remove timekeeper argument of __arch_update_vsyscall() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No implementation of this hook uses the passed in timekeeper anymore. This avoids including a non-VDSO header while building the VDSO, which can lead to compilation errors. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Acked-by: Will Deacon Link: https://lore.kernel.org/all/20241010-vdso-generic-arch_update_vsyscall-v1-1-7fe5a3ea4382@linutronix.de --- include/asm-generic/vdso/vsyscall.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index c835607f78ae..01dafd604188 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -12,8 +12,7 @@ static __always_inline struct vdso_data *__arch_get_k_vdso_data(void) #endif /* __arch_get_k_vdso_data */ #ifndef __arch_update_vsyscall -static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata, - struct timekeeper *tk) +static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata) { } #endif /* __arch_update_vsyscall */ -- cgit v1.2.3 From a066397e8ed1036e8b959050ab6e830ee90d9f58 Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Fri, 13 Sep 2024 19:19:53 -0400 Subject: tpm: fix unsigned/signed mismatch errors related to __calc_tpm2_event_size __calc_tpm2_event_size returns 0 or a positive length, but return values are often interpreted as ints. Convert everything over to u32 to avoid signed/unsigned logic errors. Signed-off-by: Gregory Price Signed-off-by: Ard Biesheuvel --- include/linux/tpm_eventlog.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 7d68a5cc5881..891368e82558 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -157,7 +157,7 @@ struct tcg_algorithm_info { * Return: size of the event on success, 0 on failure */ -static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, +static __always_inline u32 __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, struct tcg_pcr_event *event_header, bool do_mapping) { -- cgit v1.2.3 From 58797abed49d6b78c7af99b03b037f20c7ffb203 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 5 Oct 2024 12:04:17 +0200 Subject: power: supply: core: constify power_supply_battery_info::resist_table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The power supply core never modifies the resist table. Reflect this in the API, so drivers can mark their static tables as const. Signed-off-by: Thomas Weißschuh Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20241005-power-supply-battery-const-v1-1-c1f721927048@weissschuh.net Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 910d407ebe63..9253411c105f 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -752,7 +752,7 @@ struct power_supply_battery_info { int temp_max; struct power_supply_battery_ocv_table *ocv_table[POWER_SUPPLY_OCV_TEMP_MAX]; int ocv_table_size[POWER_SUPPLY_OCV_TEMP_MAX]; - struct power_supply_resistance_temp_table *resist_table; + const struct power_supply_resistance_temp_table *resist_table; int resist_table_size; const struct power_supply_vbat_ri_table *vbat2ri_discharging; int vbat2ri_discharging_size; @@ -805,7 +805,7 @@ power_supply_find_ocv2cap_table(struct power_supply_battery_info *info, extern int power_supply_batinfo_ocv2cap(struct power_supply_battery_info *info, int ocv, int temp); extern int -power_supply_temp2resist_simple(struct power_supply_resistance_temp_table *table, +power_supply_temp2resist_simple(const struct power_supply_resistance_temp_table *table, int table_len, int temp); extern int power_supply_vbat2ri(struct power_supply_battery_info *info, int vbat_uv, bool charging); -- cgit v1.2.3 From ce20d5b9e37099a035ab34d4d3f59e1744756385 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 5 Oct 2024 12:04:21 +0200 Subject: power: supply: core: constify power_supply_battery_info::ocv_table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The power supply core never modifies the ocv table. Reflect this in the API, so drivers can mark their static tables as const. Signed-off-by: Thomas Weißschuh Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20241005-power-supply-battery-const-v1-5-c1f721927048@weissschuh.net Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 9253411c105f..4e29ec39c18f 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -750,7 +750,7 @@ struct power_supply_battery_info { int temp_alert_max; int temp_min; int temp_max; - struct power_supply_battery_ocv_table *ocv_table[POWER_SUPPLY_OCV_TEMP_MAX]; + const struct power_supply_battery_ocv_table *ocv_table[POWER_SUPPLY_OCV_TEMP_MAX]; int ocv_table_size[POWER_SUPPLY_OCV_TEMP_MAX]; const struct power_supply_resistance_temp_table *resist_table; int resist_table_size; @@ -797,9 +797,9 @@ extern bool power_supply_battery_info_has_prop(struct power_supply_battery_info extern int power_supply_battery_info_get_prop(struct power_supply_battery_info *info, enum power_supply_property psp, union power_supply_propval *val); -extern int power_supply_ocv2cap_simple(struct power_supply_battery_ocv_table *table, +extern int power_supply_ocv2cap_simple(const struct power_supply_battery_ocv_table *table, int table_len, int ocv); -extern struct power_supply_battery_ocv_table * +extern const struct power_supply_battery_ocv_table * power_supply_find_ocv2cap_table(struct power_supply_battery_info *info, int temp, int *table_len); extern int power_supply_batinfo_ocv2cap(struct power_supply_battery_info *info, -- cgit v1.2.3 From 49000fee9e639f62ba1f965ed2ae4c5ad18d19e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 5 Oct 2024 12:05:03 +0200 Subject: power: supply: core: add wakeup source inhibit by power_supply_config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To inhibit wakeup users currently have to use dedicated functions to register the power supply: {,devm_}power_supply_register_no_ws(). This is inconsistent to other runtime settings which can be configured through struct power_supply_config. It's also not obvious what _no_ws() is meant to mean. Extend power_supply_config to also be able to inhibit the wakeup source. Signed-off-by: Thomas Weißschuh Reviewed-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20241005-power-supply-no-wakeup-source-v1-1-1d62bf9bcb1d@weissschuh.net Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 4e29ec39c18f..9a64043798bd 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -236,6 +236,8 @@ struct power_supply_config { char **supplied_to; size_t num_supplicants; + + bool no_wakeup_source; }; /* Description of power supply */ -- cgit v1.2.3 From 85d319e14f301e1c68131b74c1dceabae73d1e81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 5 Oct 2024 12:05:10 +0200 Subject: power: supply: core: remove {,devm_}power_supply_register_no_ws() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The same functionality is available through power_supply_config::no_wakeup_source, which is more idiomatic. All users of the old API have been converted. Also remove the argument "ws" from __power_supply_register(), as it is now always "true". Signed-off-by: Thomas Weißschuh Reviewed-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20241005-power-supply-no-wakeup-source-v1-8-1d62bf9bcb1d@weissschuh.net Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 9a64043798bd..14eedefb3ac8 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -874,17 +874,9 @@ power_supply_register(struct device *parent, const struct power_supply_desc *desc, const struct power_supply_config *cfg); extern struct power_supply *__must_check -power_supply_register_no_ws(struct device *parent, - const struct power_supply_desc *desc, - const struct power_supply_config *cfg); -extern struct power_supply *__must_check devm_power_supply_register(struct device *parent, const struct power_supply_desc *desc, const struct power_supply_config *cfg); -extern struct power_supply *__must_check -devm_power_supply_register_no_ws(struct device *parent, - const struct power_supply_desc *desc, - const struct power_supply_config *cfg); extern void power_supply_unregister(struct power_supply *psy); extern int power_supply_powers(struct power_supply *psy, struct device *dev); -- cgit v1.2.3 From 0b582611a8f4270fa357a22a546909b2dd5fc5fe Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 15 Oct 2024 10:28:43 +0900 Subject: ftrace: Use arch_ftrace_regs() for ftrace_regs_*() macros Since the arch_ftrace_get_regs(fregs) is only valid when the FL_SAVE_REGS is set, we need to use `&arch_ftrace_regs()->regs` for ftrace_regs_*() APIs because those APIs are for ftrace_regs, not complete pt_regs. Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Alan Maguire Cc: Mark Rutland Link: https://lore.kernel.org/172895572290.107311.16057631001860177198.stgit@devnote2 Fixes: e4cf33ca4812 ("ftrace: Consolidate ftrace_regs accessor functions for archs using pt_regs") Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace_regs.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h index dea6a0851b74..b78a0a60515b 100644 --- a/include/linux/ftrace_regs.h +++ b/include/linux/ftrace_regs.h @@ -17,17 +17,17 @@ struct __arch_ftrace_regs { struct ftrace_regs; #define ftrace_regs_get_instruction_pointer(fregs) \ - instruction_pointer(arch_ftrace_get_regs(fregs)) + instruction_pointer(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(arch_ftrace_get_regs(fregs), n) + regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) #define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(arch_ftrace_get_regs(fregs)) + kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_return_value(fregs) \ - regs_return_value(arch_ftrace_get_regs(fregs)) + regs_return_value(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(arch_ftrace_get_regs(fregs), ret) + regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) #define ftrace_override_function_with_return(fregs) \ - override_function_with_return(arch_ftrace_get_regs(fregs)) + override_function_with_return(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_query_register_offset(name) \ regs_query_register_offset(name) -- cgit v1.2.3 From 2d17932da44fdc1ba835ad05110ab996d2912dbf Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 15 Oct 2024 10:28:53 +0900 Subject: ftrace: Rename ftrace_regs_return_value to ftrace_regs_get_return_value Rename ftrace_regs_return_value to ftrace_regs_get_return_value as same as other ftrace_regs_get/set_* APIs. arm64 and riscv are already using this new name. Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Alan Maguire Link: https://lore.kernel.org/172895573350.107311.7564634260652361511.stgit@devnote2 Acked-by: Mark Rutland Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h index b78a0a60515b..be1ed0c891d0 100644 --- a/include/linux/ftrace_regs.h +++ b/include/linux/ftrace_regs.h @@ -22,7 +22,7 @@ struct ftrace_regs; regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) #define ftrace_regs_get_stack_pointer(fregs) \ kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_return_value(fregs) \ +#define ftrace_regs_get_return_value(fregs) \ regs_return_value(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_set_return_value(fregs, ret) \ regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) -- cgit v1.2.3 From efe8419ae78d65e83edc31aad74b605c12e7d60c Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 14 Oct 2024 16:13:39 +0100 Subject: vdso: Introduce vdso/page.h The VDSO implementation includes headers from outside of the vdso/ namespace. Introduce vdso/page.h to make sure that the generic library uses only the allowed namespace. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Reviewed-by: Arnd Bergmann Acked-by: Geert Uytterhoeven # m68k Link: https://lore.kernel.org/all/20241014151340.1639555-3-vincenzo.frascino@arm.com --- include/vdso/page.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 include/vdso/page.h (limited to 'include') diff --git a/include/vdso/page.h b/include/vdso/page.h new file mode 100644 index 000000000000..4ada1ba6bd1f --- /dev/null +++ b/include/vdso/page.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __VDSO_PAGE_H +#define __VDSO_PAGE_H + +#include + +/* + * PAGE_SHIFT determines the page size. + * + * Note: This definition is required because PAGE_SHIFT is used + * in several places throuout the codebase. + */ +#define PAGE_SHIFT CONFIG_PAGE_SHIFT + +#define PAGE_SIZE (_AC(1,UL) << CONFIG_PAGE_SHIFT) + +#if defined(CONFIG_PHYS_ADDR_T_64BIT) && !defined(CONFIG_64BIT) +/* + * Applies only to 32-bit architectures with a 64-bit phys_addr_t. + * + * Subtle: (1 << CONFIG_PAGE_SHIFT) is an int, not an unsigned long. + * So if we assign PAGE_MASK to a larger type it gets extended the + * way we want (i.e. with 1s in the high bits) + */ +#define PAGE_MASK (~((1 << CONFIG_PAGE_SHIFT) - 1)) +#else +#define PAGE_MASK (~(PAGE_SIZE - 1)) +#endif + +#endif /* __VDSO_PAGE_H */ -- cgit v1.2.3 From 102f085d84607462234ac60f6027973b45a9bde2 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 14 Oct 2024 10:22:21 +0200 Subject: timers: Rename usleep_idle_range() to usleep_range_idle() usleep_idle_range() is a variant of usleep_range(). Both are using usleep_range_state() as a base. To be able to find all the related functions in one go, rename it usleep_idle_range() to usleep_range_idle(). No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Reviewed-by: SeongJae Park Link: https://lore.kernel.org/all/20241014-devel-anna-maria-b4-timers-flseep-v3-4-dc8b907cb62f@linutronix.de --- include/linux/delay.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/delay.h b/include/linux/delay.h index ff9cda975e30..2bc586aa2068 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -68,7 +68,7 @@ static inline void usleep_range(unsigned long min, unsigned long max) usleep_range_state(min, max, TASK_UNINTERRUPTIBLE); } -static inline void usleep_idle_range(unsigned long min, unsigned long max) +static inline void usleep_range_idle(unsigned long min, unsigned long max) { usleep_range_state(min, max, TASK_IDLE); } -- cgit v1.2.3 From f36eb171410839325fff9cd9b7b7400f7e606962 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 14 Oct 2024 10:22:22 +0200 Subject: timers: Update function descriptions of sleep/delay related functions A lot of commonly used functions for inserting a sleep or delay lack a proper function description. Add function descriptions to all of them to have important information in a central place close to the code. No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/all/20241014-devel-anna-maria-b4-timers-flseep-v3-5-dc8b907cb62f@linutronix.de --- include/asm-generic/delay.h | 41 ++++++++++++++++++++++++++++++++++---- include/linux/delay.h | 48 +++++++++++++++++++++++++++++++++------------ 2 files changed, 73 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/asm-generic/delay.h b/include/asm-generic/delay.h index e448ac61430c..a8cee41cc51b 100644 --- a/include/asm-generic/delay.h +++ b/include/asm-generic/delay.h @@ -12,11 +12,39 @@ extern void __const_udelay(unsigned long xloops); extern void __delay(unsigned long loops); /* - * The weird n/20000 thing suppresses a "comparison is always false due to - * limited range of data type" warning with non-const 8-bit arguments. + * Implementation details: + * + * * The weird n/20000 thing suppresses a "comparison is always false due to + * limited range of data type" warning with non-const 8-bit arguments. + * * 0x10c7 is 2**32 / 1000000 (rounded up) -> udelay + * * 0x5 is 2**32 / 1000000000 (rounded up) -> ndelay */ -/* 0x10c7 is 2**32 / 1000000 (rounded up) */ +/** + * udelay - Inserting a delay based on microseconds with busy waiting + * @usec: requested delay in microseconds + * + * When delaying in an atomic context ndelay(), udelay() and mdelay() are the + * only valid variants of delaying/sleeping to go with. + * + * When inserting delays in non atomic context which are shorter than the time + * which is required to queue e.g. an hrtimer and to enter then the scheduler, + * it is also valuable to use udelay(). But it is not simple to specify a + * generic threshold for this which will fit for all systems. An approximation + * is a threshold for all delays up to 10 microseconds. + * + * When having a delay which is larger than the architecture specific + * %MAX_UDELAY_MS value, please make sure mdelay() is used. Otherwise a overflow + * risk is given. + * + * Please note that ndelay(), udelay() and mdelay() may return early for several + * reasons (https://lists.openwall.net/linux-kernel/2011/01/09/56): + * + * #. computed loops_per_jiffy too low (due to the time taken to execute the + * timer interrupt.) + * #. cache behaviour affecting the time it takes to execute the loop function. + * #. CPU clock rate changes. + */ #define udelay(n) \ ({ \ if (__builtin_constant_p(n)) { \ @@ -29,7 +57,12 @@ extern void __delay(unsigned long loops); } \ }) -/* 0x5 is 2**32 / 1000000000 (rounded up) */ +/** + * ndelay - Inserting a delay based on nanoseconds with busy waiting + * @nsec: requested delay in nanoseconds + * + * See udelay() for basic information about ndelay() and it's variants. + */ #define ndelay(n) \ ({ \ if (__builtin_constant_p(n)) { \ diff --git a/include/linux/delay.h b/include/linux/delay.h index 2bc586aa2068..2de509e4adce 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -6,17 +6,7 @@ * Copyright (C) 1993 Linus Torvalds * * Delay routines, using a pre-computed "loops_per_jiffy" value. - * - * Please note that ndelay(), udelay() and mdelay() may return early for - * several reasons: - * 1. computed loops_per_jiffy too low (due to the time taken to - * execute the timer interrupt.) - * 2. cache behaviour affecting the time it takes to execute the - * loop function. - * 3. CPU clock rate changes. - * - * Please see this thread: - * https://lists.openwall.net/linux-kernel/2011/01/09/56 + * Sleep routines using timer list timers or hrtimers. */ #include @@ -35,12 +25,21 @@ extern unsigned long loops_per_jiffy; * The 2nd mdelay() definition ensures GCC will optimize away the * while loop for the common cases where n <= MAX_UDELAY_MS -- Paul G. */ - #ifndef MAX_UDELAY_MS #define MAX_UDELAY_MS 5 #endif #ifndef mdelay +/** + * mdelay - Inserting a delay based on milliseconds with busy waiting + * @n: requested delay in milliseconds + * + * See udelay() for basic information about mdelay() and it's variants. + * + * Please double check, whether mdelay() is the right way to go or whether a + * refactoring of the code is the better variant to be able to use msleep() + * instead. + */ #define mdelay(n) (\ (__builtin_constant_p(n) && (n)<=MAX_UDELAY_MS) ? udelay((n)*1000) : \ ({unsigned long __ms=(n); while (__ms--) udelay(1000);})) @@ -63,16 +62,41 @@ unsigned long msleep_interruptible(unsigned int msecs); void usleep_range_state(unsigned long min, unsigned long max, unsigned int state); +/** + * usleep_range - Sleep for an approximate time + * @min: Minimum time in microseconds to sleep + * @max: Maximum time in microseconds to sleep + * + * For basic information please refere to usleep_range_state(). + * + * The task will be in the state TASK_UNINTERRUPTIBLE during the sleep. + */ static inline void usleep_range(unsigned long min, unsigned long max) { usleep_range_state(min, max, TASK_UNINTERRUPTIBLE); } +/** + * usleep_range_idle - Sleep for an approximate time with idle time accounting + * @min: Minimum time in microseconds to sleep + * @max: Maximum time in microseconds to sleep + * + * For basic information please refere to usleep_range_state(). + * + * The sleeping task has the state TASK_IDLE during the sleep to prevent + * contribution to the load avarage. + */ static inline void usleep_range_idle(unsigned long min, unsigned long max) { usleep_range_state(min, max, TASK_IDLE); } +/** + * ssleep - wrapper for seconds around msleep + * @seconds: Requested sleep duration in seconds + * + * Please refere to msleep() for detailed information. + */ static inline void ssleep(unsigned int seconds) { msleep(seconds * 1000); -- cgit v1.2.3 From 19e2d91d8cb1f333adf04731f2788ff6ca06cebd Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 14 Oct 2024 10:22:23 +0200 Subject: delay: Rework udelay and ndelay udelay() as well as ndelay() are defines and no functions and are using constants to be able to transform a sleep time into loops and to prevent too long udelays/ndelays. There was a compiler error with non-const 8 bit arguments which was fixed by commit a87e553fabe8 ("asm-generic: delay.h fix udelay and ndelay for 8 bit args"). When using a function, the non-const 8 bit argument is type casted and the problem would be gone. Transform udelay() and ndelay() into proper functions, remove the no longer and confusing division, add defines for the magic values and add some explanations as well. Suggested-by: Thomas Gleixner Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/all/20241014-devel-anna-maria-b4-timers-flseep-v3-6-dc8b907cb62f@linutronix.de --- include/asm-generic/delay.h | 65 ++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/asm-generic/delay.h b/include/asm-generic/delay.h index a8cee41cc51b..76cf237b6e4c 100644 --- a/include/asm-generic/delay.h +++ b/include/asm-generic/delay.h @@ -2,6 +2,9 @@ #ifndef __ASM_GENERIC_DELAY_H #define __ASM_GENERIC_DELAY_H +#include +#include + /* Undefined functions to get compile-time errors */ extern void __bad_udelay(void); extern void __bad_ndelay(void); @@ -12,13 +15,18 @@ extern void __const_udelay(unsigned long xloops); extern void __delay(unsigned long loops); /* - * Implementation details: - * - * * The weird n/20000 thing suppresses a "comparison is always false due to - * limited range of data type" warning with non-const 8-bit arguments. - * * 0x10c7 is 2**32 / 1000000 (rounded up) -> udelay - * * 0x5 is 2**32 / 1000000000 (rounded up) -> ndelay + * The microseconds/nanosecond delay multiplicators are used to convert a + * constant microseconds/nanoseconds value to a value which can be used by the + * architectures specific implementation to transform it into loops. + */ +#define UDELAY_CONST_MULT ((unsigned long)DIV_ROUND_UP(1ULL << 32, USEC_PER_SEC)) +#define NDELAY_CONST_MULT ((unsigned long)DIV_ROUND_UP(1ULL << 32, NSEC_PER_SEC)) + +/* + * The maximum constant udelay/ndelay value picked out of thin air to prevent + * too long constant udelays/ndelays. */ +#define DELAY_CONST_MAX 20000 /** * udelay - Inserting a delay based on microseconds with busy waiting @@ -45,17 +53,17 @@ extern void __delay(unsigned long loops); * #. cache behaviour affecting the time it takes to execute the loop function. * #. CPU clock rate changes. */ -#define udelay(n) \ - ({ \ - if (__builtin_constant_p(n)) { \ - if ((n) / 20000 >= 1) \ - __bad_udelay(); \ - else \ - __const_udelay((n) * 0x10c7ul); \ - } else { \ - __udelay(n); \ - } \ - }) +static __always_inline void udelay(unsigned long usec) +{ + if (__builtin_constant_p(usec)) { + if (usec >= DELAY_CONST_MAX) + __bad_udelay(); + else + __const_udelay(usec * UDELAY_CONST_MULT); + } else { + __udelay(usec); + } +} /** * ndelay - Inserting a delay based on nanoseconds with busy waiting @@ -63,16 +71,17 @@ extern void __delay(unsigned long loops); * * See udelay() for basic information about ndelay() and it's variants. */ -#define ndelay(n) \ - ({ \ - if (__builtin_constant_p(n)) { \ - if ((n) / 20000 >= 1) \ - __bad_ndelay(); \ - else \ - __const_udelay((n) * 5ul); \ - } else { \ - __ndelay(n); \ - } \ - }) +static __always_inline void ndelay(unsigned long nsec) +{ + if (__builtin_constant_p(nsec)) { + if (nsec >= DELAY_CONST_MAX) + __bad_udelay(); + else + __const_udelay(nsec * NDELAY_CONST_MULT); + } else { + __udelay(nsec); + } +} +#define ndelay(x) ndelay(x) #endif /* __ASM_GENERIC_DELAY_H */ -- cgit v1.2.3 From 82e11e47c1880362e05c065bef7dbe28a749555c Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 14 Oct 2024 10:22:24 +0200 Subject: timers: Adjust flseep() to reflect reality fsleep() simply implements the recommendations of the outdated documentation in "Documentation/timers/timers-howto.rst". This should be a user friendly interface to choose always the best timeout function approach: - udelay() for very short sleep durations shorter than 10 microseconds - usleep_range() for sleep durations until 20 milliseconds - msleep() for the others The actual implementation has several problems: - It does not take into account that HZ resolution also has an impact on granularity of jiffies and has also an impact on the granularity of the buckets of timer wheel levels. This means that accuracy for the timeout does not have an upper limit. When executing fsleep(20000) on a HZ=100 system, the possible additional slack will be 50% as the granularity of the buckets in the lowest level is 10 milliseconds. - The upper limit of usleep_range() is twice the requested timeout. When no other interrupts occur in this range, the maximum value is used. This means that the requested sleep length has then an additional delay of 100%. Change the thresholds for the decisions in fsleep() to make sure the maximum slack which is added to the sleep duration is 25%. Note: Outdated documentation will be updated in a followup patch. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/all/20241014-devel-anna-maria-b4-timers-flseep-v3-7-dc8b907cb62f@linutronix.de --- include/linux/delay.h | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/delay.h b/include/linux/delay.h index 2de509e4adce..89866bab100d 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -11,6 +11,7 @@ #include #include +#include extern unsigned long loops_per_jiffy; @@ -102,15 +103,35 @@ static inline void ssleep(unsigned int seconds) msleep(seconds * 1000); } -/* see Documentation/timers/timers-howto.rst for the thresholds */ +static const unsigned int max_slack_shift = 2; +#define USLEEP_RANGE_UPPER_BOUND ((TICK_NSEC << max_slack_shift) / NSEC_PER_USEC) + +/** + * fsleep - flexible sleep which autoselects the best mechanism + * @usecs: requested sleep duration in microseconds + * + * flseep() selects the best mechanism that will provide maximum 25% slack + * to the requested sleep duration. Therefore it uses: + * + * * udelay() loop for sleep durations <= 10 microseconds to avoid hrtimer + * overhead for really short sleep durations. + * * usleep_range() for sleep durations which would lead with the usage of + * msleep() to a slack larger than 25%. This depends on the granularity of + * jiffies. + * * msleep() for all other sleep durations. + * + * Note: When %CONFIG_HIGH_RES_TIMERS is not set, all sleeps are processed with + * the granularity of jiffies and the slack might exceed 25% especially for + * short sleep durations. + */ static inline void fsleep(unsigned long usecs) { if (usecs <= 10) udelay(usecs); - else if (usecs <= 20000) - usleep_range(usecs, 2 * usecs); + else if (usecs < USLEEP_RANGE_UPPER_BOUND) + usleep_range(usecs, usecs + (usecs >> max_slack_shift)); else - msleep(DIV_ROUND_UP(usecs, 1000)); + msleep(DIV_ROUND_UP(usecs, USEC_PER_MSEC)); } #endif /* defined(_LINUX_DELAY_H) */ -- cgit v1.2.3 From 89124747f096fc0fe44be0162c7b4fb3271739e8 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 14 Oct 2024 10:22:29 +0200 Subject: iopoll/regmap/phy/snd: Fix comment referencing outdated timer documentation Function descriptions in iopoll.h, regmap.h, phy.h and sound/soc/sof/ops.h copied all the same outdated documentation about sleep/delay function limitations. In those comments, the generic (and still outdated) timer documentation file is referenced. As proper function descriptions for used delay and sleep functions are in place, simply update the descriptions to reference to them. While at it fix missing colon after "Returns" in function description and move return value description to the end of the function description. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Andrew Lunn # for phy.h Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/all/20241014-devel-anna-maria-b4-timers-flseep-v3-12-dc8b907cb62f@linutronix.de --- include/linux/iopoll.h | 52 +++++++++++++++++++++++++------------------------- include/linux/phy.h | 9 +++++---- include/linux/regmap.h | 38 ++++++++++++++++++------------------ 3 files changed, 50 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index 19a7b00baff4..91324c331a4b 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -19,19 +19,19 @@ * @op: accessor function (takes @args as its arguments) * @val: Variable to read the value into * @cond: Break condition (usually involving @val) - * @sleep_us: Maximum time to sleep between reads in us (0 - * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.rst). + * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please + * read usleep_range() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * @sleep_before_read: if it is true, sleep @sleep_us before read. * @args: arguments for @op poll * - * Returns 0 on success and -ETIMEDOUT upon a timeout. In either - * case, the last read value at @args is stored in @val. Must not - * be called from atomic context if sleep_us or timeout_us are used. - * * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @args is stored in @val. Must not + * be called from atomic context if sleep_us or timeout_us are used. */ #define read_poll_timeout(op, val, cond, sleep_us, timeout_us, \ sleep_before_read, args...) \ @@ -64,22 +64,22 @@ * @op: accessor function (takes @args as its arguments) * @val: Variable to read the value into * @cond: Break condition (usually involving @val) - * @delay_us: Time to udelay between reads in us (0 tight-loops). Should - * be less than ~10us since udelay is used (see - * Documentation/timers/timers-howto.rst). + * @delay_us: Time to udelay between reads in us (0 tight-loops). Please + * read udelay() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * @delay_before_read: if it is true, delay @delay_us before read. * @args: arguments for @op poll * - * Returns 0 on success and -ETIMEDOUT upon a timeout. In either - * case, the last read value at @args is stored in @val. - * * This macro does not rely on timekeeping. Hence it is safe to call even when * timekeeping is suspended, at the expense of an underestimation of wall clock * time, which is rather minimal with a non-zero delay_us. * * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @args is stored in @val. */ #define read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, \ delay_before_read, args...) \ @@ -119,17 +119,17 @@ * @addr: Address to poll * @val: Variable to read the value into * @cond: Break condition (usually involving @val) - * @sleep_us: Maximum time to sleep between reads in us (0 - * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.rst). + * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please + * read usleep_range() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * - * Returns 0 on success and -ETIMEDOUT upon a timeout. In either - * case, the last read value at @addr is stored in @val. Must not - * be called from atomic context if sleep_us or timeout_us are used. - * * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @addr is stored in @val. Must not + * be called from atomic context if sleep_us or timeout_us are used. */ #define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us) \ read_poll_timeout(op, val, cond, sleep_us, timeout_us, false, addr) @@ -140,16 +140,16 @@ * @addr: Address to poll * @val: Variable to read the value into * @cond: Break condition (usually involving @val) - * @delay_us: Time to udelay between reads in us (0 tight-loops). Should - * be less than ~10us since udelay is used (see - * Documentation/timers/timers-howto.rst). + * @delay_us: Time to udelay between reads in us (0 tight-loops). Please + * read udelay() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * - * Returns 0 on success and -ETIMEDOUT upon a timeout. In either - * case, the last read value at @addr is stored in @val. - * * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @addr is stored in @val. */ #define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \ read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, false, addr) diff --git a/include/linux/phy.h b/include/linux/phy.h index a98bc91a0cde..504766d4b2d5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1378,12 +1378,13 @@ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); * @regnum: The register on the MMD to read * @val: Variable to read the register into * @cond: Break condition (usually involving @val) - * @sleep_us: Maximum time to sleep between reads in us (0 - * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.rst). + * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please + * read usleep_range() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * @sleep_before_read: if it is true, sleep @sleep_us before read. - * Returns 0 on success and -ETIMEDOUT upon a timeout. In either + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either * case, the last read value at @args is stored in @val. Must not * be called from atomic context if sleep_us or timeout_us are used. */ diff --git a/include/linux/regmap.h b/include/linux/regmap.h index f9ccad32fc5c..75f162b60ba1 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -106,17 +106,17 @@ struct reg_sequence { * @addr: Address to poll * @val: Unsigned integer variable to read the value into * @cond: Break condition (usually involving @val) - * @sleep_us: Maximum time to sleep between reads in us (0 - * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.rst). + * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please + * read usleep_range() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * - * Returns 0 on success and -ETIMEDOUT upon a timeout or the regmap_read + * This is modelled after the readx_poll_timeout macros in linux/iopoll.h. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout or the regmap_read * error return value in case of a error read. In the two former cases, * the last read value at @addr is stored in @val. Must not be called * from atomic context if sleep_us or timeout_us are used. - * - * This is modelled after the readx_poll_timeout macros in linux/iopoll.h. */ #define regmap_read_poll_timeout(map, addr, val, cond, sleep_us, timeout_us) \ ({ \ @@ -133,20 +133,20 @@ struct reg_sequence { * @addr: Address to poll * @val: Unsigned integer variable to read the value into * @cond: Break condition (usually involving @val) - * @delay_us: Time to udelay between reads in us (0 tight-loops). - * Should be less than ~10us since udelay is used - * (see Documentation/timers/timers-howto.rst). + * @delay_us: Time to udelay between reads in us (0 tight-loops). Please + * read udelay() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * - * Returns 0 on success and -ETIMEDOUT upon a timeout or the regmap_read - * error return value in case of a error read. In the two former cases, - * the last read value at @addr is stored in @val. - * * This is modelled after the readx_poll_timeout_atomic macros in linux/iopoll.h. * * Note: In general regmap cannot be used in atomic context. If you want to use * this macro then first setup your regmap for atomic use (flat or no cache * and MMIO regmap). + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout or the regmap_read + * error return value in case of a error read. In the two former cases, + * the last read value at @addr is stored in @val. */ #define regmap_read_poll_timeout_atomic(map, addr, val, cond, delay_us, timeout_us) \ ({ \ @@ -177,17 +177,17 @@ struct reg_sequence { * @field: Regmap field to read from * @val: Unsigned integer variable to read the value into * @cond: Break condition (usually involving @val) - * @sleep_us: Maximum time to sleep between reads in us (0 - * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.rst). + * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please + * read usleep_range() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * - * Returns 0 on success and -ETIMEDOUT upon a timeout or the regmap_field_read + * This is modelled after the readx_poll_timeout macros in linux/iopoll.h. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout or the regmap_field_read * error return value in case of a error read. In the two former cases, * the last read value at @addr is stored in @val. Must not be called * from atomic context if sleep_us or timeout_us are used. - * - * This is modelled after the readx_poll_timeout macros in linux/iopoll.h. */ #define regmap_field_read_poll_timeout(field, val, cond, sleep_us, timeout_us) \ ({ \ -- cgit v1.2.3 From 95b3120a485f77a9bb8060bf3398311e3dcb6c65 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 16:52:16 -0700 Subject: neighbour: Remove NEIGH_DN_TABLE. Since commit 1202cdd66531 ("Remove DECnet support from kernel"), NEIGH_DN_TABLE is no longer used. MPLS has implicit dependency on it in nla_put_via(), but nla_get_via() does not support DECnet. Let's remove NEIGH_DN_TABLE. Now, neigh_tables[] has only 2 elements and no extra iteration for DECnet in many places. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241014235216.10785-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index a44f262a7384..3887ed9e5026 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -239,7 +239,6 @@ struct neigh_table { enum { NEIGH_ARP_TABLE = 0, NEIGH_ND_TABLE = 1, - NEIGH_DN_TABLE = 2, NEIGH_NR_TABLES, NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */ }; -- cgit v1.2.3 From e1c6c383123ab1caadbfe39b3362ce0cc09dd766 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 13:18:28 -0700 Subject: rtnetlink: Remove rtnl_register() and rtnl_register_module(). No one uses rtnl_register() and rtnl_register_module(). Let's remove them. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014201828.91221-12-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/rtnetlink.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 2d3eb7cb4dff..bb49c5708ce7 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -29,6 +29,16 @@ static inline enum rtnl_kinds rtnl_msgtype_kind(int msgtype) return msgtype & RTNL_KIND_MASK; } +/** + * struct rtnl_msg_handler - rtnetlink message type and handlers + * + * @owner: NULL for built-in, THIS_MODULE for module + * @protocol: Protocol family or PF_UNSPEC + * @msgtype: rtnetlink message type + * @doit: Function pointer called for each request message + * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message + * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions + */ struct rtnl_msg_handler { struct module *owner; int protocol; @@ -38,11 +48,6 @@ struct rtnl_msg_handler { int flags; }; -void rtnl_register(int protocol, int msgtype, - rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); -int rtnl_register_module(struct module *owner, int protocol, int msgtype, - rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); -int rtnl_unregister(int protocol, int msgtype); void rtnl_unregister_all(int protocol); int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n); -- cgit v1.2.3 From 6390834c6f9b2c5e33f52f34579efa0d0df073db Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 3 Oct 2024 13:31:10 +0300 Subject: media: uapi: Add meta formats for PiSP FE config and stats Add two meta formats for PiSP FE: V4L2_META_FMT_RPI_FE_CFG and V4L2_META_FMT_RPI_FE_STATS. The former is used to provide configuration for the FE and the latter is used to read the statistics from the FE. Signed-off-by: Tomi Valkeinen Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index ded023edac70..e7c4dce39007 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -860,6 +860,8 @@ struct v4l2_pix_format { /* Vendor specific - used for RaspberryPi PiSP */ #define V4L2_META_FMT_RPI_BE_CFG v4l2_fourcc('R', 'P', 'B', 'C') /* PiSP BE configuration */ +#define V4L2_META_FMT_RPI_FE_CFG v4l2_fourcc('R', 'P', 'F', 'C') /* PiSP FE configuration */ +#define V4L2_META_FMT_RPI_FE_STATS v4l2_fourcc('R', 'P', 'F', 'S') /* PiSP FE stats */ #ifdef __KERNEL__ /* -- cgit v1.2.3 From 6edb685abb2af445773876a326292b989dcb3c9f Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 3 Oct 2024 13:31:12 +0300 Subject: media: raspberrypi: Add support for RP1-CFE Add support for Raspberry Pi CFE. The CFE is a hardware block that contains: - MIPI D-PHY - MIPI CSI-2 receiver - Front End ISP (FE) The driver has been upported from the Raspberry Pi kernel commit 88a681df9623 ("ARM: dts: bcm2712-rpi: Add i2c_pins labels"). Co-developed-by: Naushir Patuck Signed-off-by: Naushir Patuck Signed-off-by: Tomi Valkeinen Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- .../uapi/linux/media/raspberrypi/pisp_fe_config.h | 273 +++++++++++++++++++++ .../linux/media/raspberrypi/pisp_fe_statistics.h | 64 +++++ 2 files changed, 337 insertions(+) create mode 100644 include/uapi/linux/media/raspberrypi/pisp_fe_config.h create mode 100644 include/uapi/linux/media/raspberrypi/pisp_fe_statistics.h (limited to 'include') diff --git a/include/uapi/linux/media/raspberrypi/pisp_fe_config.h b/include/uapi/linux/media/raspberrypi/pisp_fe_config.h new file mode 100644 index 000000000000..77237460a3b5 --- /dev/null +++ b/include/uapi/linux/media/raspberrypi/pisp_fe_config.h @@ -0,0 +1,273 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * RP1 PiSP Front End Driver Configuration structures + * + * Copyright (C) 2021 - Raspberry Pi Ltd. + * + */ +#ifndef _UAPI_PISP_FE_CONFIG_ +#define _UAPI_PISP_FE_CONFIG_ + +#include + +#include "pisp_common.h" +#include "pisp_fe_statistics.h" + +#define PISP_FE_NUM_OUTPUTS 2 + +enum pisp_fe_enable { + PISP_FE_ENABLE_INPUT = 0x000001, + PISP_FE_ENABLE_DECOMPRESS = 0x000002, + PISP_FE_ENABLE_DECOMPAND = 0x000004, + PISP_FE_ENABLE_BLA = 0x000008, + PISP_FE_ENABLE_DPC = 0x000010, + PISP_FE_ENABLE_STATS_CROP = 0x000020, + PISP_FE_ENABLE_DECIMATE = 0x000040, + PISP_FE_ENABLE_BLC = 0x000080, + PISP_FE_ENABLE_CDAF_STATS = 0x000100, + PISP_FE_ENABLE_AWB_STATS = 0x000200, + PISP_FE_ENABLE_RGBY = 0x000400, + PISP_FE_ENABLE_LSC = 0x000800, + PISP_FE_ENABLE_AGC_STATS = 0x001000, + PISP_FE_ENABLE_CROP0 = 0x010000, + PISP_FE_ENABLE_DOWNSCALE0 = 0x020000, + PISP_FE_ENABLE_COMPRESS0 = 0x040000, + PISP_FE_ENABLE_OUTPUT0 = 0x080000, + PISP_FE_ENABLE_CROP1 = 0x100000, + PISP_FE_ENABLE_DOWNSCALE1 = 0x200000, + PISP_FE_ENABLE_COMPRESS1 = 0x400000, + PISP_FE_ENABLE_OUTPUT1 = 0x800000 +}; + +#define PISP_FE_ENABLE_CROP(i) (PISP_FE_ENABLE_CROP0 << (4 * (i))) +#define PISP_FE_ENABLE_DOWNSCALE(i) (PISP_FE_ENABLE_DOWNSCALE0 << (4 * (i))) +#define PISP_FE_ENABLE_COMPRESS(i) (PISP_FE_ENABLE_COMPRESS0 << (4 * (i))) +#define PISP_FE_ENABLE_OUTPUT(i) (PISP_FE_ENABLE_OUTPUT0 << (4 * (i))) + +/* + * We use the enable flags to show when blocks are "dirty", but we need some + * extra ones too. + */ +enum pisp_fe_dirty { + PISP_FE_DIRTY_GLOBAL = 0x0001, + PISP_FE_DIRTY_FLOATING = 0x0002, + PISP_FE_DIRTY_OUTPUT_AXI = 0x0004 +}; + +struct pisp_fe_global_config { + __u32 enables; + __u8 bayer_order; + __u8 pad[3]; +} __attribute__((packed)); + +struct pisp_fe_input_axi_config { + /* burst length minus one, in the range 0..15; OR'd with flags */ + __u8 maxlen_flags; + /* { prot[2:0], cache[3:0] } fields */ + __u8 cache_prot; + /* QoS (only 4 LS bits are used) */ + __u16 qos; +} __attribute__((packed)); + +struct pisp_fe_output_axi_config { + /* burst length minus one, in the range 0..15; OR'd with flags */ + __u8 maxlen_flags; + /* { prot[2:0], cache[3:0] } fields */ + __u8 cache_prot; + /* QoS (4 bitfields of 4 bits each for different panic levels) */ + __u16 qos; + /* For Panic mode: Output FIFO panic threshold */ + __u16 thresh; + /* For Panic mode: Output FIFO statistics throttle threshold */ + __u16 throttle; +} __attribute__((packed)); + +struct pisp_fe_input_config { + __u8 streaming; + __u8 pad[3]; + struct pisp_image_format_config format; + struct pisp_fe_input_axi_config axi; + /* Extra cycles delay before issuing each burst request */ + __u8 holdoff; + __u8 pad2[3]; +} __attribute__((packed)); + +struct pisp_fe_output_config { + struct pisp_image_format_config format; + __u16 ilines; + __u8 pad[2]; +} __attribute__((packed)); + +struct pisp_fe_input_buffer_config { + __u32 addr_lo; + __u32 addr_hi; + __u16 frame_id; + __u16 pad; +} __attribute__((packed)); + +#define PISP_FE_DECOMPAND_LUT_SIZE 65 + +struct pisp_fe_decompand_config { + __u16 lut[PISP_FE_DECOMPAND_LUT_SIZE]; + __u16 pad; +} __attribute__((packed)); + +struct pisp_fe_dpc_config { + __u8 coeff_level; + __u8 coeff_range; + __u8 coeff_range2; +#define PISP_FE_DPC_FLAG_FOLDBACK 1 +#define PISP_FE_DPC_FLAG_VFLAG 2 + __u8 flags; +} __attribute__((packed)); + +#define PISP_FE_LSC_LUT_SIZE 16 + +struct pisp_fe_lsc_config { + __u8 shift; + __u8 pad0; + __u16 scale; + __u16 centre_x; + __u16 centre_y; + __u16 lut[PISP_FE_LSC_LUT_SIZE]; +} __attribute__((packed)); + +struct pisp_fe_rgby_config { + __u16 gain_r; + __u16 gain_g; + __u16 gain_b; + __u8 maxflag; + __u8 pad; +} __attribute__((packed)); + +struct pisp_fe_agc_stats_config { + __u16 offset_x; + __u16 offset_y; + __u16 size_x; + __u16 size_y; + /* each weight only 4 bits */ + __u8 weights[PISP_AGC_STATS_NUM_ZONES / 2]; + __u16 row_offset_x; + __u16 row_offset_y; + __u16 row_size_x; + __u16 row_size_y; + __u8 row_shift; + __u8 float_shift; + __u8 pad1[2]; +} __attribute__((packed)); + +struct pisp_fe_awb_stats_config { + __u16 offset_x; + __u16 offset_y; + __u16 size_x; + __u16 size_y; + __u8 shift; + __u8 pad[3]; + __u16 r_lo; + __u16 r_hi; + __u16 g_lo; + __u16 g_hi; + __u16 b_lo; + __u16 b_hi; +} __attribute__((packed)); + +struct pisp_fe_floating_stats_region { + __u16 offset_x; + __u16 offset_y; + __u16 size_x; + __u16 size_y; +} __attribute__((packed)); + +struct pisp_fe_floating_stats_config { + struct pisp_fe_floating_stats_region + regions[PISP_FLOATING_STATS_NUM_ZONES]; +} __attribute__((packed)); + +#define PISP_FE_CDAF_NUM_WEIGHTS 8 + +struct pisp_fe_cdaf_stats_config { + __u16 noise_constant; + __u16 noise_slope; + __u16 offset_x; + __u16 offset_y; + __u16 size_x; + __u16 size_y; + __u16 skip_x; + __u16 skip_y; + __u32 mode; +} __attribute__((packed)); + +struct pisp_fe_stats_buffer_config { + __u32 addr_lo; + __u32 addr_hi; +} __attribute__((packed)); + +struct pisp_fe_crop_config { + __u16 offset_x; + __u16 offset_y; + __u16 width; + __u16 height; +} __attribute__((packed)); + +enum pisp_fe_downscale_flags { + /* downscale the four Bayer components independently... */ + DOWNSCALE_BAYER = 1, + /* ...without trying to preserve their spatial relationship */ + DOWNSCALE_BIN = 2, +}; + +struct pisp_fe_downscale_config { + __u8 xin; + __u8 xout; + __u8 yin; + __u8 yout; + __u8 flags; /* enum pisp_fe_downscale_flags */ + __u8 pad[3]; + __u16 output_width; + __u16 output_height; +} __attribute__((packed)); + +struct pisp_fe_output_buffer_config { + __u32 addr_lo; + __u32 addr_hi; +} __attribute__((packed)); + +/* Each of the two output channels/branches: */ +struct pisp_fe_output_branch_config { + struct pisp_fe_crop_config crop; + struct pisp_fe_downscale_config downscale; + struct pisp_compress_config compress; + struct pisp_fe_output_config output; + __u32 pad; +} __attribute__((packed)); + +/* And finally one to rule them all: */ +struct pisp_fe_config { + /* I/O configuration: */ + struct pisp_fe_stats_buffer_config stats_buffer; + struct pisp_fe_output_buffer_config output_buffer[PISP_FE_NUM_OUTPUTS]; + struct pisp_fe_input_buffer_config input_buffer; + /* processing configuration: */ + struct pisp_fe_global_config global; + struct pisp_fe_input_config input; + struct pisp_decompress_config decompress; + struct pisp_fe_decompand_config decompand; + struct pisp_bla_config bla; + struct pisp_fe_dpc_config dpc; + struct pisp_fe_crop_config stats_crop; + __u32 spare1; /* placeholder for future decimate configuration */ + struct pisp_bla_config blc; + struct pisp_fe_rgby_config rgby; + struct pisp_fe_lsc_config lsc; + struct pisp_fe_agc_stats_config agc_stats; + struct pisp_fe_awb_stats_config awb_stats; + struct pisp_fe_cdaf_stats_config cdaf_stats; + struct pisp_fe_floating_stats_config floating_stats; + struct pisp_fe_output_axi_config output_axi; + struct pisp_fe_output_branch_config ch[PISP_FE_NUM_OUTPUTS]; + /* non-register fields: */ + __u32 dirty_flags; /* these use pisp_fe_enable */ + __u32 dirty_flags_extra; /* these use pisp_fe_dirty */ +} __attribute__((packed)); + +#endif /* _UAPI_PISP_FE_CONFIG_ */ diff --git a/include/uapi/linux/media/raspberrypi/pisp_fe_statistics.h b/include/uapi/linux/media/raspberrypi/pisp_fe_statistics.h new file mode 100644 index 000000000000..a7d42985aee8 --- /dev/null +++ b/include/uapi/linux/media/raspberrypi/pisp_fe_statistics.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * RP1 PiSP Front End statistics definitions + * + * Copyright (C) 2021 - Raspberry Pi Ltd. + * + */ +#ifndef _UAPI_PISP_FE_STATISTICS_H_ +#define _UAPI_PISP_FE_STATISTICS_H_ + +#include + +#define PISP_FLOATING_STATS_NUM_ZONES 4 +#define PISP_AGC_STATS_NUM_BINS 1024 +#define PISP_AGC_STATS_SIZE 16 +#define PISP_AGC_STATS_NUM_ZONES (PISP_AGC_STATS_SIZE * PISP_AGC_STATS_SIZE) +#define PISP_AGC_STATS_NUM_ROW_SUMS 512 + +struct pisp_agc_statistics_zone { + __u64 Y_sum; + __u32 counted; + __u32 pad; +} __attribute__((packed)); + +struct pisp_agc_statistics { + __u32 row_sums[PISP_AGC_STATS_NUM_ROW_SUMS]; + /* + * 32-bits per bin means an image (just less than) 16384x16384 pixels + * in size can weight every pixel from 0 to 15. + */ + __u32 histogram[PISP_AGC_STATS_NUM_BINS]; + struct pisp_agc_statistics_zone floating[PISP_FLOATING_STATS_NUM_ZONES]; +} __attribute__((packed)); + +#define PISP_AWB_STATS_SIZE 32 +#define PISP_AWB_STATS_NUM_ZONES (PISP_AWB_STATS_SIZE * PISP_AWB_STATS_SIZE) + +struct pisp_awb_statistics_zone { + __u32 R_sum; + __u32 G_sum; + __u32 B_sum; + __u32 counted; +} __attribute__((packed)); + +struct pisp_awb_statistics { + struct pisp_awb_statistics_zone zones[PISP_AWB_STATS_NUM_ZONES]; + struct pisp_awb_statistics_zone floating[PISP_FLOATING_STATS_NUM_ZONES]; +} __attribute__((packed)); + +#define PISP_CDAF_STATS_SIZE 8 +#define PISP_CDAF_STATS_NUM_FOMS (PISP_CDAF_STATS_SIZE * PISP_CDAF_STATS_SIZE) + +struct pisp_cdaf_statistics { + __u64 foms[PISP_CDAF_STATS_NUM_FOMS]; + __u64 floating[PISP_FLOATING_STATS_NUM_ZONES]; +} __attribute__((packed)); + +struct pisp_statistics { + struct pisp_awb_statistics awb; + struct pisp_agc_statistics agc; + struct pisp_cdaf_statistics cdaf; +} __attribute__((packed)); + +#endif /* _UAPI_PISP_FE_STATISTICS_H_ */ -- cgit v1.2.3 From 30fe661eb9d32d0aa65a86507572bd654a374c56 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 17 Sep 2024 15:14:44 +0300 Subject: media: Documentation: Deprecate s_stream video op, update docs The scope of the s_stream video operation is now fully supported by {enable,disable}_streams. Explicitly document the s_stream() op as deprecated and update the related documentation. Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Reviewed-by: Tomi Valkeinen Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-subdev.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index 8daa0929865c..3cc6b4a5935f 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -450,8 +450,9 @@ enum v4l2_subdev_pre_streamon_flags { * already started or stopped subdev. Also see call_s_stream wrapper in * v4l2-subdev.c. * - * New drivers should instead implement &v4l2_subdev_pad_ops.enable_streams - * and &v4l2_subdev_pad_ops.disable_streams operations, and use + * This callback is DEPRECATED. New drivers should instead implement + * &v4l2_subdev_pad_ops.enable_streams and + * &v4l2_subdev_pad_ops.disable_streams operations, and use * v4l2_subdev_s_stream_helper for the &v4l2_subdev_video_ops.s_stream * operation to support legacy users. * -- cgit v1.2.3 From 646aed96d51088c1f24b4d710fa6fa4a17ecc484 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 17 Sep 2024 15:31:47 +0300 Subject: media: Documentation: Update {enable,disable}_streams documentation Document the expected {enable,disable}_streams callback behaviour for drivers that are stream-unaware i.e. don't specify the V4L2_SUBDEV_CAP_STREAMS sub-device capability flag. In this specific case, the mask argument can be ignored. Signed-off-by: Sakari Ailus Reviewed-by: Tomi Valkeinen Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-subdev.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index 3cc6b4a5935f..ff63fb6046b1 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -834,11 +834,19 @@ struct v4l2_subdev_state { * v4l2_subdev_init_finalize() at initialization time). Do not call * directly, use v4l2_subdev_enable_streams() instead. * + * Drivers that support only a single stream without setting the + * V4L2_SUBDEV_CAP_STREAMS sub-device capability flag can ignore the mask + * argument. + * * @disable_streams: Disable the streams defined in streams_mask on the given * source pad. Subdevs that implement this operation must use the active * state management provided by the subdev core (enabled through a call to * v4l2_subdev_init_finalize() at initialization time). Do not call * directly, use v4l2_subdev_disable_streams() instead. + * + * Drivers that support only a single stream without setting the + * V4L2_SUBDEV_CAP_STREAMS sub-device capability flag can ignore the mask + * argument. */ struct v4l2_subdev_pad_ops { int (*enum_mbus_code)(struct v4l2_subdev *sd, -- cgit v1.2.3 From fd0e579bc62cb1766469866a89471d2d8c8b721d Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 17 Sep 2024 17:58:58 +0300 Subject: media: Documentation: Improve v4l2_subdev_{en,dis}able_streams documentation Document that callers of v4l2_subdev_{en,dis}able_streams() need to set the mask to BIT_ULL(0). Signed-off-by: Sakari Ailus Reviewed-by: Tomi Valkeinen Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-subdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index ff63fb6046b1..2f2200875b03 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -1685,6 +1685,8 @@ int v4l2_subdev_routing_validate(struct v4l2_subdev *sd, * function implements a best-effort compatibility by calling the .s_stream() * operation, limited to subdevs that have a single source pad. * + * Drivers that are not stream-aware shall set @streams_mask to BIT_ULL(0). + * * Return: * * 0: Success * * -EALREADY: One of the streams in streams_mask is already enabled @@ -1715,6 +1717,8 @@ int v4l2_subdev_enable_streams(struct v4l2_subdev *sd, u32 pad, * function implements a best-effort compatibility by calling the .s_stream() * operation, limited to subdevs that have a single source pad. * + * Drivers that are not stream-aware shall set @streams_mask to BIT_ULL(0). + * * Return: * * 0: Success * * -EALREADY: One of the streams in streams_mask is not enabled -- cgit v1.2.3 From 719258c55f7e9c123ef2a0941c6730b6d43c1bc2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 9 Sep 2024 15:49:41 +0200 Subject: mfd: palmas: Constify strings with regulator names The names of regulators are static const strings, so pointers can be made as pointers to const for code safety. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240909134941.121847-1-krzysztof.kozlowski@linaro.org Signed-off-by: Lee Jones --- include/linux/mfd/palmas.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index eda1ffd99c1a..dabcc0dea802 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -98,8 +98,8 @@ struct palmas_sleep_requestor_info { }; struct palmas_regs_info { - char *name; - char *sname; + const char *name; + const char *sname; u8 vsel_addr; u8 ctrl_addr; u8 tstep_addr; -- cgit v1.2.3 From d5340a18cd321075442677ce1002d8e163b17b67 Mon Sep 17 00:00:00 2001 From: Dzmitry Sankouski Date: Thu, 26 Sep 2024 13:28:10 +0300 Subject: mfd: max77693: Remove unused max77693_irq_source declarations Remove `enum max77693_irq_source` declaration because unused. Signed-off-by: Dzmitry Sankouski Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240617-starqltechn_integration_upstream-v5-1-125d9228d751@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/max77693-private.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index 20c5e02ed9da..c324d548619e 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -419,17 +419,6 @@ enum max77693_haptic_reg { #define MAX77693_CONFIG2_MEN 6 #define MAX77693_CONFIG2_HTYP 5 -enum max77693_irq_source { - LED_INT = 0, - TOPSYS_INT, - CHG_INT, - MUIC_INT1, - MUIC_INT2, - MUIC_INT3, - - MAX77693_IRQ_GROUP_NR, -}; - #define SRC_IRQ_CHARGER BIT(0) #define SRC_IRQ_TOP BIT(1) #define SRC_IRQ_FLASH BIT(2) -- cgit v1.2.3 From bf231e5febcf9358d7e70a2c6974548f7f3e4f61 Mon Sep 17 00:00:00 2001 From: Dzmitry Sankouski Date: Thu, 26 Sep 2024 12:47:31 +0300 Subject: mfd: sec-core: Add support for the Samsung s2dos05 S2DOS05 is a panel/touchscreen PMIC, often found in Samsung phones. We define regulator sub-device for which driver will be added in subsequent patch. The device also has ADC for power and current measurements. Signed-off-by: Dzmitry Sankouski Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240617-starqltechn_integration_upstream-v5-2-ea1109029ba5@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/samsung/core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index a212b9f72bc9..750274d41fc0 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -37,6 +37,7 @@ struct gpio_desc; enum sec_device_type { S5M8767X, + S2DOS05, S2MPA01, S2MPS11X, S2MPS13X, -- cgit v1.2.3 From 2b627246c0cf2b54f1b17531dde964cf5b99b2a5 Mon Sep 17 00:00:00 2001 From: Pavan Holla Date: Tue, 10 Sep 2024 10:15:21 +0000 Subject: platform/chrome: Update EC feature flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define EC_FEATURE_UCSI_PPM to enable usage of the cros_ec_ucsi driver. Also, add any feature flags that are implemented by the EC but are missing in the kernel header. Signed-off-by: Pavan Holla Signed-off-by: Łukasz Bartosik Acked-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20240910101527.603452-3-ukaszb@chromium.org Signed-off-by: Lee Jones --- include/linux/platform_data/cros_ec_commands.h | 32 ++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index e574b790be6f..b3c4993e656e 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -1312,6 +1312,38 @@ enum ec_feature_code { * The EC supports the AP composing VDMs for us to send. */ EC_FEATURE_TYPEC_AP_VDM_SEND = 46, + /* + * The EC supports system safe mode panic recovery. + */ + EC_FEATURE_SYSTEM_SAFE_MODE = 47, + /* + * The EC will reboot on runtime assertion failures. + */ + EC_FEATURE_ASSERT_REBOOTS = 48, + /* + * The EC image is built with tokenized logging enabled. + */ + EC_FEATURE_TOKENIZED_LOGGING = 49, + /* + * The EC supports triggering an STB dump. + */ + EC_FEATURE_AMD_STB_DUMP = 50, + /* + * The EC supports memory dump commands. + */ + EC_FEATURE_MEMORY_DUMP = 51, + /* + * The EC supports DP2.1 capability + */ + EC_FEATURE_TYPEC_DP2_1 = 52, + /* + * The MCU is System Companion Processor Core 1 + */ + EC_FEATURE_SCP_C1 = 53, + /* + * The EC supports UCSI PPM. + */ + EC_FEATURE_UCSI_PPM = 54, }; #define EC_FEATURE_MASK_0(event_code) BIT(event_code % 32) -- cgit v1.2.3 From 7b5a58952fc3b51905c2963647485565df1e5e26 Mon Sep 17 00:00:00 2001 From: Akash Kumar Date: Fri, 27 Sep 2024 20:51:38 +0530 Subject: usb: gadget: uvc: configfs: Add frame-based frame format support Add support for frame-based frame format, which can be used to support multiple formats like H264 or H265, in addition to MJPEG and YUV frames. The frame-based format is set to H264 by default, but it can be updated to other formats by modifying the GUID through the guid configfs attribute. Different structures are used for all three formats, as H264 has a different structure compared to MJPEG and uncompressed formats. These structures will be passed to the frame make function based on the active format, using a common frame structure with additional parameters needed only for frame-based formats. These parameters are handled at runtime in the UVC driver. Signed-off-by: Akash Kumar Link: https://lore.kernel.org/r/20240927152138.31416-1-quic_akakum@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/video.h | 58 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/usb/video.h b/include/uapi/linux/usb/video.h index 2ff0e8a3a683..526b5155e23c 100644 --- a/include/uapi/linux/usb/video.h +++ b/include/uapi/linux/usb/video.h @@ -597,5 +597,63 @@ struct UVC_FRAME_MJPEG(n) { \ __le32 dwFrameInterval[n]; \ } __attribute__ ((packed)) +/* Frame Based Payload - 3.1.1. Frame Based Video Format Descriptor */ +struct uvc_format_framebased { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubType; + __u8 bFormatIndex; + __u8 bNumFrameDescriptors; + __u8 guidFormat[16]; + __u8 bBitsPerPixel; + __u8 bDefaultFrameIndex; + __u8 bAspectRatioX; + __u8 bAspectRatioY; + __u8 bmInterfaceFlags; + __u8 bCopyProtect; + __u8 bVariableSize; +} __attribute__((__packed__)); + +#define UVC_DT_FORMAT_FRAMEBASED_SIZE 28 + +/* Frame Based Payload - 3.1.2. Frame Based Video Frame Descriptor */ +struct uvc_frame_framebased { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubType; + __u8 bFrameIndex; + __u8 bmCapabilities; + __u16 wWidth; + __u16 wHeight; + __u32 dwMinBitRate; + __u32 dwMaxBitRate; + __u32 dwDefaultFrameInterval; + __u8 bFrameIntervalType; + __u32 dwBytesPerLine; + __u32 dwFrameInterval[]; +} __attribute__((__packed__)); + +#define UVC_DT_FRAME_FRAMEBASED_SIZE(n) (26+4*(n)) + +#define UVC_FRAME_FRAMEBASED(n) \ + uvc_frame_framebased_##n + +#define DECLARE_UVC_FRAME_FRAMEBASED(n) \ +struct UVC_FRAME_FRAMEBASED(n) { \ + __u8 bLength; \ + __u8 bDescriptorType; \ + __u8 bDescriptorSubType; \ + __u8 bFrameIndex; \ + __u8 bmCapabilities; \ + __u16 wWidth; \ + __u16 wHeight; \ + __u32 dwMinBitRate; \ + __u32 dwMaxBitRate; \ + __u32 dwDefaultFrameInterval; \ + __u8 bFrameIntervalType; \ + __u32 dwBytesPerLine; \ + __u32 dwFrameInterval[n]; \ +} __attribute__ ((packed)) + #endif /* __LINUX_USB_VIDEO_H */ -- cgit v1.2.3 From 522249f05c5551aec9ec0ba9b6438f1ec19c138d Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 3 Oct 2024 16:29:22 +0200 Subject: fanotify: allow reporting errors on failure to open fd When working in "fd mode", fanotify_read() needs to open an fd from a dentry to report event->fd to userspace. Opening an fd from dentry can fail for several reasons. For example, when tasks are gone and we try to open their /proc files or we try to open a WRONLY file like in sysfs or when trying to open a file that was deleted on the remote network server. Add a new flag FAN_REPORT_FD_ERROR for fanotify_init(). For a group with FAN_REPORT_FD_ERROR, we will send the event with the error instead of the open fd, otherwise userspace may not get the error at all. For an overflow event, we report -EBADF to avoid confusing FAN_NOFD with -EPERM. Similarly for pidfd open errors we report either -ESRCH or the open error instead of FAN_NOPIDFD and FAN_EPIDFD. In any case, userspace will not know which file failed to open, so add a debug print for further investigation. Reported-by: Krishna Vivek Vitta Link: https://lore.kernel.org/linux-fsdevel/SI2P153MB07182F3424619EDDD1F393EED46D2@SI2P153MB0718.APCP153.PROD.OUTLOOK.COM/ Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20241003142922.111539-1-amir73il@gmail.com --- include/linux/fanotify.h | 1 + include/uapi/linux/fanotify.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 4f1c4f603118..89ff45bd6f01 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -36,6 +36,7 @@ #define FANOTIFY_ADMIN_INIT_FLAGS (FANOTIFY_PERM_CLASSES | \ FAN_REPORT_TID | \ FAN_REPORT_PIDFD | \ + FAN_REPORT_FD_ERROR | \ FAN_UNLIMITED_QUEUE | \ FAN_UNLIMITED_MARKS) diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index a37de58ca571..34f221d3a1b9 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -60,6 +60,7 @@ #define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */ #define FAN_REPORT_NAME 0x00000800 /* Report events with name */ #define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ +#define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */ /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ #define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) -- cgit v1.2.3 From d6083f040d5d8f8d748462c77e90547097df936e Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Tue, 15 Oct 2024 23:02:06 +0800 Subject: bpf: Prevent tailcall infinite loop caused by freplace There is a potential infinite loop issue that can occur when using a combination of tail calls and freplace. In an upcoming selftest, the attach target for entry_freplace of tailcall_freplace.c is subprog_tc of tc_bpf2bpf.c, while the tail call in entry_freplace leads to entry_tc. This results in an infinite loop: entry_tc -> subprog_tc -> entry_freplace --tailcall-> entry_tc. The problem arises because the tail_call_cnt in entry_freplace resets to zero each time entry_freplace is executed, causing the tail call mechanism to never terminate, eventually leading to a kernel panic. To fix this issue, the solution is twofold: 1. Prevent updating a program extended by an freplace program to a prog_array map. 2. Prevent extending a program that is already part of a prog_array map with an freplace program. This ensures that: * If a program or its subprogram has been extended by an freplace program, it can no longer be updated to a prog_array map. * If a program has been added to a prog_array map, neither it nor its subprograms can be extended by an freplace program. Moreover, an extension program should not be tailcalled. As such, return -EINVAL if the program has a type of BPF_PROG_TYPE_EXT when adding it to a prog_array map. Additionally, fix a minor code style issue by replacing eight spaces with a tab for proper formatting. Reviewed-by: Eduard Zingerman Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20241015150207.70264-2-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 19d8ca8ac960..0c216e71cec7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1292,8 +1292,12 @@ void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); #ifdef CONFIG_BPF_JIT -int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); -int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); +int bpf_trampoline_link_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog); +int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog); struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); @@ -1374,12 +1378,14 @@ void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link, - struct bpf_trampoline *tr) + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { return -ENOTSUPP; } static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, - struct bpf_trampoline *tr) + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { return -ENOTSUPP; } @@ -1483,6 +1489,9 @@ struct bpf_prog_aux { bool xdp_has_frags; bool exception_cb; bool exception_boundary; + bool is_extended; /* true if extended by freplace program */ + u64 prog_array_member_cnt; /* counts how many times as member of prog_array */ + struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */ struct bpf_arena *arena; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; -- cgit v1.2.3 From ce1dfe6d328966b75821c1f043a940eb2569768a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 12 Oct 2024 20:32:43 +0900 Subject: PCI: endpoint: Introduce pci_epc_mem_map()/unmap() Some endpoint controllers have requirements on the alignment of the controller physical memory address that must be used to map a RC PCI address region. For instance, the endpoint controller of the RK3399 SoC uses at most the lower 20 bits of a physical memory address region as the lower bits of a RC PCI address region. For mapping a PCI address region of size bytes starting from pci_addr, the exact number of address bits used is the number of address bits changing in the address range [pci_addr..pci_addr + size - 1]. For this example, this creates the following constraints: 1) The offset into the controller physical memory allocated for a mapping depends on the mapping size *and* the starting PCI address for the mapping. 2) A mapping size cannot exceed the controller windows size (1MB) minus the offset needed into the allocated physical memory, which can end up being a smaller size than the desired mapping size. Handling these constraints independently of the controller being used in an endpoint function driver is not possible with the current EPC API as only the ->align field in struct pci_epc_features is provided but used for BAR (inbound ATU mappings) mapping only. A new API is needed for function drivers to discover mapping constraints and handle non-static requirements based on the RC PCI address range to access. Introduce the endpoint controller operation ->align_addr() to allow the EPC core functions to obtain the size and the offset into a controller address region that must be allocated and mapped to access a RC PCI address region. The size of the mapping provided by the align_addr() operation can then be used as the size argument for the function pci_epc_mem_alloc_addr() and the offset into the allocated controller memory provided can be used to correctly handle data transfers. For endpoint controllers that have PCI address alignment constraints, the align_addr() operation may indicate upon return an effective PCI address mapping size that is smaller (but not 0) than the requested PCI address region size. The controller ->align_addr() operation is optional: controllers that do not have any alignment constraints for mapping RC PCI address regions do not need to implement this operation. For such controllers, it is always assumed that the mapping size is equal to the requested size of the PCI region and that the mapping offset is 0. The function pci_epc_mem_map() is introduced to use this new controller operation (if it is defined) to handle controller memory allocation and mapping to a RC PCI address region in endpoint function drivers. This function first uses the ->align_addr() controller operation to determine the controller memory address size (and offset into) needed for mapping an RC PCI address region. The result of this operation is used to allocate a controller physical memory region using pci_epc_mem_alloc_addr() and then to map that memory to the RC PCI address space with pci_epc_map_addr(). Since ->align_addr() () may indicate that not all of a RC PCI address region can be mapped, pci_epc_mem_map() may only partially map the RC PCI address region specified. It is the responsibility of the caller (an endpoint function driver) to handle such smaller mapping by repeatedly using pci_epc_mem_map() over the desried PCI address range. The counterpart of pci_epc_mem_map() to unmap and free a mapped controller memory address region is pci_epc_mem_unmap(). Both functions operate using the new struct pci_epc_map data structure. This new structure represents a mapping PCI address, mapping effective size, the size of the controller memory needed for the mapping as well as the physical and virtual CPU addresses of the mapping (phys_base and virt_base fields). For convenience, the physical and virtual CPU addresses within that mapping to use to access the target RC PCI address region are also provided (phys_addr and virt_addr fields). Endpoint function drivers can use struct pci_epc_map to access the mapped RC PCI address region using the ->virt_addr and ->pci_size fields. Co-developed-by: Rick Wertenbroek Signed-off-by: Rick Wertenbroek Signed-off-by: Damien Le Moal Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20241012113246.95634-4-dlemoal@kernel.org [mani: squashed the patch that changed phy_addr_t to u64] Signed-off-by: Manivannan Sadhasivam --- include/linux/pci-epc.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 42ef06136bd1..de8cc3658220 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -32,11 +32,43 @@ pci_epc_interface_string(enum pci_epc_interface_type type) } } +/** + * struct pci_epc_map - information about EPC memory for mapping a RC PCI + * address range + * @pci_addr: start address of the RC PCI address range to map + * @pci_size: size of the RC PCI address range mapped from @pci_addr + * @map_pci_addr: RC PCI address used as the first address mapped (may be lower + * than @pci_addr) + * @map_size: size of the controller memory needed for mapping the RC PCI address + * range @pci_addr..@pci_addr+@pci_size + * @phys_base: base physical address of the allocated EPC memory for mapping the + * RC PCI address range + * @phys_addr: physical address at which @pci_addr is mapped + * @virt_base: base virtual address of the allocated EPC memory for mapping the + * RC PCI address range + * @virt_addr: virtual address at which @pci_addr is mapped + */ +struct pci_epc_map { + u64 pci_addr; + size_t pci_size; + + u64 map_pci_addr; + size_t map_size; + + phys_addr_t phys_base; + phys_addr_t phys_addr; + void __iomem *virt_base; + void __iomem *virt_addr; +}; + /** * struct pci_epc_ops - set of function pointers for performing EPC operations * @write_header: ops to populate configuration space header * @set_bar: ops to configure the BAR * @clear_bar: ops to reset the BAR + * @align_addr: operation to get the mapping address, mapping size and offset + * into a controller memory window needed to map an RC PCI address + * region * @map_addr: ops to map CPU address to PCI address * @unmap_addr: ops to unmap CPU address and PCI address * @set_msi: ops to set the requested number of MSI interrupts in the MSI @@ -61,6 +93,8 @@ struct pci_epc_ops { struct pci_epf_bar *epf_bar); void (*clear_bar)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, struct pci_epf_bar *epf_bar); + u64 (*align_addr)(struct pci_epc *epc, u64 pci_addr, size_t *size, + size_t *offset); int (*map_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, phys_addr_t addr, u64 pci_addr, size_t size); void (*unmap_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, @@ -278,6 +312,10 @@ void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc, phys_addr_t *phys_addr, size_t size); void pci_epc_mem_free_addr(struct pci_epc *epc, phys_addr_t phys_addr, void __iomem *virt_addr, size_t size); +int pci_epc_mem_map(struct pci_epc *epc, u8 func_no, u8 vfunc_no, + u64 pci_addr, size_t pci_size, struct pci_epc_map *map); +void pci_epc_mem_unmap(struct pci_epc *epc, u8 func_no, u8 vfunc_no, + struct pci_epc_map *map); #else static inline void pci_epc_init_notify(struct pci_epc *epc) -- cgit v1.2.3 From 5280a14a6079040205a1d968cd80f20448d047c7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 15 Oct 2024 12:09:32 -0700 Subject: genirq: Introduce irq_get_nr_irqs() and irq_set_nr_irqs() Prepare for changing 'nr_irqs' from an exported global variable into a variable with file scope. This will prevent accidental changes of assignments to a local variable 'nr_irqs' into assignments to the global 'nr_irqs' variable. Suppose that a patch would be submitted for review that removes a declaration of a local variable with the name 'nr_irqs' and that that patch does not remove all assignments to that local variable. Such a patch converts an assignment to a local variable into an assignment into a global variable. If the 'nr_irqs' assignment is more than three lines away from other changes, the assignment won't be included in the diff context lines and hence won't be visible without inspecting the modified file. With these abstraction series applied, such accidental conversions from assignments to a local variable into an assignment to a global variable are converted into a compilation error. Signed-off-by: Bart Van Assche Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241015190953.1266194-2-bvanassche@acm.org --- include/linux/irqnr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 3496baa0b07f..7419b807b71b 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -6,6 +6,8 @@ extern int nr_irqs; +unsigned int irq_get_nr_irqs(void) __pure; +unsigned int irq_set_nr_irqs(unsigned int nr); extern struct irq_desc *irq_to_desc(unsigned int irq); unsigned int irq_get_next_irq(unsigned int offset); -- cgit v1.2.3 From 1ad2048bf7146efb83bc033147ca1611a7fe8494 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 15 Oct 2024 12:09:52 -0700 Subject: genirq: Switch to irq_get_nr_irqs() Use the irq_get_nr_irqs() function instead of the global variable 'nr_irqs'. Cache the result of this function in a local variable in order not to rely on CSE (common subexpression elimination). Prepare for changing 'nr_irqs' from an exported global variable into a variable with file scope. Signed-off-by: Bart Van Assche Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241015190953.1266194-22-bvanassche@acm.org --- include/linux/irqnr.h | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 7419b807b71b..a33088d27c54 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -11,26 +11,31 @@ unsigned int irq_set_nr_irqs(unsigned int nr); extern struct irq_desc *irq_to_desc(unsigned int irq); unsigned int irq_get_next_irq(unsigned int offset); -# define for_each_irq_desc(irq, desc) \ - for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ - irq++, desc = irq_to_desc(irq)) \ - if (!desc) \ - ; \ - else - +#define for_each_irq_desc(irq, desc) \ + for (unsigned int __nr_irqs__ = irq_get_nr_irqs(); __nr_irqs__; \ + __nr_irqs__ = 0) \ + for (irq = 0, desc = irq_to_desc(irq); irq < __nr_irqs__; \ + irq++, desc = irq_to_desc(irq)) \ + if (!desc) \ + ; \ + else # define for_each_irq_desc_reverse(irq, desc) \ - for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; \ - irq--, desc = irq_to_desc(irq)) \ + for (irq = irq_get_nr_irqs() - 1, desc = irq_to_desc(irq); \ + irq >= 0; irq--, desc = irq_to_desc(irq)) \ if (!desc) \ ; \ else -# define for_each_active_irq(irq) \ - for (irq = irq_get_next_irq(0); irq < nr_irqs; \ - irq = irq_get_next_irq(irq + 1)) +#define for_each_active_irq(irq) \ + for (unsigned int __nr_irqs__ = irq_get_nr_irqs(); __nr_irqs__; \ + __nr_irqs__ = 0) \ + for (irq = irq_get_next_irq(0); irq < __nr_irqs__; \ + irq = irq_get_next_irq(irq + 1)) -#define for_each_irq_nr(irq) \ - for (irq = 0; irq < nr_irqs; irq++) +#define for_each_irq_nr(irq) \ + for (unsigned int __nr_irqs__ = irq_get_nr_irqs(); __nr_irqs__; \ + __nr_irqs__ = 0) \ + for (irq = 0; irq < __nr_irqs__; irq++) #endif -- cgit v1.2.3 From ef4c675dc2961ee533bdc1ea20390761df0af5be Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 15 Oct 2024 12:09:53 -0700 Subject: genirq: Unexport nr_irqs Unexport nr_irqs and declare it static now that all code that reads or modifies nr_irqs has been converted to number_of_interrupts() / set_number_of_interrupts(). Change the type of 'nr_irqs' from 'int' into 'unsigned int' to match the return type and argument type of the irq_get_nr_iqs() / irq_set_nr_irqs() functions. Signed-off-by: Bart Van Assche Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241015190953.1266194-23-bvanassche@acm.org --- include/linux/irqnr.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index a33088d27c54..e97206c721a0 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -5,7 +5,6 @@ #include -extern int nr_irqs; unsigned int irq_get_nr_irqs(void) __pure; unsigned int irq_set_nr_irqs(unsigned int nr); extern struct irq_desc *irq_to_desc(unsigned int irq); -- cgit v1.2.3 From cf70da29c4993bf23df68b67a82dfa3da8234e75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 5 Oct 2024 12:06:16 +0200 Subject: power: supply: core: unexport power_supply_property_is_writeable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit ("power: supply: Drop use_cnt check from power_supply_property_is_writeable()"), this function does not check use_cnt anymore, making it unsuitable for general usage. As it is only used by the psy core anyways, remove it from the public header and unexport it to avoid misusage. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241005-power-supply-cleanups-v1-2-45303b2d0a4d@weissschuh.net Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 14eedefb3ac8..c1d6cb7f4c40 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -865,8 +865,6 @@ static inline int power_supply_set_property(struct power_supply *psy, const union power_supply_propval *val) { return 0; } #endif -extern int power_supply_property_is_writeable(struct power_supply *psy, - enum power_supply_property psp); extern void power_supply_external_power_changed(struct power_supply *psy); extern struct power_supply *__must_check -- cgit v1.2.3 From 8060bcb109f2d9b85451e84a7a08042da40368df Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 16 Oct 2024 16:18:31 +0300 Subject: usb: typec: Add attribute file showing the supported USB modes of the port This attribute file, named "usb_capability", will show the supported USB modes, which are USB 2.0, USB 3.2 and USB4. These modes are defined in the USB Type-C (R2.0) and USB Power Delivery (R3.0 V2.0) Specifications. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20241016131834.898599-2-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 549275f8ac1b..f7edced5b10b 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -87,6 +87,17 @@ enum typec_orientation { TYPEC_ORIENTATION_REVERSE, }; +enum usb_mode { + USB_MODE_NONE, + USB_MODE_USB2, + USB_MODE_USB3, + USB_MODE_USB4 +}; + +#define USB_CAPABILITY_USB2 BIT(0) +#define USB_CAPABILITY_USB3 BIT(1) +#define USB_CAPABILITY_USB4 BIT(2) + /* * struct enter_usb_data - Enter_USB Message details * @eudo: Enter_USB Data Object @@ -240,6 +251,7 @@ struct typec_partner_desc { * @port_type_set: Set port type * @pd_get: Get available USB Power Delivery Capabilities. * @pd_set: Set USB Power Delivery Capabilities. + * @default_usb_mode_set: USB Mode to be used by default with Enter_USB Message */ struct typec_operations { int (*try_role)(struct typec_port *port, int role); @@ -250,6 +262,7 @@ struct typec_operations { enum typec_port_type type); struct usb_power_delivery **(*pd_get)(struct typec_port *port); int (*pd_set)(struct typec_port *port, struct usb_power_delivery *pd); + int (*default_usb_mode_set)(struct typec_port *port, enum usb_mode mode); }; enum usb_pd_svdm_ver { @@ -267,6 +280,7 @@ enum usb_pd_svdm_ver { * @svdm_version: USB PD Structured VDM version if supported * @prefer_role: Initial role preference (DRP ports). * @accessory: Supported Accessory Modes + * @usb_capability: Supported USB Modes * @fwnode: Optional fwnode of the port * @driver_data: Private pointer for driver specific info * @pd: Optional USB Power Delivery Support @@ -283,6 +297,7 @@ struct typec_capability { int prefer_role; enum typec_accessory accessory[TYPEC_MAX_ACCESSORY]; unsigned int orientation_aware:1; + u8 usb_capability; struct fwnode_handle *fwnode; void *driver_data; @@ -350,6 +365,8 @@ int typec_port_set_usb_power_delivery(struct typec_port *port, struct usb_power_ int typec_partner_set_usb_power_delivery(struct typec_partner *partner, struct usb_power_delivery *pd); +void typec_port_set_usb_mode(struct typec_port *port, enum usb_mode mode); + /** * struct typec_connector - Representation of Type-C port for external drivers * @attach: notification about device removal -- cgit v1.2.3 From 2140a952c4e9c73993ae6d9c2cc674d263d4beab Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 16 Oct 2024 16:18:32 +0300 Subject: usb: typec: Add attribute file showing the USB Modes of the partner This attribute file shows the supported USB modes (USB 2.0, USB 3.0 and USB4) of the partner, and the currently active mode. The active mode is determined primarily by checking the speed of the enumerated USB device. When USB Power Delivery is supported, the active USB mode should be always the mode that was used with the Enter_USB Message, regardless of the result of the USB enumeration. The port drivers can separately assign the mode with a dedicated API. If USB Power Delivery Identity is supplied for the partner device, the supported modes are extracted from it. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20241016131834.898599-3-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index f7edced5b10b..d616b8807000 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -220,6 +220,7 @@ struct typec_cable_desc { * @accessory: Audio, Debug or none. * @identity: Discover Identity command data * @pd_revision: USB Power Delivery Specification Revision if supported + * @usb_capability: Supported USB Modes * @attach: Notification about attached USB device * @deattach: Notification about removed USB device * @@ -237,6 +238,7 @@ struct typec_partner_desc { enum typec_accessory accessory; struct usb_pd_identity *identity; u16 pd_revision; /* 0300H = "3.0" */ + u8 usb_capability; void (*attach)(struct typec_partner *partner, struct device *dev); void (*deattach)(struct typec_partner *partner, struct device *dev); @@ -252,6 +254,7 @@ struct typec_partner_desc { * @pd_get: Get available USB Power Delivery Capabilities. * @pd_set: Set USB Power Delivery Capabilities. * @default_usb_mode_set: USB Mode to be used by default with Enter_USB Message + * @enter_usb_mode: Change the active USB Mode */ struct typec_operations { int (*try_role)(struct typec_port *port, int role); @@ -263,6 +266,7 @@ struct typec_operations { struct usb_power_delivery **(*pd_get)(struct typec_port *port); int (*pd_set)(struct typec_port *port, struct usb_power_delivery *pd); int (*default_usb_mode_set)(struct typec_port *port, enum usb_mode mode); + int (*enter_usb_mode)(struct typec_port *port, enum usb_mode mode); }; enum usb_pd_svdm_ver { @@ -365,6 +369,7 @@ int typec_port_set_usb_power_delivery(struct typec_port *port, struct usb_power_ int typec_partner_set_usb_power_delivery(struct typec_partner *partner, struct usb_power_delivery *pd); +void typec_partner_set_usb_mode(struct typec_partner *partner, enum usb_mode usb_mode); void typec_port_set_usb_mode(struct typec_port *port, enum usb_mode mode); /** -- cgit v1.2.3 From d42a254633c773921884a19e8a1a0f53a31150c3 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 16 Oct 2024 13:20:09 +0100 Subject: drm/sched: Optimise drm_sched_entity_push_job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In FIFO mode (which is the default), both drm_sched_entity_push_job() and drm_sched_rq_update_fifo(), where the latter calls the former, are currently taking and releasing the same entity->rq_lock. We can avoid that design inelegance, and also have a miniscule efficiency improvement on the submit from idle path, by introducing a new drm_sched_rq_update_fifo_locked() helper and pulling up the lock taking to its callers. v2: * Remove drm_sched_rq_update_fifo() altogether. (Christian) v3: * Improved commit message. (Philipp) Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Alex Deucher Cc: Luben Tuikov Cc: Matthew Brost Cc: Philipp Stanner Reviewed-by: Christian König Signed-off-by: Philipp Stanner Link: https://patchwork.freedesktop.org/patch/msgid/20241016122013.7857-2-tursulin@igalia.com --- include/drm/gpu_scheduler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index e9f075f51db3..3658a6cb048e 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -593,7 +593,7 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq, void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, struct drm_sched_entity *entity); -void drm_sched_rq_update_fifo(struct drm_sched_entity *entity, ktime_t ts); +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts); int drm_sched_entity_init(struct drm_sched_entity *entity, enum drm_sched_priority priority, -- cgit v1.2.3 From a6f46283e952fe50dea5f932a1e4f0b6b2370968 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 16 Oct 2024 13:20:11 +0100 Subject: drm/sched: Re-order struct drm_sched_rq members for clarity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current kerneldoc for struct drm_sched_rq incompletely documents what fields are protected by the lock. This is not good because it is misleading. Lets fix it by listing all the elements which are protected by the lock. While at it, lets also re-order the members so all protected by the lock are in a single group. v2: * Refer variables by kerneldoc syntax, more verbose commit text. (Philipp) Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Alex Deucher Cc: Luben Tuikov Cc: Matthew Brost Cc: Philipp Stanner Reviewed-by: Christian König Reviewed-by: Philipp Stanner Signed-off-by: Philipp Stanner Link: https://patchwork.freedesktop.org/patch/msgid/20241016122013.7857-4-tursulin@igalia.com --- include/drm/gpu_scheduler.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 3658a6cb048e..b6d095074c19 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -243,10 +243,10 @@ struct drm_sched_entity { /** * struct drm_sched_rq - queue of entities to be scheduled. * - * @lock: to modify the entities list. * @sched: the scheduler to which this rq belongs to. - * @entities: list of the entities to be scheduled. + * @lock: protects @entities, @rb_tree_root and @current_entity. * @current_entity: the entity which is to be scheduled. + * @entities: list of the entities to be scheduled. * @rb_tree_root: root of time based priority queue of entities for FIFO scheduling * * Run queue is a set of entities scheduling command submissions for @@ -254,10 +254,12 @@ struct drm_sched_entity { * the next entity to emit commands from. */ struct drm_sched_rq { - spinlock_t lock; struct drm_gpu_scheduler *sched; - struct list_head entities; + + spinlock_t lock; + /* Following members are protected by the @lock: */ struct drm_sched_entity *current_entity; + struct list_head entities; struct rb_root_cached rb_tree_root; }; -- cgit v1.2.3 From f93126f5d55920d1447ef00a3fbe6706f40f53de Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 16 Oct 2024 13:20:12 +0100 Subject: drm/sched: Re-group and rename the entity run-queue lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When writing to a drm_sched_entity's run-queue, writers are protected through the lock drm_sched_entity.rq_lock. This naming, however, frequently collides with the separate internal lock of struct drm_sched_rq, resulting in uses like this: spin_lock(&entity->rq_lock); spin_lock(&entity->rq->lock); Rename drm_sched_entity.rq_lock to improve readability. While at it, re-order that struct's members to make it more obvious what the lock protects. v2: * Rename some rq_lock straddlers in kerneldoc, improve commit text. (Philipp) Signed-off-by: Tvrtko Ursulin Suggested-by: Christian König Cc: Alex Deucher Cc: Luben Tuikov Cc: Matthew Brost Cc: Philipp Stanner Reviewed-by: Christian König [pstanner: Fix typo in docstring] Signed-off-by: Philipp Stanner Link: https://patchwork.freedesktop.org/patch/msgid/20241016122013.7857-5-tursulin@igalia.com --- include/drm/gpu_scheduler.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index b6d095074c19..e2b612817e3b 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -96,14 +96,22 @@ struct drm_sched_entity { */ struct list_head list; + /** + * @lock: + * + * Lock protecting the run-queue (@rq) to which this entity belongs, + * @priority and the list of schedulers (@sched_list, @num_sched_list). + */ + spinlock_t lock; + /** * @rq: * * Runqueue on which this entity is currently scheduled. * * FIXME: Locking is very unclear for this. Writers are protected by - * @rq_lock, but readers are generally lockless and seem to just race - * with not even a READ_ONCE. + * @lock, but readers are generally lockless and seem to just race with + * not even a READ_ONCE. */ struct drm_sched_rq *rq; @@ -136,17 +144,10 @@ struct drm_sched_entity { * @priority: * * Priority of the entity. This can be modified by calling - * drm_sched_entity_set_priority(). Protected by &rq_lock. + * drm_sched_entity_set_priority(). Protected by @lock. */ enum drm_sched_priority priority; - /** - * @rq_lock: - * - * Lock to modify the runqueue to which this entity belongs. - */ - spinlock_t rq_lock; - /** * @job_queue: the list of jobs of this entity. */ -- cgit v1.2.3 From 134e71bd1edcc7252b64ca31efe88edfef86d784 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 16 Oct 2024 13:20:13 +0100 Subject: drm/sched: Further optimise drm_sched_entity_push_job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Having removed one re-lock cycle on the entity->lock in a patch titled "drm/sched: Optimise drm_sched_entity_push_job", with only a tiny bit larger refactoring we can do the same optimisation on the rq->lock. (Currently both drm_sched_rq_add_entity() and drm_sched_rq_update_fifo_locked() take and release the same lock.) To achieve this we make drm_sched_rq_update_fifo_locked() and drm_sched_rq_add_entity() expect the rq->lock to be held. We also align drm_sched_rq_update_fifo_locked(), drm_sched_rq_add_entity() and drm_sched_rq_remove_fifo_locked() function signatures, by adding rq as a parameter to the latter. v2: * Fix after rebase of the series. * Avoid naming inconsistency between drm_sched_rq_add/remove. (Christian) Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Alex Deucher Cc: Luben Tuikov Cc: Matthew Brost Cc: Philipp Stanner Reviewed-by: Christian König Reviewed-by: Philipp Stanner Signed-off-by: Philipp Stanner Link: https://patchwork.freedesktop.org/patch/msgid/20241016122013.7857-6-tursulin@igalia.com --- include/drm/gpu_scheduler.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index e2b612817e3b..ab161289d1bf 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -596,7 +596,8 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq, void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, struct drm_sched_entity *entity); -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts); +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, + struct drm_sched_rq *rq, ktime_t ts); int drm_sched_entity_init(struct drm_sched_entity *entity, enum drm_sched_priority priority, -- cgit v1.2.3 From 5bd0d8e687bf04fdd3d4a733a6bb17e25d4a1de2 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Wed, 16 Oct 2024 23:06:51 +0300 Subject: drm/bridge: synopsys: Add DW HDMI QP TX Controller support library The Synopsys DesignWare HDMI 2.1 Quad-Pixel (QP) TX Controller IP supports the following features, among others: * Fixed Rate Link (FRL) * Display Stream Compression (DSC) * 4K@120Hz and 8K@60Hz video modes * Variable Refresh Rate (VRR) including Quick Media Switching (QMS), aka Cinema VRR * Fast Vactive (FVA), aka Quick Frame Transport (QFT) * SCDC I2C DDC access * TMDS Scrambler enabling 2160p@60Hz with RGB/YCbCr4:4:4 * YCbCr4:2:0 enabling 2160p@60Hz at lower HDMI link speeds * Multi-stream audio * Enhanced Audio Return Channel (EARC) Add library containing common helpers to enable basic support, i.e. RGB output up to 4K@30Hz, without audio, CEC or any HDMI 2.1 specific features. Co-developed-by: Algea Cao Signed-off-by: Algea Cao Tested-by: Heiko Stuebner Reviewed-by: Maxime Ripard Signed-off-by: Cristian Ciocaltea Acked-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20241016-b4-rk3588-bridge-upstream-v10-1-87ef92a6d14e@collabora.com Signed-off-by: Maxime Ripard --- include/drm/bridge/dw_hdmi_qp.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/drm/bridge/dw_hdmi_qp.h (limited to 'include') diff --git a/include/drm/bridge/dw_hdmi_qp.h b/include/drm/bridge/dw_hdmi_qp.h new file mode 100644 index 000000000000..e9be6d507ad9 --- /dev/null +++ b/include/drm/bridge/dw_hdmi_qp.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021-2022 Rockchip Electronics Co., Ltd. + * Copyright (c) 2024 Collabora Ltd. + */ + +#ifndef __DW_HDMI_QP__ +#define __DW_HDMI_QP__ + +struct device; +struct drm_encoder; +struct dw_hdmi_qp; +struct platform_device; + +struct dw_hdmi_qp_phy_ops { + int (*init)(struct dw_hdmi_qp *hdmi, void *data); + void (*disable)(struct dw_hdmi_qp *hdmi, void *data); + enum drm_connector_status (*read_hpd)(struct dw_hdmi_qp *hdmi, void *data); + void (*setup_hpd)(struct dw_hdmi_qp *hdmi, void *data); +}; + +struct dw_hdmi_qp_plat_data { + const struct dw_hdmi_qp_phy_ops *phy_ops; + void *phy_data; + int main_irq; +}; + +struct dw_hdmi_qp *dw_hdmi_qp_bind(struct platform_device *pdev, + struct drm_encoder *encoder, + const struct dw_hdmi_qp_plat_data *plat_data); +void dw_hdmi_qp_resume(struct device *dev, struct dw_hdmi_qp *hdmi); +#endif /* __DW_HDMI_QP__ */ -- cgit v1.2.3 From bae7aff5818b89d5d72f6ab0135c8faf3fa54318 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Mon, 7 Oct 2024 15:49:17 +0200 Subject: dt-bindings: clock: add Mobileye EyeQ6L/EyeQ6H clock indexes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add #defines for Mobileye EyeQ6L and EyeQ6H SoC clocks. Constant prefixes are: - EQ6LC_PLL_: EyeQ6L clock PLLs - EQ6HC_SOUTH_PLL_: EyeQ6H south OLB PLLs - EQ6HC_SOUTH_DIV_: EyeQ6H south OLB divider clocks - EQ6HC_ACC_PLL_: EyeQ6H accelerator OLB PLLs Acked-by: Krzysztof Kozlowski Signed-off-by: Théo Lebrun Link: https://lore.kernel.org/r/20241007-mbly-clk-v5-2-e9d8994269cb@bootlin.com Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/mobileye,eyeq5-clk.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/mobileye,eyeq5-clk.h b/include/dt-bindings/clock/mobileye,eyeq5-clk.h index 26d8930335e4..b433c1772c28 100644 --- a/include/dt-bindings/clock/mobileye,eyeq5-clk.h +++ b/include/dt-bindings/clock/mobileye,eyeq5-clk.h @@ -19,4 +19,25 @@ #define EQ5C_DIV_OSPI 10 +#define EQ6LC_PLL_DDR 0 +#define EQ6LC_PLL_CPU 1 +#define EQ6LC_PLL_PER 2 +#define EQ6LC_PLL_VDI 3 + +#define EQ6HC_SOUTH_PLL_VDI 0 +#define EQ6HC_SOUTH_PLL_PCIE 1 +#define EQ6HC_SOUTH_PLL_PER 2 +#define EQ6HC_SOUTH_PLL_ISP 3 + +#define EQ6HC_SOUTH_DIV_EMMC 4 +#define EQ6HC_SOUTH_DIV_OSPI_REF 5 +#define EQ6HC_SOUTH_DIV_OSPI_SYS 6 +#define EQ6HC_SOUTH_DIV_TSU 7 + +#define EQ6HC_ACC_PLL_XNN 0 +#define EQ6HC_ACC_PLL_VMP 1 +#define EQ6HC_ACC_PLL_PMA 2 +#define EQ6HC_ACC_PLL_MPC 3 +#define EQ6HC_ACC_PLL_NOC 4 + #endif -- cgit v1.2.3 From 6a136805e3c1532d2414b298c024429943cfd482 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Mon, 7 Oct 2024 15:49:18 +0200 Subject: clk: divider: Introduce CLK_DIVIDER_EVEN_INTEGERS flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add CLK_DIVIDER_EVEN_INTEGERS flag to support divisor of 2, 4, 6, etc. The same divisor can be done using a table, which would be big and wasteful for a clock dividor of width 8 (256 entries). Require increasing flags size from u8 to u16 because CLK_DIVIDER_EVEN_INTEGERS is the eighth flag. u16 is used inside struct clk_divider; `unsigned long` is used for function arguments. Signed-off-by: Théo Lebrun Link: https://lore.kernel.org/r/20241007-mbly-clk-v5-3-e9d8994269cb@bootlin.com Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 7e43caabb54b..dbe793964c24 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -689,13 +689,15 @@ struct clk_div_table { * CLK_DIVIDER_BIG_ENDIAN - By default little endian register accesses are used * for the divider register. Setting this flag makes the register accesses * big endian. + * CLK_DIVIDER_EVEN_INTEGERS - clock divisor is 2, 4, 6, 8, 10, etc. + * Formula is 2 * (value read from hardware + 1). */ struct clk_divider { struct clk_hw hw; void __iomem *reg; u8 shift; u8 width; - u8 flags; + u16 flags; const struct clk_div_table *table; spinlock_t *lock; }; @@ -711,6 +713,7 @@ struct clk_divider { #define CLK_DIVIDER_READ_ONLY BIT(5) #define CLK_DIVIDER_MAX_AT_ZERO BIT(6) #define CLK_DIVIDER_BIG_ENDIAN BIT(7) +#define CLK_DIVIDER_EVEN_INTEGERS BIT(8) extern const struct clk_ops clk_divider_ops; extern const struct clk_ops clk_divider_ro_ops; @@ -740,19 +743,21 @@ struct clk_hw *__clk_hw_register_divider(struct device *dev, struct device_node *np, const char *name, const char *parent_name, const struct clk_hw *parent_hw, const struct clk_parent_data *parent_data, unsigned long flags, - void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, + void __iomem *reg, u8 shift, u8 width, + unsigned long clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock); struct clk_hw *__devm_clk_hw_register_divider(struct device *dev, struct device_node *np, const char *name, const char *parent_name, const struct clk_hw *parent_hw, const struct clk_parent_data *parent_data, unsigned long flags, - void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, + void __iomem *reg, u8 shift, u8 width, + unsigned long clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock); struct clk *clk_register_divider_table(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, - u8 clk_divider_flags, const struct clk_div_table *table, - spinlock_t *lock); + unsigned long clk_divider_flags, + const struct clk_div_table *table, spinlock_t *lock); /** * clk_register_divider - register a divider clock with the clock framework * @dev: device registering this clock -- cgit v1.2.3 From ea1cca026842bc83af92785e61a4433bcc71ea70 Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Thu, 17 Oct 2024 10:17:05 +0300 Subject: dt-bindings: clock: Add MediaTek MT6735 clock and reset bindings Add clock definitions for the main clock and reset controllers of MT6735 (apmixedsys, topckgen, infracfg and pericfg). Signed-off-by: Yassine Oudjana Reviewed-by: Conor Dooley Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20241017071708.38663-2-y.oudjana@protonmail.com Signed-off-by: Stephen Boyd --- .../dt-bindings/clock/mediatek,mt6735-apmixedsys.h | 16 +++++ .../dt-bindings/clock/mediatek,mt6735-infracfg.h | 25 +++++++ .../dt-bindings/clock/mediatek,mt6735-pericfg.h | 37 ++++++++++ .../dt-bindings/clock/mediatek,mt6735-topckgen.h | 79 ++++++++++++++++++++++ .../dt-bindings/reset/mediatek,mt6735-infracfg.h | 27 ++++++++ .../dt-bindings/reset/mediatek,mt6735-pericfg.h | 31 +++++++++ 6 files changed, 215 insertions(+) create mode 100644 include/dt-bindings/clock/mediatek,mt6735-apmixedsys.h create mode 100644 include/dt-bindings/clock/mediatek,mt6735-infracfg.h create mode 100644 include/dt-bindings/clock/mediatek,mt6735-pericfg.h create mode 100644 include/dt-bindings/clock/mediatek,mt6735-topckgen.h create mode 100644 include/dt-bindings/reset/mediatek,mt6735-infracfg.h create mode 100644 include/dt-bindings/reset/mediatek,mt6735-pericfg.h (limited to 'include') diff --git a/include/dt-bindings/clock/mediatek,mt6735-apmixedsys.h b/include/dt-bindings/clock/mediatek,mt6735-apmixedsys.h new file mode 100644 index 000000000000..b4705204409c --- /dev/null +++ b/include/dt-bindings/clock/mediatek,mt6735-apmixedsys.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_CLK_MT6735_APMIXEDSYS_H +#define _DT_BINDINGS_CLK_MT6735_APMIXEDSYS_H + +#define CLK_APMIXED_ARMPLL 0 +#define CLK_APMIXED_MAINPLL 1 +#define CLK_APMIXED_UNIVPLL 2 +#define CLK_APMIXED_MMPLL 3 +#define CLK_APMIXED_MSDCPLL 4 +#define CLK_APMIXED_VENCPLL 5 +#define CLK_APMIXED_TVDPLL 6 +#define CLK_APMIXED_APLL1 7 +#define CLK_APMIXED_APLL2 8 + +#endif diff --git a/include/dt-bindings/clock/mediatek,mt6735-infracfg.h b/include/dt-bindings/clock/mediatek,mt6735-infracfg.h new file mode 100644 index 000000000000..d8dd51e15637 --- /dev/null +++ b/include/dt-bindings/clock/mediatek,mt6735-infracfg.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_CLK_MT6735_INFRACFG_H +#define _DT_BINDINGS_CLK_MT6735_INFRACFG_H + +#define CLK_INFRA_DBG 0 +#define CLK_INFRA_GCE 1 +#define CLK_INFRA_TRBG 2 +#define CLK_INFRA_CPUM 3 +#define CLK_INFRA_DEVAPC 4 +#define CLK_INFRA_AUDIO 5 +#define CLK_INFRA_GCPU 6 +#define CLK_INFRA_L2C_SRAM 7 +#define CLK_INFRA_M4U 8 +#define CLK_INFRA_CLDMA 9 +#define CLK_INFRA_CONNMCU_BUS 10 +#define CLK_INFRA_KP 11 +#define CLK_INFRA_APXGPT 12 +#define CLK_INFRA_SEJ 13 +#define CLK_INFRA_CCIF0_AP 14 +#define CLK_INFRA_CCIF1_AP 15 +#define CLK_INFRA_PMIC_SPI 16 +#define CLK_INFRA_PMIC_WRAP 17 + +#endif diff --git a/include/dt-bindings/clock/mediatek,mt6735-pericfg.h b/include/dt-bindings/clock/mediatek,mt6735-pericfg.h new file mode 100644 index 000000000000..16bc21bbd95b --- /dev/null +++ b/include/dt-bindings/clock/mediatek,mt6735-pericfg.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_CLK_MT6735_PERICFG_H +#define _DT_BINDINGS_CLK_MT6735_PERICFG_H + +#define CLK_PERI_DISP_PWM 0 +#define CLK_PERI_THERM 1 +#define CLK_PERI_PWM1 2 +#define CLK_PERI_PWM2 3 +#define CLK_PERI_PWM3 4 +#define CLK_PERI_PWM4 5 +#define CLK_PERI_PWM5 6 +#define CLK_PERI_PWM6 7 +#define CLK_PERI_PWM7 8 +#define CLK_PERI_PWM 9 +#define CLK_PERI_USB0 10 +#define CLK_PERI_IRDA 11 +#define CLK_PERI_APDMA 12 +#define CLK_PERI_MSDC30_0 13 +#define CLK_PERI_MSDC30_1 14 +#define CLK_PERI_MSDC30_2 15 +#define CLK_PERI_MSDC30_3 16 +#define CLK_PERI_UART0 17 +#define CLK_PERI_UART1 18 +#define CLK_PERI_UART2 19 +#define CLK_PERI_UART3 20 +#define CLK_PERI_UART4 21 +#define CLK_PERI_BTIF 22 +#define CLK_PERI_I2C0 23 +#define CLK_PERI_I2C1 24 +#define CLK_PERI_I2C2 25 +#define CLK_PERI_I2C3 26 +#define CLK_PERI_AUXADC 27 +#define CLK_PERI_SPI0 28 +#define CLK_PERI_IRTX 29 + +#endif diff --git a/include/dt-bindings/clock/mediatek,mt6735-topckgen.h b/include/dt-bindings/clock/mediatek,mt6735-topckgen.h new file mode 100644 index 000000000000..d4b1e113cc0a --- /dev/null +++ b/include/dt-bindings/clock/mediatek,mt6735-topckgen.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_CLK_MT6735_TOPCKGEN_H +#define _DT_BINDINGS_CLK_MT6735_TOPCKGEN_H + +#define CLK_TOP_AD_SYS_26M_CK 0 +#define CLK_TOP_CLKPH_MCK_O 1 +#define CLK_TOP_DMPLL 2 +#define CLK_TOP_DPI_CK 3 +#define CLK_TOP_WHPLL_AUDIO_CK 4 + +#define CLK_TOP_SYSPLL_D2 5 +#define CLK_TOP_SYSPLL_D3 6 +#define CLK_TOP_SYSPLL_D5 7 +#define CLK_TOP_SYSPLL1_D2 8 +#define CLK_TOP_SYSPLL1_D4 9 +#define CLK_TOP_SYSPLL1_D8 10 +#define CLK_TOP_SYSPLL1_D16 11 +#define CLK_TOP_SYSPLL2_D2 12 +#define CLK_TOP_SYSPLL2_D4 13 +#define CLK_TOP_SYSPLL3_D2 14 +#define CLK_TOP_SYSPLL3_D4 15 +#define CLK_TOP_SYSPLL4_D2 16 +#define CLK_TOP_SYSPLL4_D4 17 +#define CLK_TOP_UNIVPLL_D2 18 +#define CLK_TOP_UNIVPLL_D3 19 +#define CLK_TOP_UNIVPLL_D5 20 +#define CLK_TOP_UNIVPLL_D26 21 +#define CLK_TOP_UNIVPLL1_D2 22 +#define CLK_TOP_UNIVPLL1_D4 23 +#define CLK_TOP_UNIVPLL1_D8 24 +#define CLK_TOP_UNIVPLL2_D2 25 +#define CLK_TOP_UNIVPLL2_D4 26 +#define CLK_TOP_UNIVPLL2_D8 27 +#define CLK_TOP_UNIVPLL3_D2 28 +#define CLK_TOP_UNIVPLL3_D4 29 +#define CLK_TOP_MSDCPLL_D2 30 +#define CLK_TOP_MSDCPLL_D4 31 +#define CLK_TOP_MSDCPLL_D8 32 +#define CLK_TOP_MSDCPLL_D16 33 +#define CLK_TOP_VENCPLL_D3 34 +#define CLK_TOP_TVDPLL_D2 35 +#define CLK_TOP_TVDPLL_D4 36 +#define CLK_TOP_DMPLL_D2 37 +#define CLK_TOP_DMPLL_D4 38 +#define CLK_TOP_DMPLL_D8 39 +#define CLK_TOP_AD_SYS_26M_D2 40 + +#define CLK_TOP_AXI_SEL 41 +#define CLK_TOP_MEM_SEL 42 +#define CLK_TOP_DDRPHY_SEL 43 +#define CLK_TOP_MM_SEL 44 +#define CLK_TOP_PWM_SEL 45 +#define CLK_TOP_VDEC_SEL 46 +#define CLK_TOP_MFG_SEL 47 +#define CLK_TOP_CAMTG_SEL 48 +#define CLK_TOP_UART_SEL 49 +#define CLK_TOP_SPI_SEL 50 +#define CLK_TOP_USB20_SEL 51 +#define CLK_TOP_MSDC50_0_SEL 52 +#define CLK_TOP_MSDC30_0_SEL 53 +#define CLK_TOP_MSDC30_1_SEL 54 +#define CLK_TOP_MSDC30_2_SEL 55 +#define CLK_TOP_MSDC30_3_SEL 56 +#define CLK_TOP_AUDIO_SEL 57 +#define CLK_TOP_AUDINTBUS_SEL 58 +#define CLK_TOP_PMICSPI_SEL 59 +#define CLK_TOP_SCP_SEL 60 +#define CLK_TOP_ATB_SEL 61 +#define CLK_TOP_DPI0_SEL 62 +#define CLK_TOP_SCAM_SEL 63 +#define CLK_TOP_MFG13M_SEL 64 +#define CLK_TOP_AUD1_SEL 65 +#define CLK_TOP_AUD2_SEL 66 +#define CLK_TOP_IRDA_SEL 67 +#define CLK_TOP_IRTX_SEL 68 +#define CLK_TOP_DISPPWM_SEL 69 + +#endif diff --git a/include/dt-bindings/reset/mediatek,mt6735-infracfg.h b/include/dt-bindings/reset/mediatek,mt6735-infracfg.h new file mode 100644 index 000000000000..9df969090377 --- /dev/null +++ b/include/dt-bindings/reset/mediatek,mt6735-infracfg.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_RESET_MT6735_INFRACFG_H +#define _DT_BINDINGS_RESET_MT6735_INFRACFG_H + +#define MT6735_INFRA_RST0_EMI_REG 0 +#define MT6735_INFRA_RST0_DRAMC0_AO 1 +#define MT6735_INFRA_RST0_AP_CIRQ_EINT 2 +#define MT6735_INFRA_RST0_APXGPT 3 +#define MT6735_INFRA_RST0_SCPSYS 4 +#define MT6735_INFRA_RST0_KP 5 +#define MT6735_INFRA_RST0_PMIC_WRAP 6 +#define MT6735_INFRA_RST0_CLDMA_AO_TOP 7 +#define MT6735_INFRA_RST0_USBSIF_TOP 8 +#define MT6735_INFRA_RST0_EMI 9 +#define MT6735_INFRA_RST0_CCIF 10 +#define MT6735_INFRA_RST0_DRAMC0 11 +#define MT6735_INFRA_RST0_EMI_AO_REG 12 +#define MT6735_INFRA_RST0_CCIF_AO 13 +#define MT6735_INFRA_RST0_TRNG 14 +#define MT6735_INFRA_RST0_SYS_CIRQ 15 +#define MT6735_INFRA_RST0_GCE 16 +#define MT6735_INFRA_RST0_M4U 17 +#define MT6735_INFRA_RST0_CCIF1 18 +#define MT6735_INFRA_RST0_CLDMA_TOP_PD 19 + +#endif diff --git a/include/dt-bindings/reset/mediatek,mt6735-pericfg.h b/include/dt-bindings/reset/mediatek,mt6735-pericfg.h new file mode 100644 index 000000000000..a62bb192835a --- /dev/null +++ b/include/dt-bindings/reset/mediatek,mt6735-pericfg.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_RESET_MT6735_PERICFG_H +#define _DT_BINDINGS_RESET_MT6735_PERICFG_H + +#define MT6735_PERI_RST0_UART0 0 +#define MT6735_PERI_RST0_UART1 1 +#define MT6735_PERI_RST0_UART2 2 +#define MT6735_PERI_RST0_UART3 3 +#define MT6735_PERI_RST0_UART4 4 +#define MT6735_PERI_RST0_BTIF 5 +#define MT6735_PERI_RST0_DISP_PWM_PERI 6 +#define MT6735_PERI_RST0_PWM 7 +#define MT6735_PERI_RST0_AUXADC 8 +#define MT6735_PERI_RST0_DMA 9 +#define MT6735_PERI_RST0_IRDA 10 +#define MT6735_PERI_RST0_IRTX 11 +#define MT6735_PERI_RST0_THERM 12 +#define MT6735_PERI_RST0_MSDC2 13 +#define MT6735_PERI_RST0_MSDC3 14 +#define MT6735_PERI_RST0_MSDC0 15 +#define MT6735_PERI_RST0_MSDC1 16 +#define MT6735_PERI_RST0_I2C0 17 +#define MT6735_PERI_RST0_I2C1 18 +#define MT6735_PERI_RST0_I2C2 19 +#define MT6735_PERI_RST0_I2C3 20 +#define MT6735_PERI_RST0_USB 21 + +#define MT6735_PERI_RST1_SPI0 22 + +#endif -- cgit v1.2.3 From 44fcc479a574a4055fb6aed1f786d39999466383 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 17 Oct 2024 06:37:33 +0200 Subject: power: supply: hwmon: move interface to private header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The interface of power_supply_hwmon.c is only meant to be used by the psy core. Remove it from the public header file and use the private one instead. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241017-power-supply-cleanups-v2-1-cb0f5deab088@weissschuh.net Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index c1d6cb7f4c40..b98106e1a90f 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -937,19 +937,6 @@ static inline bool power_supply_is_watt_property(enum power_supply_property psp) return false; } -#ifdef CONFIG_POWER_SUPPLY_HWMON -int power_supply_add_hwmon_sysfs(struct power_supply *psy); -void power_supply_remove_hwmon_sysfs(struct power_supply *psy); -#else -static inline int power_supply_add_hwmon_sysfs(struct power_supply *psy) -{ - return 0; -} - -static inline -void power_supply_remove_hwmon_sysfs(struct power_supply *psy) {} -#endif - #ifdef CONFIG_SYSFS ssize_t power_supply_charge_behaviour_show(struct device *dev, unsigned int available_behaviours, -- cgit v1.2.3 From 22823157d90c4631a951920090686c20c459b36f Mon Sep 17 00:00:00 2001 From: Tomer Maimon Date: Thu, 12 Sep 2024 22:10:37 +0300 Subject: reset: npcm: register npcm8xx clock auxiliary bus device Add NPCM8xx clock controller auxiliary bus device registration. The NPCM8xx clock controller is registered as an aux device because the reset and the clock controller share the same register region. Signed-off-by: Tomer Maimon Tested-by: Benjamin Fair Reviewed-by: Philipp Zabel Link: https://lore.kernel.org/r/20240912191038.981105-3-tmaimon77@gmail.com Signed-off-by: Stephen Boyd --- include/soc/nuvoton/clock-npcm8xx.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/soc/nuvoton/clock-npcm8xx.h (limited to 'include') diff --git a/include/soc/nuvoton/clock-npcm8xx.h b/include/soc/nuvoton/clock-npcm8xx.h new file mode 100644 index 000000000000..1d974e89d8a8 --- /dev/null +++ b/include/soc/nuvoton/clock-npcm8xx.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SOC_NPCM8XX_CLOCK_H +#define __SOC_NPCM8XX_CLOCK_H + +#include +#include + +struct npcm_clock_adev { + void __iomem *base; + struct auxiliary_device adev; +}; + +static inline struct npcm_clock_adev *to_npcm_clock_adev(struct auxiliary_device *_adev) +{ + return container_of(_adev, struct npcm_clock_adev, adev); +} + +#endif -- cgit v1.2.3 From 0784181b44af831a3fa52e1e5ff77c388d699dba Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 26 Sep 2024 16:17:37 +0100 Subject: lockdep: Add lockdep_cleanup_dead_cpu() Add a function to check that an offline CPU has left the tracing infrastructure in a sane state. Commit 9bb69ba4c177 ("ACPI: processor_idle: use raw_safe_halt() in acpi_idle_play_dead()") fixed an issue where the acpi_idle_play_dead() function called safe_halt() instead of raw_safe_halt(), which had the side-effect of setting the hardirqs_enabled flag for the offline CPU. On x86 this triggered warnings from lockdep_assert_irqs_disabled() when the CPU was brought back online again later. These warnings were too early for the exception to be handled correctly, leading to a triple-fault. Add lockdep_cleanup_dead_cpu() to check for this kind of failure mode, print the events leading up to it, and correct it so that the CPU can come online again correctly. Re-introducing the original bug now merely results in this warning instead: [ 61.556652] smpboot: CPU 1 is now offline [ 61.556769] CPU 1 left hardirqs enabled! [ 61.556915] irq event stamp: 128149 [ 61.556965] hardirqs last enabled at (128149): [] acpi_idle_play_dead+0x46/0x70 [ 61.557055] hardirqs last disabled at (128148): [] do_idle+0x90/0xe0 [ 61.557117] softirqs last enabled at (128078): [] __do_softirq+0x31c/0x423 [ 61.557199] softirqs last disabled at (128065): [] __irq_exit_rcu+0x91/0x100 [boqun: Capitalize the title and reword the message a bit] Signed-off-by: David Woodhouse Reviewed-by: Thomas Gleixner Signed-off-by: Boqun Feng Link: https://lore.kernel.org/r/f7bd2b3b999051bb3ef4be34526a9262008285f5.camel@infradead.org --- include/linux/irqflags.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 3f003d5fde53..57b074e0cfbb 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -18,6 +18,8 @@ #include #include +struct task_struct; + /* Currently lockdep_softirqs_on/off is used only by lockdep */ #ifdef CONFIG_PROVE_LOCKING extern void lockdep_softirqs_on(unsigned long ip); @@ -25,12 +27,16 @@ extern void lockdep_hardirqs_on_prepare(void); extern void lockdep_hardirqs_on(unsigned long ip); extern void lockdep_hardirqs_off(unsigned long ip); + extern void lockdep_cleanup_dead_cpu(unsigned int cpu, + struct task_struct *idle); #else static inline void lockdep_softirqs_on(unsigned long ip) { } static inline void lockdep_softirqs_off(unsigned long ip) { } static inline void lockdep_hardirqs_on_prepare(void) { } static inline void lockdep_hardirqs_on(unsigned long ip) { } static inline void lockdep_hardirqs_off(unsigned long ip) { } + static inline void lockdep_cleanup_dead_cpu(unsigned int cpu, + struct task_struct *idle) {} #endif #ifdef CONFIG_TRACE_IRQFLAGS -- cgit v1.2.3 From d7fe143cb115076fed0126ad8cf5ba6c3e575e43 Mon Sep 17 00:00:00 2001 From: Ahmed Ehab Date: Sun, 25 Aug 2024 01:10:30 +0300 Subject: locking/lockdep: Avoid creating new name string literals in lockdep_set_subclass() Syzbot reports a problem that a warning will be triggered while searching a lock class in look_up_lock_class(). The cause of the issue is that a new name is created and used by lockdep_set_subclass() instead of using the existing one. This results in a lock instance has a different name pointer than previous registered one stored in lock class, and WARN_ONCE() is triggered because of that in look_up_lock_class(). To fix this, change lockdep_set_subclass() to use the existing name instead of a new one. Hence, no new name will be created by lockdep_set_subclass(). Hence, the warning is avoided. [boqun: Reword the commit log to state the correct issue] Reported-by: Fixes: de8f5e4f2dc1f ("lockdep: Introduce wait-type checks") Cc: stable@vger.kernel.org Signed-off-by: Ahmed Ehab Signed-off-by: Boqun Feng Link: https://lore.kernel.org/lkml/20240824221031.7751-1-bottaawesome633@gmail.com/ --- include/linux/lockdep.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 217f7abf2cbf..67964dc4db95 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -173,7 +173,7 @@ static inline void lockdep_init_map(struct lockdep_map *lock, const char *name, (lock)->dep_map.lock_type) #define lockdep_set_subclass(lock, sub) \ - lockdep_init_map_type(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\ + lockdep_init_map_type(&(lock)->dep_map, (lock)->dep_map.name, (lock)->dep_map.key, sub,\ (lock)->dep_map.wait_type_inner, \ (lock)->dep_map.wait_type_outer, \ (lock)->dep_map.lock_type) -- cgit v1.2.3 From df7e8b522a6090162ecb50fd298ebc4db137562b Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 14 Oct 2024 10:55:20 +0200 Subject: drm/client: Move client event handlers to drm_client_event.c A number of DRM-client functions serve as entry points from device operations to client code. Moving them info a separate file will later allow for a more fine-grained kernel configuration. For most of the users it is sufficient to include instead of the full driver-side interface in v2: - rename new files to drm_client_event.{c,h} Signed-off-by: Thomas Zimmermann Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Karol Herbst Cc: Lyude Paul Cc: Danilo Krummrich Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20241014085740.582287-7-tzimmermann@suse.de --- include/drm/drm_client.h | 4 ---- include/drm/drm_client_event.h | 12 ++++++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) create mode 100644 include/drm/drm_client_event.h (limited to 'include') diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h index bc0e66f9c425..dfd5afcc9463 100644 --- a/include/drm/drm_client.h +++ b/include/drm/drm_client.h @@ -121,10 +121,6 @@ int drm_client_init(struct drm_device *dev, struct drm_client_dev *client, void drm_client_release(struct drm_client_dev *client); void drm_client_register(struct drm_client_dev *client); -void drm_client_dev_unregister(struct drm_device *dev); -void drm_client_dev_hotplug(struct drm_device *dev); -void drm_client_dev_restore(struct drm_device *dev); - /** * struct drm_client_buffer - DRM client buffer */ diff --git a/include/drm/drm_client_event.h b/include/drm/drm_client_event.h new file mode 100644 index 000000000000..2c8915241120 --- /dev/null +++ b/include/drm/drm_client_event.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ + +#ifndef _DRM_CLIENT_EVENT_H_ +#define _DRM_CLIENT_EVENT_H_ + +struct drm_device; + +void drm_client_dev_unregister(struct drm_device *dev); +void drm_client_dev_hotplug(struct drm_device *dev); +void drm_client_dev_restore(struct drm_device *dev); + +#endif -- cgit v1.2.3 From bf17766f108309027aac2bfe184df6088dfd7384 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 14 Oct 2024 10:55:21 +0200 Subject: drm/client: Move suspend/resume into DRM client callbacks Suspend and resume is still tied to fbdev emulation. Modeset helpers and several drivers call drm_fb_helper_set_suspend_unlocked() to inform the fbdev client about suspend/resume events. To make it work with arbitrary clients, add per-client callback functions for suspend and resume. Implement them for fbdev emulation with the existing drm_fb_helper_set_suspend_unlocked(). Then update DRM's modeset helpers to call the new interface. Clients that are not fbdev can now implement suspend/resume to their requirements. The callback parameter holds_console_lock is a workaround for i915, radeon and xe, which possibly call the interface while having the console lock acquired. Even though the commit doesn't modify these drivers, it already adds the flag to avoid churn later on. New code should not hold the console lock. v4: - clarify holds_console_lock in commit description (Jonathan) Signed-off-by: Thomas Zimmermann Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20241014085740.582287-8-tzimmermann@suse.de --- include/drm/drm_client.h | 35 +++++++++++++++++++++++++++++++++++ include/drm/drm_client_event.h | 2 ++ 2 files changed, 37 insertions(+) (limited to 'include') diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h index dfd5afcc9463..c03c4b0f3e94 100644 --- a/include/drm/drm_client.h +++ b/include/drm/drm_client.h @@ -63,6 +63,34 @@ struct drm_client_funcs { * This callback is optional. */ int (*hotplug)(struct drm_client_dev *client); + + /** + * @suspend: + * + * Called when suspending the device. + * + * This callback is optional. + * + * FIXME: Some callers hold the console lock when invoking this + * function. This interferes with fbdev emulation, which + * also tries to acquire the lock. Push the console lock + * into the callback and remove 'holds_console_lock'. + */ + int (*suspend)(struct drm_client_dev *client, bool holds_console_lock); + + /** + * @resume: + * + * Called when resuming the device from suspend. + * + * This callback is optional. + * + * FIXME: Some callers hold the console lock when invoking this + * function. This interferes with fbdev emulation, which + * also tries to acquire the lock. Push the console lock + * into the callback and remove 'holds_console_lock'. + */ + int (*resume)(struct drm_client_dev *client, bool holds_console_lock); }; /** @@ -107,6 +135,13 @@ struct drm_client_dev { */ struct drm_mode_set *modesets; + /** + * @suspended: + * + * The client has been suspended. + */ + bool suspended; + /** * @hotplug_failed: * diff --git a/include/drm/drm_client_event.h b/include/drm/drm_client_event.h index 2c8915241120..72c97d111169 100644 --- a/include/drm/drm_client_event.h +++ b/include/drm/drm_client_event.h @@ -8,5 +8,7 @@ struct drm_device; void drm_client_dev_unregister(struct drm_device *dev); void drm_client_dev_hotplug(struct drm_device *dev); void drm_client_dev_restore(struct drm_device *dev); +void drm_client_dev_suspend(struct drm_device *dev, bool holds_console_lock); +void drm_client_dev_resume(struct drm_device *dev, bool holds_console_lock); #endif -- cgit v1.2.3 From 1f828b4dd40264028d9b481c0412e63837d968f6 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 14 Oct 2024 10:55:25 +0200 Subject: drm/client: Make client support optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only build client code if DRM_CLIENT has been selected. Automatially do so if one of the default clients has been enabled. If client support has been disabled, the helpers for client-related events are empty and the regular client functions are not present. Amdgpu has an internal DRM client, so it has to select DRM_CLIENT by itself unconditionally. v3: - provide empty drm_client_debugfs_init() if DRM_CLIENT=n (kernel test robot) Signed-off-by: Thomas Zimmermann Cc: Alex Deucher Cc: "Christian König" Cc: Xinhui Pan Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20241014085740.582287-12-tzimmermann@suse.de --- include/drm/drm_client.h | 2 -- include/drm/drm_client_event.h | 13 +++++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h index c03c4b0f3e94..3b13cf29ed55 100644 --- a/include/drm/drm_client.h +++ b/include/drm/drm_client.h @@ -236,6 +236,4 @@ int drm_client_modeset_dpms(struct drm_client_dev *client, int mode); drm_for_each_connector_iter(connector, iter) \ if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) -void drm_client_debugfs_init(struct drm_device *dev); - #endif diff --git a/include/drm/drm_client_event.h b/include/drm/drm_client_event.h index 72c97d111169..99863554b055 100644 --- a/include/drm/drm_client_event.h +++ b/include/drm/drm_client_event.h @@ -5,10 +5,23 @@ struct drm_device; +#if defined(CONFIG_DRM_CLIENT) void drm_client_dev_unregister(struct drm_device *dev); void drm_client_dev_hotplug(struct drm_device *dev); void drm_client_dev_restore(struct drm_device *dev); void drm_client_dev_suspend(struct drm_device *dev, bool holds_console_lock); void drm_client_dev_resume(struct drm_device *dev, bool holds_console_lock); +#else +static inline void drm_client_dev_unregister(struct drm_device *dev) +{ } +static inline void drm_client_dev_hotplug(struct drm_device *dev) +{ } +static inline void drm_client_dev_restore(struct drm_device *dev) +{ } +static inline void drm_client_dev_suspend(struct drm_device *dev, bool holds_console_lock) +{ } +static inline void drm_client_dev_resume(struct drm_device *dev, bool holds_console_lock) +{ } +#endif #endif -- cgit v1.2.3 From 6eaa83ec229b1e99130456c4f6658430d509c76c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 17 Oct 2024 17:11:08 +0300 Subject: PCI: Remove unused PCI_SUBTRACTIVE_DECODE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 2fe2abf896c1 ("PCI: augment bus resource table with a list") added PCI_SUBTRACTIVE_DECODE which is put into the struct pci_bus_resource flags field but is never read. There seems to never have been users for it. Remove both PCI_SUBTRACTIVE_DECODE and the flags field from the struct pci_bus_resource. Link: https://lore.kernel.org/r/20241017141111.44612-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- include/linux/pci.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..6a9cf80d0d4b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -633,18 +633,9 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge); * Use pci_bus_for_each_resource() to iterate through all the resources. */ -/* - * PCI_SUBTRACTIVE_DECODE means the bridge forwards the window implicitly - * and there's no way to program the bridge with the details of the window. - * This does not apply to ACPI _CRS windows, even with the _DEC subtractive- - * decode bit set, because they are explicit and can be programmed with _SRS. - */ -#define PCI_SUBTRACTIVE_DECODE 0x1 - struct pci_bus_resource { struct list_head list; struct resource *res; - unsigned int flags; }; #define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ @@ -1498,8 +1489,7 @@ void pci_add_resource(struct list_head *resources, struct resource *res); void pci_add_resource_offset(struct list_head *resources, struct resource *res, resource_size_t offset); void pci_free_resource_list(struct list_head *resources); -void pci_bus_add_resource(struct pci_bus *bus, struct resource *res, - unsigned int flags); +void pci_bus_add_resource(struct pci_bus *bus, struct resource *res); struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n); void pci_bus_remove_resources(struct pci_bus *bus); void pci_bus_remove_resource(struct pci_bus *bus, struct resource *res); -- cgit v1.2.3 From 469c9cb941480718db52876bbdb95a7a79b34889 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 17 Oct 2024 17:11:09 +0300 Subject: PCI: Move struct pci_bus_resource into bus.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The struct pci_bus_resource is only used in bus.c, so move it there. Link: https://lore.kernel.org/r/20241017141111.44612-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- include/linux/pci.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 6a9cf80d0d4b..5a9d849b28ef 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -626,18 +626,6 @@ void pci_set_host_bridge_release(struct pci_host_bridge *bridge, int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge); -/* - * The first PCI_BRIDGE_RESOURCE_NUM PCI bus resources (those that correspond - * to P2P or CardBus bridge windows) go in a table. Additional ones (for - * buses below host bridges or subtractive decode bridges) go in the list. - * Use pci_bus_for_each_resource() to iterate through all the resources. - */ - -struct pci_bus_resource { - struct list_head list; - struct resource *res; -}; - #define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ struct pci_bus { -- cgit v1.2.3 From 08ef26ea9ab315b895d57f8fbad41e02ff345bb9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Oct 2024 16:23:58 +0200 Subject: fs: add file_ref As atomic_inc_not_zero() is implemented with a try_cmpxchg() loop it has O(N^2) behaviour under contention with N concurrent operations and it is in a hot path in __fget_files_rcu(). The rcuref infrastructures remedies this problem by using an unconditional increment relying on safe- and dead zones to make this work and requiring rcu protection for the data structure in question. This not just scales better it also introduces overflow protection. However, in contrast to generic rcuref, files require a memory barrier and thus cannot rely on *_relaxed() atomic operations and also require to be built on atomic_long_t as having massive amounts of reference isn't unheard of even if it is just an attack. As suggested by Linus, add a file specific variant instead of making this a generic library. Files are SLAB_TYPESAFE_BY_RCU and thus don't have "regular" rcu protection. In short, freeing of files isn't delayed until a grace period has elapsed. Instead, they are freed immediately and thus can be reused (multiple times) within the same grace period. So when picking a file from the file descriptor table via its file descriptor number it is thus possible to see an elevated reference count on file->f_count even though the file has already been recycled possibly multiple times by another task. To guard against this the vfs will pick the file from the file descriptor table twice. Once before the refcount increment and once after to compare the pointers (grossly simplified). If they match then the file is still valid. If not the caller needs to fput() it. The unconditional increment makes the following race possible as illustrated by rcuref: > Deconstruction race > =================== > > The release operation must be protected by prohibiting a grace period in > order to prevent a possible use after free: > > T1 T2 > put() get() > // ref->refcnt = ONEREF > if (!atomic_add_negative(-1, &ref->refcnt)) > return false; <- Not taken > > // ref->refcnt == NOREF > --> preemption > // Elevates ref->refcnt to ONEREF > if (!atomic_add_negative(1, &ref->refcnt)) > return true; <- taken > > if (put(&p->ref)) { <-- Succeeds > remove_pointer(p); > kfree_rcu(p, rcu); > } > > RCU grace period ends, object is freed > > atomic_cmpxchg(&ref->refcnt, NOREF, DEAD); <- UAF > > [...] it prevents the grace period which keeps the object alive until > all put() operations complete. Having files by SLAB_TYPESAFE_BY_RCU shouldn't cause any problems for this deconstruction race. Afaict, the only interesting case would be someone freeing the file and someone immediately recycling it within the same grace period and reinitializing file->f_count to ONEREF while a concurrent fput() is doing atomic_cmpxchg(&ref->refcnt, NOREF, DEAD) as in the race above. But this is safe from SLAB_TYPESAFE_BY_RCU's perspective and it should be safe from rcuref's perspective. T1 T2 T3 fput() fget() // f_count->refcnt = ONEREF if (!atomic_add_negative(-1, &f_count->refcnt)) return false; <- Not taken // f_count->refcnt == NOREF --> preemption // Elevates f_count->refcnt to ONEREF if (!atomic_add_negative(1, &f_count->refcnt)) return true; <- taken if (put(&f_count)) { <-- Succeeds remove_pointer(p); /* * Cache is SLAB_TYPESAFE_BY_RCU * so this is freed without a grace period. */ kmem_cache_free(p); } kmem_cache_alloc() init_file() { // Sets f_count->refcnt to ONEREF rcuref_long_init(&f->f_count, 1); } Object has been reused within the same grace period via kmem_cache_alloc()'s SLAB_TYPESAFE_BY_RCU. /* * With SLAB_TYPESAFE_BY_RCU this would be a safe UAF access and * it would work correctly because the atomic_cmpxchg() * will fail because the refcount has been reset to ONEREF by T3. */ atomic_cmpxchg(&ref->refcnt, NOREF, DEAD); <- UAF However, there are other cases to consider: (1) Benign race due to multiple atomic_long_read() CPU1 CPU2 file_ref_put() // last reference // => count goes negative/FILE_REF_NOREF atomic_long_add_negative_release(-1, &ref->refcnt) -> __file_ref_put() file_ref_get() // goes back from negative/FILE_REF_NOREF to 0 // and file_ref_get() succeeds atomic_long_add_negative(1, &ref->refcnt) // This is immediately followed by file_ref_put() // managing to set FILE_REF_DEAD file_ref_put() // __file_ref_put() continues and sees // cnt > FILE_REF_RELEASED // and splats with // "imbalanced put on file reference count" cnt = atomic_long_read(&ref->refcnt); The race however is benign and the problem is the atomic_long_read(). Instead of performing a separate read this uses atomic_long_dec_return() and pass the value to __file_ref_put(). Thanks to Linus for pointing out that braino. (2) SLAB_TYPESAFE_BY_RCU may cause recycled files to be marked dead When a file is recycled the following race exists: CPU1 CPU2 // @file is already dead and thus // cnt >= FILE_REF_RELEASED. file_ref_get(file) atomic_long_add_negative(1, &ref->refcnt) // We thus call into __file_ref_get() -> __file_ref_get() // which sees cnt >= FILE_REF_RELEASED cnt = atomic_long_read(&ref->refcnt); // In the meantime @file gets freed kmem_cache_free() // and is immediately recycled file = kmem_cache_zalloc() // and the reference count is reinitialized // and the file alive again in someone // else's file descriptor table file_ref_init(&ref->refcnt, 1); // the __file_ref_get() slowpath now continues // and as it saw earlier that cnt >= FILE_REF_RELEASED // it wants to ensure that we're staying in the middle // of the deadzone and unconditionally sets // FILE_REF_DEAD. // This marks @file dead for CPU2... atomic_long_set(&ref->refcnt, FILE_REF_DEAD); // Caller issues a close() system call to close @file close(fd) file = file_close_fd_locked() filp_flush() // The caller sees that cnt >= FILE_REF_RELEASED // and warns the first time... CHECK_DATA_CORRUPTION(file_count(file) == 0) // and then splats a second time because // __file_ref_put() sees cnt >= FILE_REF_RELEASED file_ref_put(&ref->refcnt); -> __file_ref_put() My initial inclination was to replace the unconditional atomic_long_set() with an atomic_long_try_cmpxchg() but Linus pointed out that: > I think we should just make file_ref_get() do a simple > > return !atomic_long_add_negative(1, &ref->refcnt)); > > and nothing else. Yes, multiple CPU's can race, and you can increment > more than once, but the gap - even on 32-bit - between DEAD and > becoming close to REF_RELEASED is so big that we simply don't care. > That's the point of having a gap. I've been testing this with will-it-scale using fstat() on a machine that Jens gave me access (thank you very much!): processor : 511 vendor_id : AuthenticAMD cpu family : 25 model : 160 model name : AMD EPYC 9754 128-Core Processor and I consistently get a 3-5% improvement on 256+ threads. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202410151043.5d224a27-oliver.sang@intel.com Closes: https://lore.kernel.org/all/202410151611.f4cd71f2-oliver.sang@intel.com Link: https://lore.kernel.org/r/20241007-brauner-file-rcuref-v2-2-387e24dc9163@kernel.org Signed-off-by: Christian Brauner --- include/linux/file_ref.h | 177 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 include/linux/file_ref.h (limited to 'include') diff --git a/include/linux/file_ref.h b/include/linux/file_ref.h new file mode 100644 index 000000000000..9b3a8d9b17ab --- /dev/null +++ b/include/linux/file_ref.h @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _LINUX_FILE_REF_H +#define _LINUX_FILE_REF_H + +#include +#include +#include + +/* + * file_ref is a reference count implementation specifically for use by + * files. It takes inspiration from rcuref but differs in key aspects + * such as support for SLAB_TYPESAFE_BY_RCU type caches. + * + * FILE_REF_ONEREF FILE_REF_MAXREF + * 0x0000000000000000UL 0x7FFFFFFFFFFFFFFFUL + * <-------------------valid -------------------> + * + * FILE_REF_SATURATED + * 0x8000000000000000UL 0xA000000000000000UL 0xBFFFFFFFFFFFFFFFUL + * <-----------------------saturation zone----------------------> + * + * FILE_REF_RELEASED FILE_REF_DEAD + * 0xC000000000000000UL 0xE000000000000000UL + * <-------------------dead zone-------------------> + * + * FILE_REF_NOREF + * 0xFFFFFFFFFFFFFFFFUL + */ + +#ifdef CONFIG_64BIT +#define FILE_REF_ONEREF 0x0000000000000000UL +#define FILE_REF_MAXREF 0x7FFFFFFFFFFFFFFFUL +#define FILE_REF_SATURATED 0xA000000000000000UL +#define FILE_REF_RELEASED 0xC000000000000000UL +#define FILE_REF_DEAD 0xE000000000000000UL +#define FILE_REF_NOREF 0xFFFFFFFFFFFFFFFFUL +#else +#define FILE_REF_ONEREF 0x00000000U +#define FILE_REF_MAXREF 0x7FFFFFFFU +#define FILE_REF_SATURATED 0xA0000000U +#define FILE_REF_RELEASED 0xC0000000U +#define FILE_REF_DEAD 0xE0000000U +#define FILE_REF_NOREF 0xFFFFFFFFU +#endif + +typedef struct { +#ifdef CONFIG_64BIT + atomic64_t refcnt; +#else + atomic_t refcnt; +#endif +} file_ref_t; + +/** + * file_ref_init - Initialize a file reference count + * @ref: Pointer to the reference count + * @cnt: The initial reference count typically '1' + */ +static inline void file_ref_init(file_ref_t *ref, unsigned long cnt) +{ + atomic_long_set(&ref->refcnt, cnt - 1); +} + +bool __file_ref_put(file_ref_t *ref, unsigned long cnt); + +/** + * file_ref_get - Acquire one reference on a file + * @ref: Pointer to the reference count + * + * Similar to atomic_inc_not_zero() but saturates at FILE_REF_MAXREF. + * + * Provides full memory ordering. + * + * Return: False if the attempt to acquire a reference failed. This happens + * when the last reference has been put already. True if a reference + * was successfully acquired + */ +static __always_inline __must_check bool file_ref_get(file_ref_t *ref) +{ + /* + * Unconditionally increase the reference count with full + * ordering. The saturation and dead zones provide enough + * tolerance for this. + * + * If this indicates negative the file in question the fail can + * be freed and immediately reused due to SLAB_TYPSAFE_BY_RCU. + * Hence, unconditionally altering the file reference count to + * e.g., reset the file reference count back to the middle of + * the deadzone risk end up marking someone else's file as dead + * behind their back. + * + * It would be possible to do a careful: + * + * cnt = atomic_long_inc_return(); + * if (likely(cnt >= 0)) + * return true; + * + * and then something like: + * + * if (cnt >= FILE_REF_RELEASE) + * atomic_long_try_cmpxchg(&ref->refcnt, &cnt, FILE_REF_DEAD), + * + * to set the value back to the middle of the deadzone. But it's + * practically impossible to go from FILE_REF_DEAD to + * FILE_REF_ONEREF. It would need 2305843009213693952/2^61 + * file_ref_get()s to resurrect such a dead file. + */ + return !atomic_long_add_negative(1, &ref->refcnt); +} + +/** + * file_ref_inc - Acquire one reference on a file + * @ref: Pointer to the reference count + * + * Acquire an additional reference on a file. Warns if the caller didn't + * already hold a reference. + */ +static __always_inline void file_ref_inc(file_ref_t *ref) +{ + long prior = atomic_long_fetch_inc_relaxed(&ref->refcnt); + WARN_ONCE(prior < 0, "file_ref_inc() on a released file reference"); +} + +/** + * file_ref_put -- Release a file reference + * @ref: Pointer to the reference count + * + * Provides release memory ordering, such that prior loads and stores + * are done before, and provides an acquire ordering on success such + * that free() must come after. + * + * Return: True if this was the last reference with no future references + * possible. This signals the caller that it can safely release + * the object which is protected by the reference counter. + * False if there are still active references or the put() raced + * with a concurrent get()/put() pair. Caller is not allowed to + * release the protected object. + */ +static __always_inline __must_check bool file_ref_put(file_ref_t *ref) +{ + long cnt; + + /* + * While files are SLAB_TYPESAFE_BY_RCU and thus file_ref_put() + * calls don't risk UAFs when a file is recyclyed, it is still + * vulnerable to UAFs caused by freeing the whole slab page once + * it becomes unused. Prevent file_ref_put() from being + * preempted protects against this. + */ + guard(preempt)(); + /* + * Unconditionally decrease the reference count. The saturation + * and dead zones provide enough tolerance for this. If this + * fails then we need to handle the last reference drop and + * cases inside the saturation and dead zones. + */ + cnt = atomic_long_dec_return(&ref->refcnt); + if (cnt >= 0) + return false; + return __file_ref_put(ref, cnt); +} + +/** + * file_ref_read - Read the number of file references + * @ref: Pointer to the reference count + * + * Return: The number of held references (0 ... N) + */ +static inline unsigned long file_ref_read(file_ref_t *ref) +{ + unsigned long c = atomic_long_read(&ref->refcnt); + + /* Return 0 if within the DEAD zone. */ + return c >= FILE_REF_RELEASED ? 0 : c + 1; +} + +#endif -- cgit v1.2.3 From 9a8dbdadae509e5717ff6e5aa572ca0974d2101d Mon Sep 17 00:00:00 2001 From: John Garry Date: Sat, 19 Oct 2024 12:51:06 +0000 Subject: block/fs: Pass an iocb to generic_atomic_write_valid() Darrick and Hannes both thought it better that generic_atomic_write_valid() should be passed a struct iocb, and not just the member of that struct which is referenced; see [0] and [1]. I think that makes a more generic and clean API, so make that change. [0] https://lore.kernel.org/linux-block/680ce641-729b-4150-b875-531a98657682@suse.de/ [1] https://lore.kernel.org/linux-xfs/20240620212401.GA3058325@frogsfrogsfrogs/ Fixes: c34fc6f26ab8 ("fs: Initial atomic write support") Suggested-by: Darrick J. Wong Suggested-by: Hannes Reinecke Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Link: https://lore.kernel.org/r/20241019125113.369994-2-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index e3c603d01337..fbfa032d1d90 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3721,6 +3721,6 @@ static inline bool vfs_empty_path(int dfd, const char __user *path) return !c; } -bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos); +bool generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter); #endif /* _LINUX_FS_H */ -- cgit v1.2.3 From c3be7ebbbce5201e151f17e28a6c807602f369c9 Mon Sep 17 00:00:00 2001 From: John Garry Date: Sat, 19 Oct 2024 12:51:07 +0000 Subject: fs/block: Check for IOCB_DIRECT in generic_atomic_write_valid() Currently FMODE_CAN_ATOMIC_WRITE is set if the bdev can atomic write and the file is open for direct IO. This does not work if the file is not opened for direct IO, yet fcntl(O_DIRECT) is used on the fd later. Change to check for direct IO on a per-IO basis in generic_atomic_write_valid(). Since we want to report -EOPNOTSUPP for non-direct IO for an atomic write, change to return an error code. Relocate the block fops atomic write checks to the common write path, as to catch non-direct IO. Fixes: c34fc6f26ab8 ("fs: Initial atomic write support") Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: John Garry Link: https://lore.kernel.org/r/20241019125113.369994-3-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index fbfa032d1d90..ba47fb283730 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3721,6 +3721,6 @@ static inline bool vfs_empty_path(int dfd, const char __user *path) return !c; } -bool generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter); +int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter); #endif /* _LINUX_FS_H */ -- cgit v1.2.3 From 1eadb157947163ca72ba8963b915fdc099ce6cca Mon Sep 17 00:00:00 2001 From: John Garry Date: Sat, 19 Oct 2024 12:51:08 +0000 Subject: block: Add bdev atomic write limits helpers Add helpers to get atomic write limits for a bdev, so that we don't access request_queue helpers outside the block layer. We check if the bdev can actually atomic write in these helpers, so we can avoid users missing using this check. Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: John Garry Link: https://lore.kernel.org/r/20241019125113.369994-4-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 50c3b959da28..c2cc3c146d74 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1674,6 +1674,22 @@ static inline bool bdev_can_atomic_write(struct block_device *bdev) return true; } +static inline unsigned int +bdev_atomic_write_unit_min_bytes(struct block_device *bdev) +{ + if (!bdev_can_atomic_write(bdev)) + return 0; + return queue_atomic_write_unit_min_bytes(bdev_get_queue(bdev)); +} + +static inline unsigned int +bdev_atomic_write_unit_max_bytes(struct block_device *bdev) +{ + if (!bdev_can_atomic_write(bdev)) + return 0; + return queue_atomic_write_unit_max_bytes(bdev_get_queue(bdev)); +} + #define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } #endif /* _LINUX_BLKDEV_H */ -- cgit v1.2.3 From e896474fe4851ffc4dd860c92daa906783090346 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Oct 2024 23:08:34 -0400 Subject: getname_maybe_null() - the third variant of pathname copy-in Semantics used by statx(2) (and later *xattrat(2)): without AT_EMPTY_PATH it's standard getname() (i.e. ERR_PTR(-ENOENT) on empty string, ERR_PTR(-EFAULT) on NULL), with AT_EMPTY_PATH both empty string and NULL are accepted. Calling conventions: getname_maybe_null(user_pointer, flags) returns * pointer to struct filename when non-empty string had been successfully read * ERR_PTR(...) on error * NULL if an empty string or NULL pointer had been given with AT_EMPTY_PATH in the flags argument. It tries to avoid allocation in the last case; it's not always able to do so, in which case the temporary struct filename instance is freed and NULL returned anyway. Fast path is inlined. Signed-off-by: Al Viro --- include/linux/fs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index e3c603d01337..403258ac2ea2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2766,6 +2766,16 @@ extern struct filename *getname_flags(const char __user *, int); extern struct filename *getname_uflags(const char __user *, int); extern struct filename *getname(const char __user *); extern struct filename *getname_kernel(const char *); +extern struct filename *__getname_maybe_null(const char __user *); +static inline struct filename *getname_maybe_null(const char __user *name, int flags) +{ + if (!(flags & AT_EMPTY_PATH)) + return getname(name); + + if (!name) + return NULL; + return __getname_maybe_null(name); +} extern void putname(struct filename *name); extern int finish_open(struct file *file, struct dentry *dentry, -- cgit v1.2.3 From af264e5989055ac33f413c4c80874345cda0cc97 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 18 Oct 2024 19:05:57 +0200 Subject: mtd: spinand: Constify struct nand_ecc_engine_ops 'struct nand_ecc_engine_ops' are not modified in these drivers. Constifying this structure moves some data to a read-only section, so increases overall security, especially when the structure holds some function pointers. Update the prototype of mxic_ecc_get_pipelined_ops() accordingly. On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 16709 1374 16 18099 46b3 drivers/mtd/nand/ecc-mxic.o After: ===== text data bss dec hex filename 16789 1294 16 18099 46b3 drivers/mtd/nand/ecc-mxic.o Signed-off-by: Christophe JAILLET Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/72597e9de2320a4109be2112e696399592edacd4.1729271136.git.christophe.jaillet@wanadoo.fr --- include/linux/mtd/nand-ecc-mxic.h | 4 ++-- include/linux/mtd/nand.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/nand-ecc-mxic.h b/include/linux/mtd/nand-ecc-mxic.h index b125926e458c..0da4b2999576 100644 --- a/include/linux/mtd/nand-ecc-mxic.h +++ b/include/linux/mtd/nand-ecc-mxic.h @@ -16,7 +16,7 @@ struct mxic_ecc_engine; #if IS_ENABLED(CONFIG_MTD_NAND_ECC_MXIC) && IS_REACHABLE(CONFIG_MTD_NAND_CORE) -struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void); +const struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void); struct nand_ecc_engine *mxic_ecc_get_pipelined_engine(struct platform_device *spi_pdev); void mxic_ecc_put_pipelined_engine(struct nand_ecc_engine *eng); int mxic_ecc_process_data_pipelined(struct nand_ecc_engine *eng, @@ -24,7 +24,7 @@ int mxic_ecc_process_data_pipelined(struct nand_ecc_engine *eng, #else /* !CONFIG_MTD_NAND_ECC_MXIC */ -static inline struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void) +static inline const struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void) { return NULL; } diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 1e4208040956..0e2f228e8b4a 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -293,7 +293,7 @@ enum nand_ecc_engine_integration { struct nand_ecc_engine { struct device *dev; struct list_head node; - struct nand_ecc_engine_ops *ops; + const struct nand_ecc_engine_ops *ops; enum nand_ecc_engine_integration integration; void *priv; }; -- cgit v1.2.3 From 192514ae05687a60ac230569b2a215fb2d3b8d90 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 10 Jun 2024 10:57:32 +0200 Subject: soc: mediatek: Add MediaTek DVFS Resource Collector (DVFSRC) driver The Dynamic Voltage and Frequency Scaling Resource Collector (DVFSRC) is a Hardware module used to collect all the requests from both software and the various remote processors embedded into the SoC and decide about a minimum operating voltage and a minimum DRAM frequency to fulfill those requests in an effort to provide the best achievable performance per watt. This hardware IP is capable of transparently performing direct register R/W on all of the DVFSRC-controlled regulators and SoC bandwidth knobs. This driver includes support for MT8183, MT8192 and MT8195. Co-Developed-by: Dawei Chien [Angelo: Partial refactoring and cleanups] Reviewed-by: Georgi Djakov Link: https://lore.kernel.org/r/20240610085735.147134-5-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- include/linux/soc/mediatek/dvfsrc.h | 36 ++++++++++++++++++++++++++++++++ include/linux/soc/mediatek/mtk_sip_svc.h | 3 +++ 2 files changed, 39 insertions(+) create mode 100644 include/linux/soc/mediatek/dvfsrc.h (limited to 'include') diff --git a/include/linux/soc/mediatek/dvfsrc.h b/include/linux/soc/mediatek/dvfsrc.h new file mode 100644 index 000000000000..1498b3ed396b --- /dev/null +++ b/include/linux/soc/mediatek/dvfsrc.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (c) 2021 MediaTek Inc. + * Copyright (c) 2024 Collabora Ltd. + * AngeloGioacchino Del Regno + */ + +#ifndef __MEDIATEK_DVFSRC_H +#define __MEDIATEK_DVFSRC_H + +enum mtk_dvfsrc_cmd { + MTK_DVFSRC_CMD_BW, + MTK_DVFSRC_CMD_HRT_BW, + MTK_DVFSRC_CMD_PEAK_BW, + MTK_DVFSRC_CMD_OPP, + MTK_DVFSRC_CMD_VCORE_LEVEL, + MTK_DVFSRC_CMD_VSCP_LEVEL, + MTK_DVFSRC_CMD_MAX, +}; + +#if IS_ENABLED(CONFIG_MTK_DVFSRC) + +int mtk_dvfsrc_send_request(const struct device *dev, u32 cmd, u64 data); +int mtk_dvfsrc_query_info(const struct device *dev, u32 cmd, int *data); + +#else + +static inline int mtk_dvfsrc_send_request(const struct device *dev, u32 cmd, u64 data) +{ return -ENODEV; } + +static inline int mtk_dvfsrc_query_info(const struct device *dev, u32 cmd, int *data) +{ return -ENODEV; } + +#endif /* CONFIG_MTK_DVFSRC */ + +#endif diff --git a/include/linux/soc/mediatek/mtk_sip_svc.h b/include/linux/soc/mediatek/mtk_sip_svc.h index 0761128b4354..abe24a73ee19 100644 --- a/include/linux/soc/mediatek/mtk_sip_svc.h +++ b/include/linux/soc/mediatek/mtk_sip_svc.h @@ -22,6 +22,9 @@ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, MTK_SIP_SMC_CONVENTION, \ ARM_SMCCC_OWNER_SIP, fn_id) +/* DVFSRC SMC calls */ +#define MTK_SIP_DVFSRC_VCOREFS_CONTROL MTK_SIP_SMC_CMD(0x506) + /* IOMMU related SMC call */ #define MTK_SIP_KERNEL_IOMMU_CONTROL MTK_SIP_SMC_CMD(0x514) -- cgit v1.2.3 From fb6f20ecb121cef4d7946f834a6ee867c4e21b4a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 17 Oct 2024 12:28:23 +0200 Subject: reiserfs: The last commit Deprecation period of reiserfs ends with the end of this year so it is time to remove it from the kernel. Acked-by: Darrick J. Wong Acked-by: Christian Brauner Signed-off-by: Jan Kara --- include/uapi/linux/reiserfs_fs.h | 27 --------------------------- include/uapi/linux/reiserfs_xattr.h | 25 ------------------------- 2 files changed, 52 deletions(-) delete mode 100644 include/uapi/linux/reiserfs_fs.h delete mode 100644 include/uapi/linux/reiserfs_xattr.h (limited to 'include') diff --git a/include/uapi/linux/reiserfs_fs.h b/include/uapi/linux/reiserfs_fs.h deleted file mode 100644 index 5bb921409f2b..000000000000 --- a/include/uapi/linux/reiserfs_fs.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details - */ -#ifndef _LINUX_REISER_FS_H -#define _LINUX_REISER_FS_H - -#include -#include - -/* - * include/linux/reiser_fs.h - * - * Reiser File System constants and structures - * - */ - -/* ioctl's command */ -#define REISERFS_IOC_UNPACK _IOW(0xCD,1,long) -/* define following flags to be the same as in ext2, so that chattr(1), - lsattr(1) will work with us. */ -#define REISERFS_IOC_GETFLAGS FS_IOC_GETFLAGS -#define REISERFS_IOC_SETFLAGS FS_IOC_SETFLAGS -#define REISERFS_IOC_GETVERSION FS_IOC_GETVERSION -#define REISERFS_IOC_SETVERSION FS_IOC_SETVERSION - -#endif /* _LINUX_REISER_FS_H */ diff --git a/include/uapi/linux/reiserfs_xattr.h b/include/uapi/linux/reiserfs_xattr.h deleted file mode 100644 index 503ad018ce5b..000000000000 --- a/include/uapi/linux/reiserfs_xattr.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - File: linux/reiserfs_xattr.h -*/ - -#ifndef _LINUX_REISERFS_XATTR_H -#define _LINUX_REISERFS_XATTR_H - -#include - -/* Magic value in header */ -#define REISERFS_XATTR_MAGIC 0x52465841 /* "RFXA" */ - -struct reiserfs_xattr_header { - __le32 h_magic; /* magic number for identification */ - __le32 h_hash; /* hash of the value */ -}; - -struct reiserfs_security_handle { - const char *name; - void *value; - __kernel_size_t length; -}; - -#endif /* _LINUX_REISERFS_XATTR_H */ -- cgit v1.2.3 From 424a55a4a9087887fcfcee561648df497701a4a2 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Thu, 10 Oct 2024 07:40:34 +1100 Subject: uaccess: add copy_struct_to_user helper This is based on copy_struct_from_user(), but there is one additional case to consider when creating a syscall that returns an extensible-struct to userspace -- how should data in the struct that cannot fit into the userspace struct be handled (ksize > usize)? There are three possibilies: 1. The interface is like sched_getattr(2), where new information will be silently not provided to userspace. This is probably what most interfaces will want to do, as it provides the most possible backwards-compatibility. 2. The interface is like lsm_list_modules(2), where you want to return an error like -EMSGSIZE if not providing information could result in the userspace program making a serious mistake (such as one that could lead to a security problem) or if you want to provide some flag to userspace so they know that they are missing some information. 3. The interface is like statx(2), where there some kind of a request mask that indicates what data userspace would like. One could imagine that statx2(2) (using extensible structs) would want to return -EMSGSIZE if the user explicitly requested a field that their structure is too small to fit, but not return an error if the field was not explicitly requested. This is kind of a mix between (1) and (2) based on the requested mask. The copy_struct_to_user() helper includes a an extra argument that is used to return a boolean flag indicating whether there was a non-zero byte in the trailing bytes that were not copied to userspace. This can be used in the following ways to handle all three cases, respectively: 1. Just pass NULL, as you don't care about this case. 2. Return an error (say -EMSGSIZE) if the argument was set to true by copy_struct_to_user(). 3. If the argument was set to true by copy_struct_to_user(), check if there is a flag that implies a field larger than usize. This is the only case where callers of copy_struct_to_user() should check usize themselves. This will probably require scanning an array that specifies what flags were added for each version of the flags struct and returning an error if the request mask matches any of the flags that were added in versions of the struct that are larger than usize. At the moment we don't have any users of (3), so this patch doesn't include any helpers to make the necessary scanning easier, but it should be fairly easy to add some if necessary. Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/r/20241010-extensible-structs-check_fields-v3-1-d2833dfe6edd@cyphar.com Signed-off-by: Christian Brauner --- include/linux/uaccess.h | 97 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) (limited to 'include') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 39c7cf82b0c2..1e7c68b4c262 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -394,6 +394,103 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src, return 0; } +/** + * copy_struct_to_user: copy a struct to userspace + * @dst: Destination address, in userspace. This buffer must be @ksize + * bytes long. + * @usize: (Alleged) size of @dst struct. + * @src: Source address, in kernel space. + * @ksize: Size of @src struct. + * @ignored_trailing: Set to %true if there was a non-zero byte in @src that + * userspace cannot see because they are using an smaller struct. + * + * Copies a struct from kernel space to userspace, in a way that guarantees + * backwards-compatibility for struct syscall arguments (as long as future + * struct extensions are made such that all new fields are *appended* to the + * old struct, and zeroed-out new fields have the same meaning as the old + * struct). + * + * Some syscalls may wish to make sure that userspace knows about everything in + * the struct, and if there is a non-zero value that userspce doesn't know + * about, they want to return an error (such as -EMSGSIZE) or have some other + * fallback (such as adding a "you're missing some information" flag). If + * @ignored_trailing is non-%NULL, it will be set to %true if there was a + * non-zero byte that could not be copied to userspace (ie. was past @usize). + * + * While unconditionally returning an error in this case is the simplest + * solution, for maximum backward compatibility you should try to only return + * -EMSGSIZE if the user explicitly requested the data that couldn't be copied. + * Note that structure sizes can change due to header changes and simple + * recompilations without code changes(!), so if you care about + * @ignored_trailing you probably want to make sure that any new field data is + * associated with a flag. Otherwise you might assume that a program knows + * about data it does not. + * + * @ksize is just sizeof(*src), and @usize should've been passed by userspace. + * The recommended usage is something like the following: + * + * SYSCALL_DEFINE2(foobar, struct foo __user *, uarg, size_t, usize) + * { + * int err; + * bool ignored_trailing; + * struct foo karg = {}; + * + * if (usize > PAGE_SIZE) + * return -E2BIG; + * if (usize < FOO_SIZE_VER0) + * return -EINVAL; + * + * // ... modify karg somehow ... + * + * err = copy_struct_to_user(uarg, usize, &karg, sizeof(karg), + * &ignored_trailing); + * if (err) + * return err; + * if (ignored_trailing) + * return -EMSGSIZE: + * + * // ... + * } + * + * There are three cases to consider: + * * If @usize == @ksize, then it's copied verbatim. + * * If @usize < @ksize, then the kernel is trying to pass userspace a newer + * struct than it supports. Thus we only copy the interoperable portions + * (@usize) and ignore the rest (but @ignored_trailing is set to %true if + * any of the trailing (@ksize - @usize) bytes are non-zero). + * * If @usize > @ksize, then the kernel is trying to pass userspace an older + * struct than userspace supports. In order to make sure the + * unknown-to-the-kernel fields don't contain garbage values, we zero the + * trailing (@usize - @ksize) bytes. + * + * Returns (in all cases, some data may have been copied): + * * -EFAULT: access to userspace failed. + */ +static __always_inline __must_check int +copy_struct_to_user(void __user *dst, size_t usize, const void *src, + size_t ksize, bool *ignored_trailing) +{ + size_t size = min(ksize, usize); + size_t rest = max(ksize, usize) - size; + + /* Double check if ksize is larger than a known object size. */ + if (WARN_ON_ONCE(ksize > __builtin_object_size(src, 1))) + return -E2BIG; + + /* Deal with trailing bytes. */ + if (usize > ksize) { + if (clear_user(dst + size, rest)) + return -EFAULT; + } + if (ignored_trailing) + *ignored_trailing = ksize < usize && + memchr_inv(src + size, 0, rest) != NULL; + /* Copy the interoperable parts of the struct. */ + if (copy_to_user(dst, src, size)) + return -EFAULT; + return 0; +} + bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size); long copy_from_kernel_nofault(void *dst, const void *src, size_t size); -- cgit v1.2.3 From 48931f65e9f785b65244550cc8f0c8bf9eab7acd Mon Sep 17 00:00:00 2001 From: Michael Margolin Date: Tue, 15 Oct 2024 17:42:42 +0000 Subject: RDMA/efa: Add option to set QP service level on create Using modify QP with AH attributes and IB_QP_AV flag set doesn't make much sense for connectionless QP types like SRD. Add SL parameter to EFA create QP user ABI and pass it to the device. Link: https://patch.msgid.link/r/20241015174242.3490-3-mrgolin@amazon.com Reviewed-by: Firas Jahjah Reviewed-by: Yonatan Nachum Signed-off-by: Michael Margolin Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/efa-abi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h index d689b8b34189..11b94b0b035b 100644 --- a/include/uapi/rdma/efa-abi.h +++ b/include/uapi/rdma/efa-abi.h @@ -95,7 +95,8 @@ struct efa_ibv_create_qp { __u32 sq_ring_size; /* bytes */ __u32 driver_qp_type; __u16 flags; - __u8 reserved_90[6]; + __u8 sl; + __u8 reserved_98[5]; }; struct efa_ibv_create_qp_resp { -- cgit v1.2.3 From 3e482e2840546a3ff725d5adf8d0779ac1c3cf4f Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 18 Oct 2024 14:37:25 -0700 Subject: dmaengine: idxd: Move DSA/IAA device IDs to IDXD driver Since the DSA/IAA device IDs are only used by the IDXD driver, there is no need to define them as public IDs. Move their definitions to the IDXD driver to limit their scope. This change helps reduce unnecessary exposure of the device IDs in the global space, making the codebase cleaner and better encapsulated. There is no functional change. Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20241018213725.4167413-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/linux/pci_ids.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 4cf6aaed5f35..e4bddb927795 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2709,9 +2709,6 @@ #define PCI_DEVICE_ID_INTEL_82815_MC 0x1130 #define PCI_DEVICE_ID_INTEL_82815_CGC 0x1132 #define PCI_DEVICE_ID_INTEL_SST_TNG 0x119a -#define PCI_DEVICE_ID_INTEL_DSA_GNRD 0x11fb -#define PCI_DEVICE_ID_INTEL_DSA_DMR 0x1212 -#define PCI_DEVICE_ID_INTEL_IAA_DMR 0x1216 #define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221 #define PCI_DEVICE_ID_INTEL_82437 0x122d #define PCI_DEVICE_ID_INTEL_82371FB_0 0x122e -- cgit v1.2.3 From 6fa115569d980fca1969c075d8d958d205a405ca Mon Sep 17 00:00:00 2001 From: Raviteja Laggyshetty Date: Tue, 10 Sep 2024 10:10:12 +0000 Subject: dt-bindings: interconnect: document the RPMh Network-On-Chip interconnect in QCS8300 SoC Document the RPMh Network-On-Chip Interconnect of the QCS8300 platform. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Raviteja Laggyshetty Link: https://lore.kernel.org/r/20240910101013.3020-2-quic_rlaggysh@quicinc.com Signed-off-by: Georgi Djakov --- .../dt-bindings/interconnect/qcom,qcs8300-rpmh.h | 189 +++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 include/dt-bindings/interconnect/qcom,qcs8300-rpmh.h (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,qcs8300-rpmh.h b/include/dt-bindings/interconnect/qcom,qcs8300-rpmh.h new file mode 100644 index 000000000000..c5eeafa1b1dd --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,qcs8300-rpmh.h @@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_QCS8300_H +#define __DT_BINDINGS_INTERCONNECT_QCOM_QCS8300_H + +#define MASTER_QUP_3 0 +#define MASTER_EMAC 1 +#define MASTER_SDC 2 +#define MASTER_UFS_MEM 3 +#define MASTER_USB2 4 +#define MASTER_USB3_0 5 +#define SLAVE_A1NOC_SNOC 6 + +#define MASTER_QDSS_BAM 0 +#define MASTER_QUP_0 1 +#define MASTER_QUP_1 2 +#define MASTER_CNOC_A2NOC 3 +#define MASTER_CRYPTO_CORE0 4 +#define MASTER_CRYPTO_CORE1 5 +#define MASTER_IPA 6 +#define MASTER_QDSS_ETR_0 7 +#define MASTER_QDSS_ETR_1 8 +#define SLAVE_A2NOC_SNOC 9 + +#define MASTER_QUP_CORE_0 0 +#define MASTER_QUP_CORE_1 1 +#define MASTER_QUP_CORE_3 2 +#define SLAVE_QUP_CORE_0 3 +#define SLAVE_QUP_CORE_1 4 +#define SLAVE_QUP_CORE_3 5 + +#define MASTER_GEM_NOC_CNOC 0 +#define MASTER_GEM_NOC_PCIE_SNOC 1 +#define SLAVE_AHB2PHY_2 2 +#define SLAVE_AHB2PHY_3 3 +#define SLAVE_ANOC_THROTTLE_CFG 4 +#define SLAVE_AOSS 5 +#define SLAVE_APPSS 6 +#define SLAVE_BOOT_ROM 7 +#define SLAVE_CAMERA_CFG 8 +#define SLAVE_CAMERA_NRT_THROTTLE_CFG 9 +#define SLAVE_CAMERA_RT_THROTTLE_CFG 10 +#define SLAVE_CLK_CTL 11 +#define SLAVE_CDSP_CFG 12 +#define SLAVE_RBCPR_CX_CFG 13 +#define SLAVE_RBCPR_MMCX_CFG 14 +#define SLAVE_RBCPR_MX_CFG 15 +#define SLAVE_CPR_NSPCX 16 +#define SLAVE_CPR_NSPHMX 17 +#define SLAVE_CRYPTO_0_CFG 18 +#define SLAVE_CX_RDPM 19 +#define SLAVE_DISPLAY_CFG 20 +#define SLAVE_DISPLAY_RT_THROTTLE_CFG 21 +#define SLAVE_EMAC_CFG 22 +#define SLAVE_GP_DSP0_CFG 23 +#define SLAVE_GPDSP0_THROTTLE_CFG 24 +#define SLAVE_GPU_TCU_THROTTLE_CFG 25 +#define SLAVE_GFX3D_CFG 26 +#define SLAVE_HWKM 27 +#define SLAVE_IMEM_CFG 28 +#define SLAVE_IPA_CFG 29 +#define SLAVE_IPC_ROUTER_CFG 30 +#define SLAVE_LPASS 31 +#define SLAVE_LPASS_THROTTLE_CFG 32 +#define SLAVE_MX_RDPM 33 +#define SLAVE_MXC_RDPM 34 +#define SLAVE_PCIE_0_CFG 35 +#define SLAVE_PCIE_1_CFG 36 +#define SLAVE_PCIE_TCU_THROTTLE_CFG 37 +#define SLAVE_PCIE_THROTTLE_CFG 38 +#define SLAVE_PDM 39 +#define SLAVE_PIMEM_CFG 40 +#define SLAVE_PKA_WRAPPER_CFG 41 +#define SLAVE_QDSS_CFG 42 +#define SLAVE_QM_CFG 43 +#define SLAVE_QM_MPU_CFG 44 +#define SLAVE_QUP_0 45 +#define SLAVE_QUP_1 46 +#define SLAVE_QUP_3 47 +#define SLAVE_SAIL_THROTTLE_CFG 48 +#define SLAVE_SDC1 49 +#define SLAVE_SECURITY 50 +#define SLAVE_SNOC_THROTTLE_CFG 51 +#define SLAVE_TCSR 52 +#define SLAVE_TLMM 53 +#define SLAVE_TSC_CFG 54 +#define SLAVE_UFS_MEM_CFG 55 +#define SLAVE_USB2 56 +#define SLAVE_USB3_0 57 +#define SLAVE_VENUS_CFG 58 +#define SLAVE_VENUS_CVP_THROTTLE_CFG 59 +#define SLAVE_VENUS_V_CPU_THROTTLE_CFG 60 +#define SLAVE_VENUS_VCODEC_THROTTLE_CFG 61 +#define SLAVE_DDRSS_CFG 62 +#define SLAVE_GPDSP_NOC_CFG 63 +#define SLAVE_CNOC_MNOC_HF_CFG 64 +#define SLAVE_CNOC_MNOC_SF_CFG 65 +#define SLAVE_PCIE_ANOC_CFG 66 +#define SLAVE_SNOC_CFG 67 +#define SLAVE_BOOT_IMEM 68 +#define SLAVE_IMEM 69 +#define SLAVE_PIMEM 70 +#define SLAVE_PCIE_0 71 +#define SLAVE_PCIE_1 72 +#define SLAVE_QDSS_STM 73 +#define SLAVE_TCU 74 + +#define MASTER_CNOC_DC_NOC 0 +#define SLAVE_LLCC_CFG 1 +#define SLAVE_GEM_NOC_CFG 2 + +#define MASTER_GPU_TCU 0 +#define MASTER_PCIE_TCU 1 +#define MASTER_SYS_TCU 2 +#define MASTER_APPSS_PROC 3 +#define MASTER_COMPUTE_NOC 4 +#define MASTER_GEM_NOC_CFG 5 +#define MASTER_GPDSP_SAIL 6 +#define MASTER_GFX3D 7 +#define MASTER_MNOC_HF_MEM_NOC 8 +#define MASTER_MNOC_SF_MEM_NOC 9 +#define MASTER_ANOC_PCIE_GEM_NOC 10 +#define MASTER_SNOC_GC_MEM_NOC 11 +#define MASTER_SNOC_SF_MEM_NOC 12 +#define SLAVE_GEM_NOC_CNOC 13 +#define SLAVE_LLCC 14 +#define SLAVE_GEM_NOC_PCIE_CNOC 15 +#define SLAVE_SERVICE_GEM_NOC_1 16 +#define SLAVE_SERVICE_GEM_NOC_2 17 +#define SLAVE_SERVICE_GEM_NOC 18 +#define SLAVE_SERVICE_GEM_NOC2 19 + +#define MASTER_SAILSS_MD0 0 +#define MASTER_DSP0 1 +#define SLAVE_GP_DSP_SAIL_NOC 2 + +#define MASTER_CNOC_LPASS_AG_NOC 0 +#define MASTER_LPASS_PROC 1 +#define SLAVE_LPASS_CORE_CFG 2 +#define SLAVE_LPASS_LPI_CFG 3 +#define SLAVE_LPASS_MPU_CFG 4 +#define SLAVE_LPASS_TOP_CFG 5 +#define SLAVE_LPASS_SNOC 6 +#define SLAVE_SERVICES_LPASS_AML_NOC 7 +#define SLAVE_SERVICE_LPASS_AG_NOC 8 + +#define MASTER_LLCC 0 +#define SLAVE_EBI1 1 + +#define MASTER_CAMNOC_HF 0 +#define MASTER_CAMNOC_ICP 1 +#define MASTER_CAMNOC_SF 2 +#define MASTER_MDP0 3 +#define MASTER_MDP1 4 +#define MASTER_CNOC_MNOC_HF_CFG 5 +#define MASTER_CNOC_MNOC_SF_CFG 6 +#define MASTER_VIDEO_P0 7 +#define MASTER_VIDEO_PROC 8 +#define MASTER_VIDEO_V_PROC 9 +#define SLAVE_MNOC_HF_MEM_NOC 10 +#define SLAVE_MNOC_SF_MEM_NOC 11 +#define SLAVE_SERVICE_MNOC_HF 12 +#define SLAVE_SERVICE_MNOC_SF 13 + +#define MASTER_CDSP_NOC_CFG 0 +#define MASTER_CDSP_PROC 1 +#define SLAVE_HCP_A 2 +#define SLAVE_CDSP_MEM_NOC 3 +#define SLAVE_SERVICE_NSP_NOC 4 + +#define MASTER_PCIE_0 0 +#define MASTER_PCIE_1 1 +#define SLAVE_ANOC_PCIE_GEM_NOC 2 + +#define MASTER_GIC_AHB 0 +#define MASTER_A1NOC_SNOC 1 +#define MASTER_A2NOC_SNOC 2 +#define MASTER_LPASS_ANOC 3 +#define MASTER_SNOC_CFG 4 +#define MASTER_PIMEM 5 +#define MASTER_GIC 6 +#define SLAVE_SNOC_GEM_NOC_GC 7 +#define SLAVE_SNOC_GEM_NOC_SF 8 +#define SLAVE_SERVICE_SNOC 9 + +#endif -- cgit v1.2.3 From 6c5e948f1fffdb1ae2c6d2f3d5336dbf07189334 Mon Sep 17 00:00:00 2001 From: Raviteja Laggyshetty Date: Tue, 24 Sep 2024 14:39:57 +0000 Subject: dt-bindings: interconnect: document the RPMh Network-On-Chip interconnect in QCS615 SoC Document the RPMh Network-On-Chip Interconnect of the QCS615 platform. Signed-off-by: Raviteja Laggyshetty Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240924143958.25-2-quic_rlaggysh@quicinc.com Signed-off-by: Georgi Djakov --- .../dt-bindings/interconnect/qcom,qcs615-rpmh.h | 136 +++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 include/dt-bindings/interconnect/qcom,qcs615-rpmh.h (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,qcs615-rpmh.h b/include/dt-bindings/interconnect/qcom,qcs615-rpmh.h new file mode 100644 index 000000000000..84ae0d39e73c --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,qcs615-rpmh.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_QCS615_H +#define __DT_BINDINGS_INTERCONNECT_QCOM_QCS615_H + +#define MASTER_A1NOC_CFG 1 +#define MASTER_QDSS_BAM 2 +#define MASTER_QSPI 3 +#define MASTER_QUP_0 4 +#define MASTER_BLSP_1 5 +#define MASTER_CNOC_A2NOC 6 +#define MASTER_CRYPTO 7 +#define MASTER_IPA 8 +#define MASTER_EMAC_EVB 9 +#define MASTER_PCIE 10 +#define MASTER_QDSS_ETR 11 +#define MASTER_SDCC_1 12 +#define MASTER_SDCC_2 13 +#define MASTER_UFS_MEM 14 +#define MASTER_USB2 15 +#define MASTER_USB3_0 16 +#define SLAVE_A1NOC_SNOC 17 +#define SLAVE_LPASS_SNOC 18 +#define SLAVE_ANOC_PCIE_SNOC 19 +#define SLAVE_SERVICE_A2NOC 20 + +#define MASTER_CAMNOC_HF0_UNCOMP 1 +#define MASTER_CAMNOC_HF1_UNCOMP 2 +#define MASTER_CAMNOC_SF_UNCOMP 3 +#define SLAVE_CAMNOC_UNCOMP 4 + +#define MASTER_SPDM 1 +#define MASTER_SNOC_CNOC 2 +#define MASTER_QDSS_DAP 3 +#define SLAVE_A1NOC_CFG 4 +#define SLAVE_AHB2PHY_EAST 5 +#define SLAVE_AHB2PHY_WEST 6 +#define SLAVE_AOP 7 +#define SLAVE_AOSS 8 +#define SLAVE_CAMERA_CFG 9 +#define SLAVE_CLK_CTL 10 +#define SLAVE_RBCPR_CX_CFG 11 +#define SLAVE_RBCPR_MX_CFG 12 +#define SLAVE_CRYPTO_0_CFG 13 +#define SLAVE_CNOC_DDRSS 14 +#define SLAVE_DISPLAY_CFG 15 +#define SLAVE_EMAC_AVB_CFG 16 +#define SLAVE_GLM 17 +#define SLAVE_GFX3D_CFG 18 +#define SLAVE_IMEM_CFG 19 +#define SLAVE_IPA_CFG 20 +#define SLAVE_CNOC_MNOC_CFG 21 +#define SLAVE_PCIE_CFG 22 +#define SLAVE_PIMEM_CFG 23 +#define SLAVE_PRNG 24 +#define SLAVE_QDSS_CFG 25 +#define SLAVE_QSPI 26 +#define SLAVE_QUP_0 27 +#define SLAVE_QUP_1 28 +#define SLAVE_SDCC_1 29 +#define SLAVE_SDCC_2 30 +#define SLAVE_SNOC_CFG 31 +#define SLAVE_SPDM_WRAPPER 32 +#define SLAVE_TCSR 33 +#define SLAVE_TLMM_EAST 34 +#define SLAVE_TLMM_SOUTH 35 +#define SLAVE_TLMM_WEST 36 +#define SLAVE_UFS_MEM_CFG 37 +#define SLAVE_USB2 38 +#define SLAVE_USB3 39 +#define SLAVE_VENUS_CFG 40 +#define SLAVE_VSENSE_CTRL_CFG 41 +#define SLAVE_CNOC_A2NOC 42 +#define SLAVE_SERVICE_CNOC 43 + +#define MASTER_CNOC_DC_NOC 1 +#define SLAVE_DC_NOC_GEMNOC 2 +#define SLAVE_LLCC_CFG 3 + +#define MASTER_APPSS_PROC 1 +#define MASTER_GPU_TCU 2 +#define MASTER_SYS_TCU 3 +#define MASTER_GEM_NOC_CFG 4 +#define MASTER_GFX3D 5 +#define MASTER_MNOC_HF_MEM_NOC 6 +#define MASTER_MNOC_SF_MEM_NOC 7 +#define MASTER_SNOC_GC_MEM_NOC 8 +#define MASTER_SNOC_SF_MEM_NOC 9 +#define SLAVE_MSS_PROC_MS_MPU_CFG 10 +#define SLAVE_GEM_NOC_SNOC 11 +#define SLAVE_LLCC 12 +#define SLAVE_MEM_NOC_PCIE_SNOC 13 +#define SLAVE_SERVICE_GEM_NOC 14 + +#define MASTER_IPA_CORE 1 +#define SLAVE_IPA_CORE 2 + +#define MASTER_LLCC 1 +#define SLAVE_EBI1 2 + +#define MASTER_CNOC_MNOC_CFG 1 +#define MASTER_CAMNOC_HF0 2 +#define MASTER_CAMNOC_HF1 3 +#define MASTER_CAMNOC_SF 4 +#define MASTER_MDP0 5 +#define MASTER_ROTATOR 6 +#define MASTER_VIDEO_P0 7 +#define MASTER_VIDEO_PROC 8 +#define SLAVE_MNOC_SF_MEM_NOC 9 +#define SLAVE_MNOC_HF_MEM_NOC 10 +#define SLAVE_SERVICE_MNOC 11 + +#define MASTER_SNOC_CFG 1 +#define MASTER_A1NOC_SNOC 2 +#define MASTER_GEM_NOC_SNOC 3 +#define MASTER_GEM_NOC_PCIE_SNOC 4 +#define MASTER_LPASS_ANOC 5 +#define MASTER_ANOC_PCIE_SNOC 6 +#define MASTER_PIMEM 7 +#define MASTER_GIC 8 +#define SLAVE_APPSS 9 +#define SLAVE_SNOC_CNOC 10 +#define SLAVE_SNOC_GEM_NOC_SF 11 +#define SLAVE_SNOC_MEM_NOC_GC 12 +#define SLAVE_IMEM 13 +#define SLAVE_PIMEM 14 +#define SLAVE_SERVICE_SNOC 15 +#define SLAVE_PCIE_0 16 +#define SLAVE_QDSS_STM 17 +#define SLAVE_TCU 18 + +#endif + -- cgit v1.2.3 From 3ccddc3991beef2705e8097b01ae07054bf11022 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 21 Oct 2024 16:37:46 +0300 Subject: drm/fbdev: fix drm_fb_helper_deferred_io() build failure The drm_fb_helper_deferred_io() uses struct fb_deferred_io_pageref, which isn't available without CONFIG_FB_DEFERRED_IO. Put the function under corresponding #ifdef to fix build failure if deferred I/O isn't enabled. Fixes: 8058944f5226 ("drm/fbdev: Select fbdev I/O helpers from modules that require them") Signed-off-by: Dmitry Baryshkov Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20241021-fix-drm-deferred-v2-1-db1de4c6b042@linaro.org --- include/drm/drm_fb_helper.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index 699f2790b9ac..8426b9921a03 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -256,7 +256,9 @@ void drm_fb_helper_fill_info(struct fb_info *info, void drm_fb_helper_damage_range(struct fb_info *info, off_t off, size_t len); void drm_fb_helper_damage_area(struct fb_info *info, u32 x, u32 y, u32 width, u32 height); +#ifdef CONFIG_FB_DEFERRED_IO void drm_fb_helper_deferred_io(struct fb_info *info, struct list_head *pagereflist); +#endif void drm_fb_helper_set_suspend(struct drm_fb_helper *fb_helper, bool suspend); void drm_fb_helper_set_suspend_unlocked(struct drm_fb_helper *fb_helper, @@ -361,10 +363,12 @@ static inline int drm_fb_helper_ioctl(struct fb_info *info, unsigned int cmd, return 0; } +#ifdef CONFIG_FB_DEFERRED_IO static inline void drm_fb_helper_deferred_io(struct fb_info *info, struct list_head *pagelist) { } +#endif static inline void drm_fb_helper_set_suspend(struct drm_fb_helper *fb_helper, bool suspend) -- cgit v1.2.3 From 43c7ce69d28e185f62fe2b8be2c681c5cac0bc6b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:50 -0700 Subject: rtnetlink: Protect struct rtnl_link_ops with SRCU. Once RTNL is replaced with rtnl_net_lock(), we need a mechanism to guarantee that rtnl_link_ops is alive during inflight RTM_NEWLINK even when its module is being unloaded. Let's use SRCU to protect ops. rtnl_link_ops_get() now iterates link_ops under RCU and returns SRCU-protected ops pointer. The caller must call rtnl_link_ops_put() to release the pointer after the use. Also, __rtnl_link_unregister() unlinks the ops first and calls synchronize_srcu() to wait for inflight RTM_NEWLINK requests to complete. Note that link_ops needs to be protected by its dedicated lock when RTNL is removed. Suggested-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/rtnetlink.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index bb49c5708ce7..1a6aa5ca74f3 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -3,6 +3,7 @@ #define __NET_RTNETLINK_H #include +#include #include typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, @@ -69,7 +70,8 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh) /** * struct rtnl_link_ops - rtnetlink link operations * - * @list: Used internally + * @list: Used internally, protected by RTNL and SRCU + * @srcu: Used internally * @kind: Identifier * @netns_refund: Physical device, move to init_net on netns exit * @maxtype: Highest device specific netlink attribute number @@ -100,6 +102,7 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh) */ struct rtnl_link_ops { struct list_head list; + struct srcu_struct srcu; const char *kind; -- cgit v1.2.3 From 26eebdc4b005ccd4cf63f4fef4c9c0adf9bfa380 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:56 -0700 Subject: rtnetlink: Return int from rtnl_af_register(). The next patch will add init_srcu_struct() in rtnl_af_register(), then we need to handle its error. Let's add the error handling in advance to make the following patch cleaner. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Matt Johnston Signed-off-by: Paolo Abeni --- include/net/rtnetlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 1a6aa5ca74f3..969138ae2f4b 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -204,7 +204,7 @@ struct rtnl_af_ops { size_t (*get_stats_af_size)(const struct net_device *dev); }; -void rtnl_af_register(struct rtnl_af_ops *ops); +int rtnl_af_register(struct rtnl_af_ops *ops); void rtnl_af_unregister(struct rtnl_af_ops *ops); struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]); -- cgit v1.2.3 From 6ab0f866948323724e95cf14d9e47fd77703c192 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:57 -0700 Subject: rtnetlink: Protect struct rtnl_af_ops with SRCU. Once RTNL is replaced with rtnl_net_lock(), we need a mechanism to guarantee that rtnl_af_ops is alive during inflight RTM_SETLINK even when its module is being unloaded. Let's use SRCU to protect ops. rtnl_af_lookup() now iterates rtnl_af_ops under RCU and returns SRCU-protected ops pointer. The caller must call rtnl_af_put() to release the pointer after the use. Also, rtnl_af_unregister() unlinks the ops first and calls synchronize_srcu() to wait for inflight RTM_SETLINK requests to complete. Note that rtnl_af_ops needs to be protected by its dedicated lock when RTNL is removed. Note also that BUG_ON() in do_setlink() is changed to the normal error handling as a different af_ops might be found after validate_linkmsg(). Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/rtnetlink.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 969138ae2f4b..e0d9a8eae6b6 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -172,7 +172,8 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops); /** * struct rtnl_af_ops - rtnetlink address family operations * - * @list: Used internally + * @list: Used internally, protected by RTNL and SRCU + * @srcu: Used internally * @family: Address family * @fill_link_af: Function to fill IFLA_AF_SPEC with address family * specific netlink attributes. @@ -185,6 +186,8 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops); */ struct rtnl_af_ops { struct list_head list; + struct srcu_struct srcu; + int family; int (*fill_link_af)(struct sk_buff *skb, -- cgit v1.2.3 From 6474353a5e3d0b2cf610153cea0c61f576a36d0a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 25 Sep 2024 11:05:16 +0200 Subject: epoll: annotate racy check Epoll relies on a racy fastpath check during __fput() in eventpoll_release() to avoid the hit of pointlessly acquiring a semaphore. Annotate that race by using WRITE_ONCE() and READ_ONCE(). Link: https://lore.kernel.org/r/66edfb3c.050a0220.3195df.001a.GAE@google.com Link: https://lore.kernel.org/r/20240925-fungieren-anbauen-79b334b00542@brauner Reviewed-by: Jan Kara Reported-by: syzbot+3b6b32dc50537a49bb4a@syzkaller.appspotmail.com Signed-off-by: Christian Brauner --- include/linux/eventpoll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 3337745d81bd..0c0d00fcd131 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -42,7 +42,7 @@ static inline void eventpoll_release(struct file *file) * because the file in on the way to be removed and nobody ( but * eventpoll ) has still a reference to this file. */ - if (likely(!file->f_ep)) + if (likely(!READ_ONCE(file->f_ep))) return; /* -- cgit v1.2.3 From 900bbaae67e980945dec74d36f8afe0de7556d5a Mon Sep 17 00:00:00 2001 From: Xuewen Yan Date: Fri, 26 Apr 2024 16:05:48 +0800 Subject: epoll: Add synchronous wakeup support for ep_poll_callback Now, the epoll only use wake_up() interface to wake up task. However, sometimes, there are epoll users which want to use the synchronous wakeup flag to hint the scheduler, such as Android binder driver. So add a wake_up_sync() define, and use the wake_up_sync() when the sync is true in ep_poll_callback(). Co-developed-by: Jing Xia Signed-off-by: Jing Xia Signed-off-by: Xuewen Yan Link: https://lore.kernel.org/r/20240426080548.8203-1-xuewen.yan@unisoc.com Tested-by: Brian Geffon Reviewed-by: Brian Geffon Reported-by: Benoit Lize Signed-off-by: Christian Brauner --- include/linux/wait.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index 8aa3372f21a0..2b322a9b88a2 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -221,6 +221,7 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head); #define wake_up_all(x) __wake_up(x, TASK_NORMAL, 0, NULL) #define wake_up_locked(x) __wake_up_locked((x), TASK_NORMAL, 1) #define wake_up_all_locked(x) __wake_up_locked((x), TASK_NORMAL, 0) +#define wake_up_sync(x) __wake_up_sync(x, TASK_NORMAL) #define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL) #define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL) -- cgit v1.2.3 From 99bdadbde9c418f29b78b7241732268dbc0a05cc Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 15 Oct 2024 22:21:54 +0200 Subject: acl: Realign struct posix_acl to save 8 bytes Reduce posix_acl's struct size by 8 bytes by realigning its members. Cc: Christian Brauner Reviewed-by: Jan Kara Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20241015202158.2376-1-thorsten.blum@linux.dev Signed-off-by: Christian Brauner --- include/linux/posix_acl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 0e65b3d634d9..2d6a4badd306 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -28,8 +28,8 @@ struct posix_acl_entry { struct posix_acl { refcount_t a_refcount; - struct rcu_head a_rcu; unsigned int a_count; + struct rcu_head a_rcu; struct posix_acl_entry a_entries[]; }; -- cgit v1.2.3 From 8c6e03ffedc5463d1aa1ba89f6ceb082518a3520 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 18 Oct 2024 14:14:21 +0200 Subject: acl: Annotate struct posix_acl with __counted_by() Add the __counted_by compiler attribute to the flexible array member a_entries to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and CONFIG_FORTIFY_SOURCE. Use struct_size() to calculate the number of bytes to allocate for new and cloned acls and remove the local size variables. Change the posix_acl_alloc() function parameter count from int to unsigned int to match posix_acl's a_count data type. Add identifier names to the function definition to silence two checkpatch warnings. Reviewed-by: Jan Kara Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20241018121426.155247-2-thorsten.blum@linux.dev Cc: Nathan Chancellor Signed-off-by: Christian Brauner --- include/linux/posix_acl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 2d6a4badd306..e2d47eb1a7f3 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -30,7 +30,7 @@ struct posix_acl { refcount_t a_refcount; unsigned int a_count; struct rcu_head a_rcu; - struct posix_acl_entry a_entries[]; + struct posix_acl_entry a_entries[] __counted_by(a_count); }; #define FOREACH_ACL_ENTRY(pa, acl, pe) \ @@ -62,7 +62,7 @@ posix_acl_release(struct posix_acl *acl) /* posix_acl.c */ extern void posix_acl_init(struct posix_acl *, int); -extern struct posix_acl *posix_acl_alloc(int, gfp_t); +extern struct posix_acl *posix_acl_alloc(unsigned int count, gfp_t flags); extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t); extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *); extern int __posix_acl_create(struct posix_acl **, gfp_t, umode_t *); -- cgit v1.2.3 From bc2bb732162fb183e5c8df9d42604ddb7ec36e8b Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 18 Oct 2024 11:33:24 +0300 Subject: dt-bindings: interconnect: qcom: document SAR2130P NoC Add bindings for the Network of Connects (NoC) present on the Qualcomm SAR2130P platform. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20241018-sar2130p-icc-v2-1-c58c73dcd19d@linaro.org Signed-off-by: Georgi Djakov --- .../dt-bindings/interconnect/qcom,sar2130p-rpmh.h | 137 +++++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 include/dt-bindings/interconnect/qcom,sar2130p-rpmh.h (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,sar2130p-rpmh.h b/include/dt-bindings/interconnect/qcom,sar2130p-rpmh.h new file mode 100644 index 000000000000..aec7cbb7cd70 --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,sar2130p-rpmh.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2022, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2024, Linaro Ltd. + */ + +#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_SAR2130P_H +#define __DT_BINDINGS_INTERCONNECT_QCOM_SAR2130P_H + +#define MASTER_QUP_CORE_0 0 +#define MASTER_QUP_CORE_1 1 +#define SLAVE_QUP_CORE_0 2 +#define SLAVE_QUP_CORE_1 3 + +#define MASTER_GEM_NOC_CNOC 0 +#define MASTER_GEM_NOC_PCIE_SNOC 1 +#define MASTER_QDSS_DAP 2 +#define SLAVE_AHB2PHY_SOUTH 3 +#define SLAVE_AOSS 4 +#define SLAVE_CAMERA_CFG 5 +#define SLAVE_CLK_CTL 6 +#define SLAVE_CDSP_CFG 7 +#define SLAVE_RBCPR_CX_CFG 8 +#define SLAVE_RBCPR_MMCX_CFG 9 +#define SLAVE_RBCPR_MXA_CFG 10 +#define SLAVE_RBCPR_MXC_CFG 11 +#define SLAVE_CPR_NSPCX 12 +#define SLAVE_CRYPTO_0_CFG 13 +#define SLAVE_CX_RDPM 14 +#define SLAVE_DISPLAY_CFG 15 +#define SLAVE_GFX3D_CFG 16 +#define SLAVE_IMEM_CFG 17 +#define SLAVE_IPC_ROUTER_CFG 18 +#define SLAVE_LPASS 19 +#define SLAVE_MX_RDPM 20 +#define SLAVE_PCIE_0_CFG 21 +#define SLAVE_PCIE_1_CFG 22 +#define SLAVE_PDM 23 +#define SLAVE_PIMEM_CFG 24 +#define SLAVE_PRNG 25 +#define SLAVE_QDSS_CFG 26 +#define SLAVE_QSPI_0 27 +#define SLAVE_QUP_0 28 +#define SLAVE_QUP_1 29 +#define SLAVE_SDCC_1 30 +#define SLAVE_TCSR 31 +#define SLAVE_TLMM 32 +#define SLAVE_TME_CFG 33 +#define SLAVE_USB3_0 34 +#define SLAVE_VENUS_CFG 35 +#define SLAVE_VSENSE_CTRL_CFG 36 +#define SLAVE_WLAN_Q6_CFG 37 +#define SLAVE_DDRSS_CFG 38 +#define SLAVE_CNOC_MNOC_CFG 39 +#define SLAVE_SNOC_CFG 40 +#define SLAVE_IMEM 41 +#define SLAVE_PIMEM 42 +#define SLAVE_SERVICE_CNOC 43 +#define SLAVE_PCIE_0 44 +#define SLAVE_PCIE_1 45 +#define SLAVE_QDSS_STM 46 +#define SLAVE_TCU 47 + +#define MASTER_GPU_TCU 0 +#define MASTER_SYS_TCU 1 +#define MASTER_APPSS_PROC 2 +#define MASTER_GFX3D 3 +#define MASTER_MNOC_HF_MEM_NOC 4 +#define MASTER_MNOC_SF_MEM_NOC 5 +#define MASTER_COMPUTE_NOC 6 +#define MASTER_ANOC_PCIE_GEM_NOC 7 +#define MASTER_SNOC_GC_MEM_NOC 8 +#define MASTER_SNOC_SF_MEM_NOC 9 +#define MASTER_WLAN_Q6 10 +#define SLAVE_GEM_NOC_CNOC 11 +#define SLAVE_LLCC 12 +#define SLAVE_MEM_NOC_PCIE_SNOC 13 + +#define MASTER_CNOC_LPASS_AG_NOC 0 +#define MASTER_LPASS_PROC 1 +#define SLAVE_LPASS_CORE_CFG 2 +#define SLAVE_LPASS_LPI_CFG 3 +#define SLAVE_LPASS_MPU_CFG 4 +#define SLAVE_LPASS_TOP_CFG 5 +#define SLAVE_LPASS_SNOC 6 +#define SLAVE_SERVICES_LPASS_AML_NOC 7 +#define SLAVE_SERVICE_LPASS_AG_NOC 8 + +#define MASTER_LLCC 0 +#define SLAVE_EBI1 1 + +#define MASTER_CAMNOC_HF 0 +#define MASTER_CAMNOC_ICP 1 +#define MASTER_CAMNOC_SF 2 +#define MASTER_LSR 3 +#define MASTER_MDP 4 +#define MASTER_CNOC_MNOC_CFG 5 +#define MASTER_VIDEO 6 +#define MASTER_VIDEO_CV_PROC 7 +#define MASTER_VIDEO_PROC 8 +#define MASTER_VIDEO_V_PROC 9 +#define SLAVE_MNOC_HF_MEM_NOC 10 +#define SLAVE_MNOC_SF_MEM_NOC 11 +#define SLAVE_SERVICE_MNOC 12 + +#define MASTER_CDSP_NOC_CFG 0 +#define MASTER_CDSP_PROC 1 +#define SLAVE_CDSP_MEM_NOC 2 +#define SLAVE_SERVICE_NSP_NOC 3 + +#define MASTER_PCIE_0 0 +#define MASTER_PCIE_1 1 +#define SLAVE_ANOC_PCIE_GEM_NOC 2 + +#define MASTER_GIC_AHB 0 +#define MASTER_QDSS_BAM 1 +#define MASTER_QSPI_0 2 +#define MASTER_QUP_0 3 +#define MASTER_QUP_1 4 +#define MASTER_A2NOC_SNOC 5 +#define MASTER_CNOC_DATAPATH 6 +#define MASTER_LPASS_ANOC 7 +#define MASTER_SNOC_CFG 8 +#define MASTER_CRYPTO 9 +#define MASTER_PIMEM 10 +#define MASTER_GIC 11 +#define MASTER_QDSS_ETR 12 +#define MASTER_QDSS_ETR_1 13 +#define MASTER_SDCC_1 14 +#define MASTER_USB3_0 15 +#define SLAVE_A2NOC_SNOC 16 +#define SLAVE_SNOC_GEM_NOC_GC 17 +#define SLAVE_SNOC_GEM_NOC_SF 18 +#define SLAVE_SERVICE_SNOC 19 + +#endif -- cgit v1.2.3 From c16a7d3ef029494a6f5b5cd20606dd08aeaa2b0f Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Thu, 17 Oct 2024 11:51:34 +0300 Subject: dt-bindings: power: Add binding for MediaTek MT6735 power controller Add DT binding for MediaTek MT6735 SCPSYS power controller. Signed-off-by: Yassine Oudjana Reviewed-by: AngeloGioacchino Del Regno Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241017085136.68053-2-y.oudjana@protonmail.com Signed-off-by: Ulf Hansson --- .../dt-bindings/power/mediatek,mt6735-power-controller.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/dt-bindings/power/mediatek,mt6735-power-controller.h (limited to 'include') diff --git a/include/dt-bindings/power/mediatek,mt6735-power-controller.h b/include/dt-bindings/power/mediatek,mt6735-power-controller.h new file mode 100644 index 000000000000..6957075fcb9e --- /dev/null +++ b/include/dt-bindings/power/mediatek,mt6735-power-controller.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_POWER_MT6735_POWER_CONTROLLER_H +#define _DT_BINDINGS_POWER_MT6735_POWER_CONTROLLER_H + +#define MT6735_POWER_DOMAIN_MD1 0 +#define MT6735_POWER_DOMAIN_CONN 1 +#define MT6735_POWER_DOMAIN_DIS 2 +#define MT6735_POWER_DOMAIN_MFG 3 +#define MT6735_POWER_DOMAIN_ISP 4 +#define MT6735_POWER_DOMAIN_VDE 5 +#define MT6735_POWER_DOMAIN_VEN 6 + +#endif -- cgit v1.2.3 From c2114a0d17631497df99388905353c11f4d5f0dd Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Thu, 17 Oct 2024 11:51:35 +0300 Subject: pmdomain: mediatek: Add support for MT6735 Add support for SCPSYS power domains of MT6735. All non-CPU power domains are added except for MD2 (C2K modem), which is left out due to issues with powering it on. Signed-off-by: Yassine Oudjana Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20241017085136.68053-3-y.oudjana@protonmail.com Signed-off-by: Ulf Hansson --- include/linux/soc/mediatek/infracfg.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index 6c6cccc848f4..9956e18c5ffa 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -434,6 +434,11 @@ #define MT7622_TOP_AXI_PROT_EN_WB (BIT(2) | BIT(6) | \ BIT(7) | BIT(8)) +#define MT6735_TOP_AXI_PROT_EN_CONN (BIT(2) | BIT(8)) +#define MT6735_TOP_AXI_PROT_EN_MD1 (BIT(24) | BIT(25) | \ + BIT(26) | BIT(27) | \ + BIT(28)) + #define INFRA_TOPAXI_PROTECTEN 0x0220 #define INFRA_TOPAXI_PROTECTSTA1 0x0228 #define INFRA_TOPAXI_PROTECTEN_SET 0x0260 -- cgit v1.2.3 From d811ac148f0afd2f3f7e1cd7f54de8da973ec5e3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 15 Oct 2024 15:56:35 +0200 Subject: virtchnl: fix m68k build. The kernel test robot reported a build failure on m68k in the intel driver due to the recent shapers-related changes. The mentioned arch has funny alignment properties, let's be explicit about the binary layout expectation introducing a padding field. Fixes: 608a5c05c39b ("virtchnl: support queue rate limit and quanta size configuration") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202410131710.71Wt6LKO-lkp@intel.com/ Reviewed-by: Alexander Lobakin Reviewed-by: Paul Menzel Reviewed-by: Jacob Keller Link: https://patch.msgid.link/e45d1c9f17356d431b03b419f60b8b763d2ff768.1729000481.git.pabeni@redhat.com Signed-off-by: Paolo Abeni --- include/linux/avf/virtchnl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 223e433c39fe..13a11f3c09b8 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -1499,6 +1499,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_queue_chunk); struct virtchnl_quanta_cfg { u16 quanta_size; + u16 pad; struct virtchnl_queue_chunk queue_select; }; -- cgit v1.2.3 From 2c50ec98fc6cab28df35e0a22a2bcc7957d9d0ab Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 2 Oct 2024 14:06:47 -0600 Subject: block: remove redundant passthrough check in blk_mq_need_time_stamp() Simply checking the rq_flags is enough to determine if accounting is being done for this request. Reviewed-by: Keith Busch Reviewed-by: Anuj Gupta Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 4fecf46ef681..59e9adf815a4 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -857,12 +857,6 @@ void blk_mq_end_request_batch(struct io_comp_batch *ib); */ static inline bool blk_mq_need_time_stamp(struct request *rq) { - /* - * passthrough io doesn't use iostat accounting, cgroup stats - * and io scheduler functionalities. - */ - if (blk_rq_is_passthrough(rq)) - return false; return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED)); } -- cgit v1.2.3 From 9dfd9ea93aeab57d897bb7fc7c0707f26b0b9af8 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 3 Oct 2024 00:11:43 +0200 Subject: block: introduce add_disk_fwnode() Introduce add_disk_fwnode() as a replacement of device_add_disk() that permits to pass and attach a fwnode to disk dev. This variant can be useful for eMMC that might have the partition table for the disk defined in DT. A parser can later make use of the attached fwnode to parse the related table and init the hardcoded partition for the disk. device_add_disk() is converted to a simple wrapper of add_disk_fwnode() with the fwnode entry set as NULL. Signed-off-by: Christian Marangi Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20241002221306.4403-4-ansuelsmth@gmail.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 50c3b959da28..a6aae750b4ac 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -725,6 +725,9 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) #define for_each_bio(_bio) \ for (; _bio; _bio = _bio->bi_next) +int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups, + struct fwnode_handle *fwnode); int __must_check device_add_disk(struct device *parent, struct gendisk *disk, const struct attribute_group **groups); static inline int __must_check add_disk(struct gendisk *disk) -- cgit v1.2.3 From 110234da18ab482f6f583d28eff26b9569bf5622 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 7 Oct 2024 08:32:35 -0700 Subject: block: enable passthrough command statistics Applications using the passthrough interfaces for IO want to continue seeing the disk stats. These requests had been fenced off from this block layer feature. While the block layer doesn't necessarily know what a passthrough command does, we do know the data size and direction, which is enough to account for the command's stats. Since tracking these has the potential to produce unexpected results, the passthrough stats are locked behind a new queue flag that needs to be enabled with the /sys/block//queue/iostats_passthrough attribute. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20241007153236.2818562-1-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a6aae750b4ac..6b78a68e0bd9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -349,6 +349,9 @@ typedef unsigned int __bitwise blk_flags_t; /* I/O topology is misaligned */ #define BLK_FLAG_MISALIGNED ((__force blk_flags_t)(1u << 1)) +/* passthrough command IO accounting */ +#define BLK_FLAG_IOSTATS_PASSTHROUGH ((__force blk_flags_t)(1u << 2)) + struct queue_limits { blk_features_t features; blk_flags_t flags; @@ -617,6 +620,8 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) (!((q)->limits.features & BLK_FEAT_ROTATIONAL)) #define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) +#define blk_queue_passthrough_stat(q) \ + ((q)->limits.flags & BLK_FLAG_IOSTATS_PASSTHROUGH) #define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX) #define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA) #ifdef CONFIG_BLK_RQ_ALLOC_TIME -- cgit v1.2.3 From 59eaa01ce7a6cbc5c36b928f52888f99fca6b295 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Mon, 7 Oct 2024 12:24:17 -0600 Subject: ublk: support device recovery without I/O queueing ublk currently supports the following behaviors on ublk server exit: A: outstanding I/Os get errors, subsequently issued I/Os get errors B: outstanding I/Os get errors, subsequently issued I/Os queue C: outstanding I/Os get reissued, subsequently issued I/Os queue and the following behaviors for recovery of preexisting block devices by a future incarnation of the ublk server: 1: ublk devices stopped on ublk server exit (no recovery possible) 2: ublk devices are recoverable using start/end_recovery commands The userspace interface allows selection of combinations of these behaviors using flags specified at device creation time, namely: default behavior: A + 1 UBLK_F_USER_RECOVERY: B + 2 UBLK_F_USER_RECOVERY|UBLK_F_USER_RECOVERY_REISSUE: C + 2 The behavior A + 2 is currently unsupported. Add support for this behavior under the new flag combination UBLK_F_USER_RECOVERY|UBLK_F_USER_RECOVERY_FAIL_IO. Signed-off-by: Uday Shankar Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20241007182419.3263186-5-ushankar@purestorage.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 12873639ea96..a8bc98bb69fc 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -147,8 +147,18 @@ */ #define UBLK_F_NEED_GET_DATA (1UL << 2) +/* + * - Block devices are recoverable if ublk server exits and restarts + * - Outstanding I/O when ublk server exits is met with errors + * - I/O issued while there is no ublk server queues + */ #define UBLK_F_USER_RECOVERY (1UL << 3) +/* + * - Block devices are recoverable if ublk server exits and restarts + * - Outstanding I/O when ublk server exits is reissued + * - I/O issued while there is no ublk server queues + */ #define UBLK_F_USER_RECOVERY_REISSUE (1UL << 4) /* @@ -190,10 +200,18 @@ */ #define UBLK_F_ZONED (1ULL << 8) +/* + * - Block devices are recoverable if ublk server exits and restarts + * - Outstanding I/O when ublk server exits is met with errors + * - I/O issued while there is no ublk server is met with errors + */ +#define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 #define UBLK_S_DEV_QUIESCED 2 +#define UBLK_S_DEV_FAIL_IO 3 /* shipped via sqe->cmd of io_uring command */ struct ublksrv_ctrl_cmd { -- cgit v1.2.3 From b21d948f4cc73e3296f2365c7afca721dd6893fa Mon Sep 17 00:00:00 2001 From: Greg Joyce Date: Thu, 29 Aug 2024 12:56:11 -0500 Subject: block: sed-opal: add ioctl IOC_OPAL_SET_SID_PW After a SED drive is provisioned, there is no way to change the SID password via the ioctl() interface. A new ioctl IOC_OPAL_SET_SID_PW will allow the password to be changed. The valid current password is required. Signed-off-by: Greg Joyce Reviewed-by: Daniel Wagner Link: https://lore.kernel.org/r/20240829175639.6478-2-gjoyce@linux.ibm.com Signed-off-by: Jens Axboe --- include/linux/sed-opal.h | 1 + include/uapi/linux/sed-opal.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index 2ac50822554e..80f33a93f944 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -52,6 +52,7 @@ static inline bool is_sed_ioctl(unsigned int cmd) case IOC_OPAL_GET_GEOMETRY: case IOC_OPAL_DISCOVERY: case IOC_OPAL_REVERT_LSP: + case IOC_OPAL_SET_SID_PW: return true; } return false; diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h index d3994b7716bc..9025dd5a4f0f 100644 --- a/include/uapi/linux/sed-opal.h +++ b/include/uapi/linux/sed-opal.h @@ -215,5 +215,6 @@ struct opal_revert_lsp { #define IOC_OPAL_GET_GEOMETRY _IOR('p', 238, struct opal_geometry) #define IOC_OPAL_DISCOVERY _IOW('p', 239, struct opal_discovery) #define IOC_OPAL_REVERT_LSP _IOW('p', 240, struct opal_revert_lsp) +#define IOC_OPAL_SET_SID_PW _IOW('p', 241, struct opal_new_pw) #endif /* _UAPI_SED_OPAL_H */ -- cgit v1.2.3 From 8cf8dfceebdaf282da8a836b2bb578808a12698c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 22 Oct 2024 15:41:34 +0200 Subject: seccomp: Stub for !HAVE_ARCH_SECCOMP_FILTER If we have CONFIG_SECCOMP but not CONFIG_HAVE_ARCH_SECCOMP_FILTER we get a compilation error: ../kernel/entry/common.c: In function 'syscall_trace_enter': ../kernel/entry/common.c:55:23: error: implicit declaration of function '__secure_computing' [-Werror=implicit-function-declaration] 55 | ret = __secure_computing(NULL); | ^~~~~~~~~~~~~~~~~~ This is because generic entry calls __secure_computing() unconditionally. Provide the needed stub similar to how current ARM does this by calling secure_computing_strict() in the absence of secure_computing(). This is similar to what is done for ARM in arch/arm/kernel/ptrace.c. Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20241022-seccomp-compile-error-v2-1-c9f08a4f8ebb@linaro.org Signed-off-by: Kees Cook --- include/linux/seccomp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 709ad84809e1..341980599c71 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -32,6 +32,11 @@ static inline int secure_computing(void) } #else extern void secure_computing_strict(int this_syscall); +static inline int __secure_computing(const struct seccomp_data *sd) +{ + secure_computing_strict(sd->nr); + return 0; +} #endif extern long prctl_get_seccomp(void); -- cgit v1.2.3 From cfcdf395c21eeac4543d2b8fef9d29ae9e4559e9 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 8 Oct 2024 18:07:02 +0200 Subject: regulator: core: add callback to perform runtime init Provide an initialisation callback to handle runtime parameters. The idea is similar to the regulator_init() callback, but it provides regulator specific structures, instead of just the driver specific data. As an example, this allows the driver to amend the regulator constraints based on runtime parameters if necessary. Signed-off-by: Jerome Brunet Link: https://patch.msgid.link/20241008-regulator-ignored-data-v2-2-d1251e0ee507@baylibre.com Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index f230a472ccd3..d2f4427504f0 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -365,6 +365,8 @@ struct regulator_desc { int (*of_parse_cb)(struct device_node *, const struct regulator_desc *, struct regulator_config *); + int (*init_cb)(struct regulator_dev *, + struct regulator_config *); int id; unsigned int continuous_voltage_range:1; unsigned n_voltages; -- cgit v1.2.3 From 602ff58ae4fe4289b0ca71cba9fb82f7de92cd64 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 8 Oct 2024 18:07:03 +0200 Subject: regulator: core: remove machine init callback from config The machine specific regulator_init() appears to be unused. It does not allow a lot of interaction with the regulator framework, since nothing from the framework is passed along (desc, config, etc ...) Machine specific init may also be done with the added init_cb() in the regulator description, so remove regulator_init(). Signed-off-by: Jerome Brunet Link: https://patch.msgid.link/20241008-regulator-ignored-data-v2-3-d1251e0ee507@baylibre.com Signed-off-by: Mark Brown --- include/linux/regulator/machine.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 0cd76d264727..d0d700ff337a 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -285,8 +285,7 @@ struct regulator_init_data { int num_consumer_supplies; struct regulator_consumer_supply *consumer_supplies; - /* optional regulator machine specific init */ - int (*regulator_init)(void *driver_data); + /* optional regulator machine specific data */ void *driver_data; /* core does not touch this */ }; -- cgit v1.2.3 From 51e32e897539663957f7a0950f66b48f8896efee Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Sat, 19 Oct 2024 14:16:00 +0300 Subject: clk: Provide devm_clk_bulk_get_all_enabled() helper Commit 265b07df758a ("clk: Provide managed helper to get and enable bulk clocks") added devm_clk_bulk_get_all_enable() function, but missed to return the number of clocks stored in the clk_bulk_data table referenced by the clks argument. Without knowing the number, it's not possible to iterate these clocks when needed, hence the argument is useless and could have been simply removed. Introduce devm_clk_bulk_get_all_enabled() variant, which is consistent with devm_clk_bulk_get_all() in terms of the returned value: > 0 if one or more clocks have been stored = 0 if there are no clocks < 0 if an error occurred Moreover, the naming is consistent with devm_clk_get_enabled(), i.e. use the past form of 'enable'. To reduce code duplication and improve patch readability, make devm_clk_bulk_get_all_enable() use the new helper, as suggested by Stephen Boyd. Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Manivannan Sadhasivam Signed-off-by: Cristian Ciocaltea Link: https://lore.kernel.org/r/20241019-clk_bulk_ena_fix-v4-1-57f108f64e70@collabora.com Signed-off-by: Stephen Boyd --- include/linux/clk.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/clk.h b/include/linux/clk.h index 851a0f2cf42c..1dcee6d701e4 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -496,11 +496,13 @@ int __must_check devm_clk_bulk_get_all(struct device *dev, struct clk_bulk_data **clks); /** - * devm_clk_bulk_get_all_enable - Get and enable all clocks of the consumer (managed) + * devm_clk_bulk_get_all_enabled - Get and enable all clocks of the consumer (managed) * @dev: device for clock "consumer" * @clks: pointer to the clk_bulk_data table of consumer * - * Returns success (0) or negative errno. + * Returns a positive value for the number of clocks obtained while the + * clock references are stored in the clk_bulk_data table in @clks field. + * Returns 0 if there're none and a negative value if something failed. * * This helper function allows drivers to get all clocks of the * consumer and enables them in one operation with management. @@ -508,8 +510,8 @@ int __must_check devm_clk_bulk_get_all(struct device *dev, * is unbound. */ -int __must_check devm_clk_bulk_get_all_enable(struct device *dev, - struct clk_bulk_data **clks); +int __must_check devm_clk_bulk_get_all_enabled(struct device *dev, + struct clk_bulk_data **clks); /** * devm_clk_get - lookup and obtain a managed reference to a clock producer. @@ -1034,7 +1036,7 @@ static inline int __must_check devm_clk_bulk_get_all(struct device *dev, return 0; } -static inline int __must_check devm_clk_bulk_get_all_enable(struct device *dev, +static inline int __must_check devm_clk_bulk_get_all_enabled(struct device *dev, struct clk_bulk_data **clks) { return 0; @@ -1136,6 +1138,15 @@ static inline void clk_restore_context(void) {} #endif +/* Deprecated. Use devm_clk_bulk_get_all_enabled() */ +static inline int __must_check +devm_clk_bulk_get_all_enable(struct device *dev, struct clk_bulk_data **clks) +{ + int ret = devm_clk_bulk_get_all_enabled(dev, clks); + + return ret > 0 ? 0 : ret; +} + /* clk_prepare_enable helps cases using clk_enable in non-atomic context. */ static inline int clk_prepare_enable(struct clk *clk) { -- cgit v1.2.3 From 97ddae76ddd20ea35d2059086aacd85b707a09c5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 21 Oct 2024 13:41:13 -0400 Subject: Revert "drm/amdkfd: SMI report dropped event count" This reverts commit a3ab2d45b9887ee609cd3bea39f668236935774c. The userspace side for this code is not ready yet so revert for now. Reviewed-by: Philip Yang Signed-off-by: Alex Deucher Cc: Philip Yang --- include/uapi/linux/kfd_ioctl.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 7afd66d45313..fa9f9846b88e 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -530,7 +530,6 @@ enum kfd_smi_event { KFD_SMI_EVENT_QUEUE_EVICTION = 9, KFD_SMI_EVENT_QUEUE_RESTORE = 10, KFD_SMI_EVENT_UNMAP_FROM_GPU = 11, - KFD_SMI_EVENT_DROPPED_EVENT = 12, /* * max event number, as a flag bit to get events from all processes, @@ -611,7 +610,6 @@ struct kfd_ioctl_smi_events_args { * rw: 'W' for write page fault, 'R' for read page fault * rescheduled: 'R' if the queue restore failed and rescheduled to try again * error_code: migrate failure error code, 0 if no error - * drop_count: how many events dropped when fifo is full */ #define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\ "%x %s\n", (reset_seq_num), (reset_cause) @@ -647,10 +645,6 @@ struct kfd_ioctl_smi_events_args { "%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\ (node), (unmap_trigger) -#define KFD_EVENT_FMT_DROPPED_EVENT(ns, pid, drop_count)\ - "%lld -%d %d\n", (ns), (pid), (drop_count) - - /************************************************************************************************** * CRIU IOCTLs (Checkpoint Restore In Userspace) * -- cgit v1.2.3 From 7867cb6575ac2f93a2daccdf3df49e4f1a4594e4 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Fri, 11 Oct 2024 00:28:31 +0530 Subject: dt-bindings: clock: qcom: Add SA8775P video clock controller Add device tree bindings for the video clock controller on Qualcomm SA8775P platform. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/20241011-sa8775p-mm-v4-resend-patches-v5-1-4a9f17dc683a@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sa8775p-videocc.h | 47 ++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sa8775p-videocc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sa8775p-videocc.h b/include/dt-bindings/clock/qcom,sa8775p-videocc.h new file mode 100644 index 000000000000..e6325f68c317 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sa8775p-videocc.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_SA8775P_VIDEO_CC_H +#define _DT_BINDINGS_CLK_QCOM_SA8775P_VIDEO_CC_H + +/* VIDEO_CC clocks */ +#define VIDEO_CC_AHB_CLK 0 +#define VIDEO_CC_AHB_CLK_SRC 1 +#define VIDEO_CC_MVS0_CLK 2 +#define VIDEO_CC_MVS0_CLK_SRC 3 +#define VIDEO_CC_MVS0_DIV_CLK_SRC 4 +#define VIDEO_CC_MVS0C_CLK 5 +#define VIDEO_CC_MVS0C_DIV2_DIV_CLK_SRC 6 +#define VIDEO_CC_MVS1_CLK 7 +#define VIDEO_CC_MVS1_CLK_SRC 8 +#define VIDEO_CC_MVS1_DIV_CLK_SRC 9 +#define VIDEO_CC_MVS1C_CLK 10 +#define VIDEO_CC_MVS1C_DIV2_DIV_CLK_SRC 11 +#define VIDEO_CC_PLL_LOCK_MONITOR_CLK 12 +#define VIDEO_CC_SLEEP_CLK 13 +#define VIDEO_CC_SLEEP_CLK_SRC 14 +#define VIDEO_CC_SM_DIV_CLK_SRC 15 +#define VIDEO_CC_SM_OBS_CLK 16 +#define VIDEO_CC_XO_CLK 17 +#define VIDEO_CC_XO_CLK_SRC 18 +#define VIDEO_PLL0 19 +#define VIDEO_PLL1 20 + +/* VIDEO_CC power domains */ +#define VIDEO_CC_MVS0C_GDSC 0 +#define VIDEO_CC_MVS0_GDSC 1 +#define VIDEO_CC_MVS1C_GDSC 2 +#define VIDEO_CC_MVS1_GDSC 3 + +/* VIDEO_CC resets */ +#define VIDEO_CC_INTERFACE_BCR 0 +#define VIDEO_CC_MVS0_BCR 1 +#define VIDEO_CC_MVS0C_CLK_ARES 2 +#define VIDEO_CC_MVS0C_BCR 3 +#define VIDEO_CC_MVS1_BCR 4 +#define VIDEO_CC_MVS1C_CLK_ARES 5 +#define VIDEO_CC_MVS1C_BCR 6 + +#endif -- cgit v1.2.3 From 9b1873d2350b3f08463563603fdbfcea4f068e67 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Fri, 11 Oct 2024 00:28:33 +0530 Subject: dt-bindings: clock: qcom: Add SA8775P camera clock controller Add device tree bindings for the camera clock controller on Qualcomm SA8775P platform. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/20241011-sa8775p-mm-v4-resend-patches-v5-3-4a9f17dc683a@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sa8775p-camcc.h | 108 +++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sa8775p-camcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sa8775p-camcc.h b/include/dt-bindings/clock/qcom,sa8775p-camcc.h new file mode 100644 index 000000000000..38531acd699f --- /dev/null +++ b/include/dt-bindings/clock/qcom,sa8775p-camcc.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_SA8775P_CAM_CC_H +#define _DT_BINDINGS_CLK_QCOM_SA8775P_CAM_CC_H + +/* CAM_CC clocks */ +#define CAM_CC_CAMNOC_AXI_CLK 0 +#define CAM_CC_CAMNOC_AXI_CLK_SRC 1 +#define CAM_CC_CAMNOC_DCD_XO_CLK 2 +#define CAM_CC_CAMNOC_XO_CLK 3 +#define CAM_CC_CCI_0_CLK 4 +#define CAM_CC_CCI_0_CLK_SRC 5 +#define CAM_CC_CCI_1_CLK 6 +#define CAM_CC_CCI_1_CLK_SRC 7 +#define CAM_CC_CCI_2_CLK 8 +#define CAM_CC_CCI_2_CLK_SRC 9 +#define CAM_CC_CCI_3_CLK 10 +#define CAM_CC_CCI_3_CLK_SRC 11 +#define CAM_CC_CORE_AHB_CLK 12 +#define CAM_CC_CPAS_AHB_CLK 13 +#define CAM_CC_CPAS_FAST_AHB_CLK 14 +#define CAM_CC_CPAS_IFE_0_CLK 15 +#define CAM_CC_CPAS_IFE_1_CLK 16 +#define CAM_CC_CPAS_IFE_LITE_CLK 17 +#define CAM_CC_CPAS_IPE_CLK 18 +#define CAM_CC_CPAS_SFE_LITE_0_CLK 19 +#define CAM_CC_CPAS_SFE_LITE_1_CLK 20 +#define CAM_CC_CPHY_RX_CLK_SRC 21 +#define CAM_CC_CSI0PHYTIMER_CLK 22 +#define CAM_CC_CSI0PHYTIMER_CLK_SRC 23 +#define CAM_CC_CSI1PHYTIMER_CLK 24 +#define CAM_CC_CSI1PHYTIMER_CLK_SRC 25 +#define CAM_CC_CSI2PHYTIMER_CLK 26 +#define CAM_CC_CSI2PHYTIMER_CLK_SRC 27 +#define CAM_CC_CSI3PHYTIMER_CLK 28 +#define CAM_CC_CSI3PHYTIMER_CLK_SRC 29 +#define CAM_CC_CSID_CLK 30 +#define CAM_CC_CSID_CLK_SRC 31 +#define CAM_CC_CSID_CSIPHY_RX_CLK 32 +#define CAM_CC_CSIPHY0_CLK 33 +#define CAM_CC_CSIPHY1_CLK 34 +#define CAM_CC_CSIPHY2_CLK 35 +#define CAM_CC_CSIPHY3_CLK 36 +#define CAM_CC_FAST_AHB_CLK_SRC 37 +#define CAM_CC_GDSC_CLK 38 +#define CAM_CC_ICP_AHB_CLK 39 +#define CAM_CC_ICP_CLK 40 +#define CAM_CC_ICP_CLK_SRC 41 +#define CAM_CC_IFE_0_CLK 42 +#define CAM_CC_IFE_0_CLK_SRC 43 +#define CAM_CC_IFE_0_FAST_AHB_CLK 44 +#define CAM_CC_IFE_1_CLK 45 +#define CAM_CC_IFE_1_CLK_SRC 46 +#define CAM_CC_IFE_1_FAST_AHB_CLK 47 +#define CAM_CC_IFE_LITE_AHB_CLK 48 +#define CAM_CC_IFE_LITE_CLK 49 +#define CAM_CC_IFE_LITE_CLK_SRC 50 +#define CAM_CC_IFE_LITE_CPHY_RX_CLK 51 +#define CAM_CC_IFE_LITE_CSID_CLK 52 +#define CAM_CC_IFE_LITE_CSID_CLK_SRC 53 +#define CAM_CC_IPE_AHB_CLK 54 +#define CAM_CC_IPE_CLK 55 +#define CAM_CC_IPE_CLK_SRC 56 +#define CAM_CC_IPE_FAST_AHB_CLK 57 +#define CAM_CC_MCLK0_CLK 58 +#define CAM_CC_MCLK0_CLK_SRC 59 +#define CAM_CC_MCLK1_CLK 60 +#define CAM_CC_MCLK1_CLK_SRC 61 +#define CAM_CC_MCLK2_CLK 62 +#define CAM_CC_MCLK2_CLK_SRC 63 +#define CAM_CC_MCLK3_CLK 64 +#define CAM_CC_MCLK3_CLK_SRC 65 +#define CAM_CC_PLL0 66 +#define CAM_CC_PLL0_OUT_EVEN 67 +#define CAM_CC_PLL0_OUT_ODD 68 +#define CAM_CC_PLL2 69 +#define CAM_CC_PLL3 70 +#define CAM_CC_PLL3_OUT_EVEN 71 +#define CAM_CC_PLL4 72 +#define CAM_CC_PLL4_OUT_EVEN 73 +#define CAM_CC_PLL5 74 +#define CAM_CC_PLL5_OUT_EVEN 75 +#define CAM_CC_SFE_LITE_0_CLK 76 +#define CAM_CC_SFE_LITE_0_FAST_AHB_CLK 77 +#define CAM_CC_SFE_LITE_1_CLK 78 +#define CAM_CC_SFE_LITE_1_FAST_AHB_CLK 79 +#define CAM_CC_SLEEP_CLK 80 +#define CAM_CC_SLEEP_CLK_SRC 81 +#define CAM_CC_SLOW_AHB_CLK_SRC 82 +#define CAM_CC_SM_OBS_CLK 83 +#define CAM_CC_XO_CLK_SRC 84 +#define CAM_CC_QDSS_DEBUG_XO_CLK 85 + +/* CAM_CC power domains */ +#define CAM_CC_TITAN_TOP_GDSC 0 + +/* CAM_CC resets */ +#define CAM_CC_ICP_BCR 0 +#define CAM_CC_IFE_0_BCR 1 +#define CAM_CC_IFE_1_BCR 2 +#define CAM_CC_IPE_0_BCR 3 +#define CAM_CC_SFE_LITE_0_BCR 4 +#define CAM_CC_SFE_LITE_1_BCR 5 + +#endif -- cgit v1.2.3 From 33b5cd95d801cab8c634c822e6877470f4209612 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Fri, 11 Oct 2024 00:28:35 +0530 Subject: dt-bindings: clock: qcom: Add SA8775P display clock controllers Add device tree bindings for the display clock controllers on Qualcomm SA8775P platform. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/20241011-sa8775p-mm-v4-resend-patches-v5-5-4a9f17dc683a@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sa8775p-dispcc.h | 87 +++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sa8775p-dispcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sa8775p-dispcc.h b/include/dt-bindings/clock/qcom,sa8775p-dispcc.h new file mode 100644 index 000000000000..e2049e510658 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sa8775p-dispcc.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_SA8775P_DISP_CC_H +#define _DT_BINDINGS_CLK_QCOM_SA8775P_DISP_CC_H + +/* DISP_CC_0/1 clocks */ +#define MDSS_DISP_CC_MDSS_AHB1_CLK 0 +#define MDSS_DISP_CC_MDSS_AHB_CLK 1 +#define MDSS_DISP_CC_MDSS_AHB_CLK_SRC 2 +#define MDSS_DISP_CC_MDSS_BYTE0_CLK 3 +#define MDSS_DISP_CC_MDSS_BYTE0_CLK_SRC 4 +#define MDSS_DISP_CC_MDSS_BYTE0_DIV_CLK_SRC 5 +#define MDSS_DISP_CC_MDSS_BYTE0_INTF_CLK 6 +#define MDSS_DISP_CC_MDSS_BYTE1_CLK 7 +#define MDSS_DISP_CC_MDSS_BYTE1_CLK_SRC 8 +#define MDSS_DISP_CC_MDSS_BYTE1_DIV_CLK_SRC 9 +#define MDSS_DISP_CC_MDSS_BYTE1_INTF_CLK 10 +#define MDSS_DISP_CC_MDSS_DPTX0_AUX_CLK 11 +#define MDSS_DISP_CC_MDSS_DPTX0_AUX_CLK_SRC 12 +#define MDSS_DISP_CC_MDSS_DPTX0_CRYPTO_CLK 13 +#define MDSS_DISP_CC_MDSS_DPTX0_CRYPTO_CLK_SRC 14 +#define MDSS_DISP_CC_MDSS_DPTX0_LINK_CLK 15 +#define MDSS_DISP_CC_MDSS_DPTX0_LINK_CLK_SRC 16 +#define MDSS_DISP_CC_MDSS_DPTX0_LINK_DIV_CLK_SRC 17 +#define MDSS_DISP_CC_MDSS_DPTX0_LINK_INTF_CLK 18 +#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL0_CLK 19 +#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL0_CLK_SRC 20 +#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL1_CLK 21 +#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL1_CLK_SRC 22 +#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL2_CLK 23 +#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL2_CLK_SRC 24 +#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL3_CLK 25 +#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL3_CLK_SRC 26 +#define MDSS_DISP_CC_MDSS_DPTX0_USB_ROUTER_LINK_INTF_CLK 27 +#define MDSS_DISP_CC_MDSS_DPTX1_AUX_CLK 28 +#define MDSS_DISP_CC_MDSS_DPTX1_AUX_CLK_SRC 29 +#define MDSS_DISP_CC_MDSS_DPTX1_CRYPTO_CLK 30 +#define MDSS_DISP_CC_MDSS_DPTX1_CRYPTO_CLK_SRC 31 +#define MDSS_DISP_CC_MDSS_DPTX1_LINK_CLK 32 +#define MDSS_DISP_CC_MDSS_DPTX1_LINK_CLK_SRC 33 +#define MDSS_DISP_CC_MDSS_DPTX1_LINK_DIV_CLK_SRC 34 +#define MDSS_DISP_CC_MDSS_DPTX1_LINK_INTF_CLK 35 +#define MDSS_DISP_CC_MDSS_DPTX1_PIXEL0_CLK 36 +#define MDSS_DISP_CC_MDSS_DPTX1_PIXEL0_CLK_SRC 37 +#define MDSS_DISP_CC_MDSS_DPTX1_PIXEL1_CLK 38 +#define MDSS_DISP_CC_MDSS_DPTX1_PIXEL1_CLK_SRC 39 +#define MDSS_DISP_CC_MDSS_DPTX1_USB_ROUTER_LINK_INTF_CLK 40 +#define MDSS_DISP_CC_MDSS_ESC0_CLK 41 +#define MDSS_DISP_CC_MDSS_ESC0_CLK_SRC 42 +#define MDSS_DISP_CC_MDSS_ESC1_CLK 43 +#define MDSS_DISP_CC_MDSS_ESC1_CLK_SRC 44 +#define MDSS_DISP_CC_MDSS_MDP1_CLK 45 +#define MDSS_DISP_CC_MDSS_MDP_CLK 46 +#define MDSS_DISP_CC_MDSS_MDP_CLK_SRC 47 +#define MDSS_DISP_CC_MDSS_MDP_LUT1_CLK 48 +#define MDSS_DISP_CC_MDSS_MDP_LUT_CLK 49 +#define MDSS_DISP_CC_MDSS_NON_GDSC_AHB_CLK 50 +#define MDSS_DISP_CC_MDSS_PCLK0_CLK 51 +#define MDSS_DISP_CC_MDSS_PCLK0_CLK_SRC 52 +#define MDSS_DISP_CC_MDSS_PCLK1_CLK 53 +#define MDSS_DISP_CC_MDSS_PCLK1_CLK_SRC 54 +#define MDSS_DISP_CC_MDSS_PLL_LOCK_MONITOR_CLK 55 +#define MDSS_DISP_CC_MDSS_RSCC_AHB_CLK 56 +#define MDSS_DISP_CC_MDSS_RSCC_VSYNC_CLK 57 +#define MDSS_DISP_CC_MDSS_VSYNC1_CLK 58 +#define MDSS_DISP_CC_MDSS_VSYNC_CLK 59 +#define MDSS_DISP_CC_MDSS_VSYNC_CLK_SRC 60 +#define MDSS_DISP_CC_PLL0 61 +#define MDSS_DISP_CC_PLL1 62 +#define MDSS_DISP_CC_SLEEP_CLK 63 +#define MDSS_DISP_CC_SLEEP_CLK_SRC 64 +#define MDSS_DISP_CC_SM_OBS_CLK 65 +#define MDSS_DISP_CC_XO_CLK 66 +#define MDSS_DISP_CC_XO_CLK_SRC 67 + +/* DISP_CC_0/1 power domains */ +#define MDSS_DISP_CC_MDSS_CORE_GDSC 0 +#define MDSS_DISP_CC_MDSS_CORE_INT2_GDSC 1 + +/* DISP_CC_0/1 resets */ +#define MDSS_DISP_CC_MDSS_CORE_BCR 0 +#define MDSS_DISP_CC_MDSS_RSCC_BCR 1 + +#endif -- cgit v1.2.3 From dbc81e680a0f007cf062963c40f145074aacab2d Mon Sep 17 00:00:00 2001 From: Lijuan Gao Date: Tue, 22 Oct 2024 16:54:30 +0800 Subject: dt-bindings: arm: qcom,ids: add SoC ID for QCS615 Add the ID for the Qualcomm QCS615 SoC. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Lijuan Gao Link: https://lore.kernel.org/r/20241022-add_initial_support_for_qcs615-v4-2-0a551c6dd342@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/arm/qcom,ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h index 8332f8d82f96..73a69fc535f6 100644 --- a/include/dt-bindings/arm/qcom,ids.h +++ b/include/dt-bindings/arm/qcom,ids.h @@ -278,6 +278,7 @@ #define QCOM_ID_IPQ5321 650 #define QCOM_ID_QCS8300 674 #define QCOM_ID_QCS8275 675 +#define QCOM_ID_QCS615 680 /* * The board type and revision information, used by Qualcomm bootloaders and -- cgit v1.2.3 From 3b3214acd7f2dd506924cd57ce6e380cdaede423 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 18 Oct 2024 19:32:36 +0300 Subject: dt-bindings: arm: qcom,ids: add SoC ID for SAR2130P and SAR1130P Add the IDs for Qualcomm SAR2130P and SAR1130P platforms. Signed-off-by: Dmitry Baryshkov Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241018-sar2130p-socinfo-v1-1-b1e97ea963fe@linaro.org Signed-off-by: Bjorn Andersson --- include/dt-bindings/arm/qcom,ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h index 73a69fc535f6..1a53ca2df523 100644 --- a/include/dt-bindings/arm/qcom,ids.h +++ b/include/dt-bindings/arm/qcom,ids.h @@ -255,6 +255,7 @@ #define QCOM_ID_IPQ9510 521 #define QCOM_ID_QRB4210 523 #define QCOM_ID_QRB2210 524 +#define QCOM_ID_SAR2130P 525 #define QCOM_ID_SM8475 530 #define QCOM_ID_SM8475P 531 #define QCOM_ID_SA8775P 534 @@ -264,6 +265,7 @@ #define QCOM_ID_X1E80100 555 #define QCOM_ID_SM8650 557 #define QCOM_ID_SM4450 568 +#define QCOM_ID_SAR1130P 579 #define QCOM_ID_QDU1010 587 #define QCOM_ID_QRU1032 588 #define QCOM_ID_QRU1052 589 -- cgit v1.2.3 From 44059790a5cb9258ae6137387e4c39b717fd2ced Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Oct 2024 07:53:04 +0200 Subject: kfifo: don't include dma-mapping.h in kfifo.h Nothing in kfifo.h directly needs dma-mapping.h, only two macros use DMA_MAPPING_ERROR when actually instantiated. Drop the dma-mapping.h include to reduce include bloat. Add an explicity include to drivers/mailbox/omap-mailbox.c as that file uses __raw_readl and __raw_writel through a complicated include chain involving Fixes: d52b761e4b1a ("kfifo: add kfifo_dma_out_prepare_mapped()") Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20241023055317.313234-1-hch@lst.de Signed-off-by: Greg Kroah-Hartman --- include/linux/kfifo.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 564868bdce89..fd743d4c4b4b 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -37,7 +37,6 @@ */ #include -#include #include #include #include -- cgit v1.2.3 From 83c289e81e88d01e55d6d56531502ed7b4886a05 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 17 Oct 2024 19:19:34 +0300 Subject: net/sched: act_api: unexport tcf_action_dump_1() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This isn't used outside act_api.c, but is called by tcf_dump_walker() prior to its definition. So move it upwards and make it static. Simultaneously, reorder the variable declarations so that they follow the networking "reverse Christmas tree" coding style. Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241017161934.3599046-1-vladimir.oltean@nxp.com Signed-off-by: Paolo Abeni --- include/net/act_api.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 77ee0c657e2c..404df8557f6a 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -219,7 +219,6 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind, int ref, bool terse); int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); -int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); static inline void tcf_action_update_bstats(struct tc_action *a, struct sk_buff *skb) -- cgit v1.2.3 From c972c1c41d9b20fb38b54e77dcee763e27e715a9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Oct 2024 18:41:00 -0700 Subject: ipv4: Switch inet_addr_hash() to less predictable hash. Recently, commit 4a0ec2aa0704 ("ipv6: switch inet6_addr_hash() to less predictable hash") and commit 4daf4dc275f1 ("ipv6: switch inet6_acaddr_hash() to less predictable hash") hardened IPv6 address hash functions. inet_addr_hash() is also highly predictable, and a malicious use could abuse a specific bucket. Let's follow the change on IPv4 by using jhash_1word(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241018014100.93776-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- include/net/ip.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 4be0a6a603b2..0e548c1f2a0e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -690,6 +690,11 @@ static inline unsigned int ipv4_addr_hash(__be32 ip) return (__force unsigned int) ip; } +static inline u32 __ipv4_addr_hash(const __be32 ip, const u32 initval) +{ + return jhash_1word((__force u32)ip, initval); +} + static inline u32 ipv4_portaddr_hash(const struct net *net, __be32 saddr, unsigned int port) -- cgit v1.2.3 From e44ef3f66c5472c2cbc6957c684d7279c26b0db1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Oct 2024 05:21:08 +0000 Subject: netpoll: remove ndo_netpoll_setup() second argument npinfo is not used in any of the ndo_netpoll_setup() methods. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241018052108.2610827-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8feaca12655e..86a0b7eb9461 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1425,8 +1425,7 @@ struct net_device_ops { __be16 proto, u16 vid); #ifdef CONFIG_NET_POLL_CONTROLLER void (*ndo_poll_controller)(struct net_device *dev); - int (*ndo_netpoll_setup)(struct net_device *dev, - struct netpoll_info *info); + int (*ndo_netpoll_setup)(struct net_device *dev); void (*ndo_netpoll_cleanup)(struct net_device *dev); #endif int (*ndo_set_vf_mac)(struct net_device *dev, -- cgit v1.2.3 From 7cfc1b1fa8673fe386304194d4cc2c8fe555bbf9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Oct 2024 05:23:10 +0000 Subject: net: netdev_tx_sent_queue() small optimization Change smp_mb() imediately following a set_bit() with smp_mb__after_atomic(). Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20241018052310.2612084-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 86a0b7eb9461..bbd30f3c5d29 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3517,7 +3517,7 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, * because in netdev_tx_completed_queue we update the dql_completed * before checking the XOFF flag. */ - smp_mb(); + smp_mb__after_atomic(); /* check again in case another CPU has just made room avail */ if (unlikely(dql_avail(&dev_queue->dql) >= 0)) -- cgit v1.2.3 From e55f45b0cda71ac2e9b4c6dee8852dc8d6f22ef0 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 23 Oct 2024 10:35:43 +0200 Subject: regulator: doc: add missing documentation for init_cb Add comment documenting introduced init_cb. This solves the following warning when building the kernel documentation: ./include/linux/regulator/driver.h:435: warning: Function parameter or struct member 'init_cb' not described in 'regulator_desc' Fixes: cfcdf395c21e ("regulator: core: add callback to perform runtime init") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/lkml/20241023155120.6c4fea20@canb.auug.org.au/ Signed-off-by: Jerome Brunet Link: https://patch.msgid.link/20241023-regulator-doc-fixup-v1-1-ec018742ad73@baylibre.com Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index d2f4427504f0..5b66caf1695d 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -269,6 +269,11 @@ enum regulator_type { * config but it cannot store it for later usage. * Callback should return 0 on success or negative ERRNO * indicating failure. + * @init_cb: Optional callback called after the parsing of init_data. + * Allows the regulator to perform runtime init if necessary, + * such as synching the regulator and the parsed constraints. + * Callback should return 0 on success or negative ERRNO + * indicating failure. * @id: Numerical identifier for the regulator. * @ops: Regulator operations table. * @irq: Interrupt number for the regulator. -- cgit v1.2.3 From d1bc2d5cca434e7c854fd16ef021c16d74293b60 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 23 Oct 2024 10:35:44 +0200 Subject: regulator: doc: remove documentation comment for regulator_init Remove documentation comment related to regulator_init callback. This solves the following warning when building the kernel documentation: ./include/linux/regulator/machine.h:290: warning: Excess struct member 'regulator_init' description in 'regulator_init_data' Fixes: 602ff58ae4fe ("regulator: core: remove machine init callback from config") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/lkml/20241023155257.0fa7211d@canb.auug.org.au/ Signed-off-by: Jerome Brunet Link: https://patch.msgid.link/20241023-regulator-doc-fixup-v1-2-ec018742ad73@baylibre.com Signed-off-by: Mark Brown --- include/linux/regulator/machine.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index d0d700ff337a..b3db09a7429b 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -273,8 +273,6 @@ struct regulator_consumer_supply { * be usable. * @num_consumer_supplies: Number of consumer device supplies. * @consumer_supplies: Consumer device supply configuration. - * - * @regulator_init: Callback invoked when the regulator has been registered. * @driver_data: Data passed to regulator_init. */ struct regulator_init_data { -- cgit v1.2.3 From 50a191a8a12b33dfad3b27c6ba4e76c5ba39db73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 10 Aug 2024 19:00:35 +0200 Subject: sysctl: update comments to new registration APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysctl registration APIs do not need a terminating table entry anymore and with commit acc154691fc7 ("sysctl: Warn on an empty procname element") even emit warnings if such a sentinel entry is supplied. While at it also remove the mention of "table->de" which was removed in commit 3fbfa98112fc ("[PATCH] sysctl: remove the proc_dir_entry member for the sysctl tables") back in 2007. Signed-off-by: Thomas Weißschuh Reviewed-by: Kees Cook Signed-off-by: Joel Granados --- include/linux/sysctl.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 202855befa8b..40a6ac6c9713 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -90,9 +90,7 @@ int proc_do_static_key(const struct ctl_table *table, int write, void *buffer, /* * Register a set of sysctl names by calling register_sysctl - * with an initialised array of struct ctl_table's. An entry with - * NULL procname terminates the table. table->de will be - * set up by the registration and need not be initialised in advance. + * with an initialised array of struct ctl_table's. * * sysctl names can be mirrored automatically under /proc/sys. The * procname supplied controls /proc naming. @@ -133,7 +131,7 @@ static inline void *proc_sys_poll_event(struct ctl_table_poll *poll) /* A sysctl table is an array of struct ctl_table: */ struct ctl_table { - const char *procname; /* Text ID for /proc/sys, or zero */ + const char *procname; /* Text ID for /proc/sys */ void *data; int maxlen; umode_t mode; -- cgit v1.2.3 From 074a8b54dacc1920f54381f3661ecee6786b0c21 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 7 Oct 2024 15:00:45 +0300 Subject: wifi: mac80211: Add support to indicate that a new interface is to be added Add support to indicate to the driver that an interface is about to be added so that the driver could prepare its resources early if it needs so. Signed-off-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20241007144851.e0e8563e1c30.Ifccc96a46a347eb15752caefc9f4eff31f75ed47@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 333e0fae6796..0b8df8ec5a3b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4444,6 +4444,12 @@ struct ieee80211_prep_tx_info { * if the requested TID-To-Link mapping can be accepted or not. * If it's not accepted the driver may suggest a preferred mapping and * modify @ttlm parameter with the suggested TID-to-Link mapping. + * @prep_add_interface: prepare for interface addition. This can be used by + * drivers to prepare for the addition of a new interface, e.g., allocate + * the needed resources etc. This callback doesn't guarantee that an + * interface with the specified type would be added, and thus drivers that + * implement this callback need to handle such cases. The type is the full + * &enum nl80211_iftype. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -4828,6 +4834,8 @@ struct ieee80211_ops { enum ieee80211_neg_ttlm_res (*can_neg_ttlm)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_neg_ttlm *ttlm); + void (*prep_add_interface)(struct ieee80211_hw *hw, + enum nl80211_iftype type); }; /** -- cgit v1.2.3 From 62262dd00c319195f2e14022903b7ebbb53119bc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Oct 2024 15:00:46 +0300 Subject: wifi: cfg80211: disallow SMPS in AP mode In practice, userspace hasn't been able to set this for many years, and mac80211 has already rejected it (which is now no longer needed), so reject SMPS mode (other than "OFF" to be a bit more compatible) in AP mode. Also remove the parameter from the AP settings struct. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20241007144851.fe1fc46484cf.I8676fb52b818a4bedeb9c25b901e1396277ffc0b@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 69ec1eb41a09..c8ce5c2e14f4 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1460,7 +1460,6 @@ struct cfg80211_unsol_bcast_probe_resp { * @crypto: crypto settings * @privacy: the BSS uses privacy * @auth_type: Authentication type (algorithm) - * @smps_mode: SMPS mode * @inactivity_timeout: time in seconds to determine station's inactivity. * @p2p_ctwindow: P2P CT Window * @p2p_opp_ps: P2P opportunistic PS @@ -1498,7 +1497,6 @@ struct cfg80211_ap_settings { struct cfg80211_crypto_settings crypto; bool privacy; enum nl80211_auth_type auth_type; - enum nl80211_smps_mode smps_mode; int inactivity_timeout; u8 p2p_ctwindow; bool p2p_opp_ps; -- cgit v1.2.3 From 9c5f2c7eeb585834f8dadb552b4fd811dd2dee6f Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 7 Oct 2024 15:00:47 +0300 Subject: wifi: mac80211: rename IEEE80211_CHANCTX_CHANGE_MIN_WIDTH The name is misleading, this actually indicates that ieee80211_chanctx_conf::min_def was updated. Rename it to IEEE80211_CHANCTX_CHANGE_MIN_DEF. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20241007144851.726b5f12ae0c.I3bd9e594c9d2735183ec049a4c7224bd0a9599c9@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0b8df8ec5a3b..c42ad5a0c303 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -213,7 +213,7 @@ struct ieee80211_low_level_stats { * @IEEE80211_CHANCTX_CHANGE_RADAR: radar detection flag changed * @IEEE80211_CHANCTX_CHANGE_CHANNEL: switched to another operating channel, * this is used only with channel switching with CSA - * @IEEE80211_CHANCTX_CHANGE_MIN_WIDTH: The min required channel width changed + * @IEEE80211_CHANCTX_CHANGE_MIN_DEF: The min chandef changed * @IEEE80211_CHANCTX_CHANGE_AP: The AP channel definition changed, so (wider * bandwidth) OFDMA settings need to be changed * @IEEE80211_CHANCTX_CHANGE_PUNCTURING: The punctured channel(s) bitmap @@ -224,7 +224,7 @@ enum ieee80211_chanctx_change { IEEE80211_CHANCTX_CHANGE_RX_CHAINS = BIT(1), IEEE80211_CHANCTX_CHANGE_RADAR = BIT(2), IEEE80211_CHANCTX_CHANGE_CHANNEL = BIT(3), - IEEE80211_CHANCTX_CHANGE_MIN_WIDTH = BIT(4), + IEEE80211_CHANCTX_CHANGE_MIN_DEF = BIT(4), IEEE80211_CHANCTX_CHANGE_AP = BIT(5), IEEE80211_CHANCTX_CHANGE_PUNCTURING = BIT(6), }; -- cgit v1.2.3 From e21dd758cf4c51d508e6665b653c5836103d1027 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 7 Oct 2024 15:00:48 +0300 Subject: wifi: mac80211: make bss_param_ch_cnt available for the low level driver Drivers may need to track this. Make it available for them, and maintain the value when beacons are received. When link X receives a beacon, iterate the RNR elements and update all the links with their respective data. Track the link id that updated the data so that each link can know whether the update came from its own beacon or from another link. In case, the update came from the link's own beacon, always update the updater link id. The purpose is to let the low level driver know if a link is losing its beacons. If link X is losing its beacons, it can still track the bss_param_ch_cnt and know where the update came from. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20241007144851.e2d8d1a722ad.I04b883daba2cd48e5730659eb62ca1614c899cbb@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c42ad5a0c303..20562676e9cd 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -740,6 +740,19 @@ struct ieee80211_parsed_tpe { * @eht_80mhz_full_bw_ul_mumimo: in AP-mode, does this BSS support the * reception of an EHT TB PPDU on an RU that spans the entire PPDU * bandwidth + * @bss_param_ch_cnt: in BSS-mode, the BSS params change count. This + * information is the latest known value. It can come from this link's + * beacon or from a beacon sent by another link. + * @bss_param_ch_cnt_link_id: in BSS-mode, the link_id to which the beacon + * that updated &bss_param_ch_cnt belongs. E.g. if link 1 doesn't hear + * its beacons, and link 2 sent a beacon with an RNR element that updated + * link 1's BSS params change count, then, link 1's + * bss_param_ch_cnt_link_id will be 2. That means that link 1 knows that + * link 2 was the link that updated its bss_param_ch_cnt value. + * In case link 1 hears its beacon again, bss_param_ch_cnt_link_id will + * be updated to 1, even if bss_param_ch_cnt didn't change. This allows + * the link to know that it heard the latest value from its own beacon + * (as opposed to hearing its value from another link's beacon). */ struct ieee80211_bss_conf { struct ieee80211_vif *vif; @@ -834,6 +847,8 @@ struct ieee80211_bss_conf { bool eht_su_beamformee; bool eht_mu_beamformer; bool eht_80mhz_full_bw_ul_mumimo; + u8 bss_param_ch_cnt; + u8 bss_param_ch_cnt_link_id; }; /** -- cgit v1.2.3 From 88b67e91e2928c3311f3658d1270b40708b0de00 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Oct 2024 15:00:54 +0300 Subject: wifi: mac80211: call rate_control_rate_update() for link STA In order to update the right link information, call the update rate_control_rate_update() with the right link_sta, and then pass that through to the driver's sta_rc_update() method. The software rate control still doesn't support it, but that'll be skipped by not having a rate control ref. Since it now operates on a link sta, rename the driver method. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20241007144851.5851b6b5fd41.Ibdf50d96afa4b761dd9b9dfd54a1147e77a75329@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 20562676e9cd..7a6edc04e212 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4090,8 +4090,8 @@ struct ieee80211_prep_tx_info { * in @sta_state. * The callback can sleep. * - * @sta_rc_update: Notifies the driver of changes to the bitrates that can be - * used to transmit to the station. The changes are advertised with bits + * @link_sta_rc_update: Notifies the driver of changes to the bitrates that can + * be used to transmit to the station. The changes are advertised with bits * from &enum ieee80211_rate_control_changed and the values are reflected * in the station data. This callback should only be used when the driver * uses hardware rate control (%IEEE80211_HW_HAS_RATE_CONTROL) since @@ -4581,10 +4581,10 @@ struct ieee80211_ops { void (*sta_pre_rcu_remove)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta); - void (*sta_rc_update)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - u32 changed); + void (*link_sta_rc_update)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_link_sta *link_sta, + u32 changed); void (*sta_rate_tbl_update)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta); -- cgit v1.2.3 From 751e7489c1d74b94ffffbed619d8fd724eeff4ee Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Oct 2024 15:00:58 +0300 Subject: wifi: mac80211: expose ieee80211_chan_width_to_rx_bw() to drivers Drivers might need to also do this calculation, no point in them duplicating the code. Since it's so simple, just make it an inline. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20241007144851.af003cb4a088.I8b5d29504b726caae24af6013c65b3daebe842a2@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7a6edc04e212..32bdda90a4ac 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7704,6 +7704,33 @@ void ieee80211_set_active_links_async(struct ieee80211_vif *vif, */ void ieee80211_send_teardown_neg_ttlm(struct ieee80211_vif *vif); +/** + * ieee80211_chan_width_to_rx_bw - convert channel width to STA RX bandwidth + * @width: the channel width value to convert + * Return: the STA RX bandwidth value for the channel width + */ +static inline enum ieee80211_sta_rx_bandwidth +ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width) +{ + switch (width) { + default: + WARN_ON_ONCE(1); + fallthrough; + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + return IEEE80211_STA_RX_BW_20; + case NL80211_CHAN_WIDTH_40: + return IEEE80211_STA_RX_BW_40; + case NL80211_CHAN_WIDTH_80: + return IEEE80211_STA_RX_BW_80; + case NL80211_CHAN_WIDTH_160: + case NL80211_CHAN_WIDTH_80P80: + return IEEE80211_STA_RX_BW_160; + case NL80211_CHAN_WIDTH_320: + return IEEE80211_STA_RX_BW_320; + } +} + /* for older drivers - let's not document these ... */ int ieee80211_emulate_add_chanctx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx); -- cgit v1.2.3 From 3607798ad9bdef35ad08489a8239390fccaac6b5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 9 Oct 2024 10:25:42 +0200 Subject: wifi: cfg80211: add option for vif allowed radios This allows users to prevent a vif from affecting radios other than the configured ones. This can be useful in cases where e.g. an AP is running on one radio, and triggering a scan on another radio should not disturb it. Changing the allowed radios list for a vif is supported, but only while it is down. While it is possible to achieve the same by always explicitly specifying a frequency list for scan requests and ensuring that the wrong channel/band is never accidentally set on an unrelated interface, this change makes multi-radio wiphy setups a lot easier to deal with for CLI users. By itself, this patch only enforces the radio mask for scanning requests and remain-on-channel. Follow-up changes build on this to limit configured frequencies. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/eefcb218780f71a1549875d149f1196486762756.1728462320.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 14 ++++++++++++++ include/uapi/linux/nl80211.h | 5 +++++ 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c8ce5c2e14f4..95d05e67e69a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6221,6 +6221,7 @@ enum ieee80211_ap_reg_power { * entered. * @links.cac_time_ms: CAC time in ms * @valid_links: bitmap describing what elements of @links are valid + * @radio_mask: Bitmask of radios that this interface is allowed to operate on. */ struct wireless_dev { struct wiphy *wiphy; @@ -6333,6 +6334,8 @@ struct wireless_dev { unsigned int cac_time_ms; } links[IEEE80211_MLD_MAX_NUM_LINKS]; u16 valid_links; + + u32 radio_mask; }; static inline const u8 *wdev_address(struct wireless_dev *wdev) @@ -6518,6 +6521,17 @@ static inline bool cfg80211_channel_is_psc(struct ieee80211_channel *chan) bool cfg80211_radio_chandef_valid(const struct wiphy_radio *radio, const struct cfg80211_chan_def *chandef); +/** + * cfg80211_wdev_channel_allowed - Check if the wdev may use the channel + * + * @wdev: the wireless device + * @chan: channel to check + * + * Return: whether or not the wdev may use the channel + */ +bool cfg80211_wdev_channel_allowed(struct wireless_dev *wdev, + struct ieee80211_channel *chan); + /** * ieee80211_get_response_rate - get basic rate for a given rate * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f97f5adc8d51..d31ccee99cc7 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2868,6 +2868,9 @@ enum nl80211_commands { * nested item, it contains attributes defined in * &enum nl80211_if_combination_attrs. * + * @NL80211_ATTR_VIF_RADIO_MASK: Bitmask of allowed radios (u32). + * A value of 0 means all radios. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3416,6 +3419,8 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_RADIOS, NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS, + NL80211_ATTR_VIF_RADIO_MASK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From ebda716ea4da03326ac4d0a71604d18aa8a2e695 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 9 Oct 2024 10:25:45 +0200 Subject: wifi: cfg80211: report per wiphy radio antenna mask With multi-radio devices, each radio typically gets a fixed set of antennas. In order to be able to disable specific antennas for some radios, user space needs to know which antenna mask bits are assigned to which radio. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/e0a26afa2c88eaa188ec96ec6d17ecac4e827641.1728462320.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ include/uapi/linux/nl80211.h | 3 +++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 95d05e67e69a..3100733f3e23 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5434,6 +5434,8 @@ struct wiphy_radio_freq_range { * @iface_combinations: Valid interface combinations array, should not * list single interface types. * @n_iface_combinations: number of entries in @iface_combinations array. + * + * @antenna_mask: bitmask of antennas connected to this radio. */ struct wiphy_radio { const struct wiphy_radio_freq_range *freq_range; @@ -5441,6 +5443,8 @@ struct wiphy_radio { const struct ieee80211_iface_combination *iface_combinations; int n_iface_combinations; + + u32 antenna_mask; }; #define CFG80211_HW_TIMESTAMP_ALL_PEERS 0xffff diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d31ccee99cc7..1b8827f920ff 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -8036,6 +8036,8 @@ enum nl80211_ap_settings_flags { * @NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION: Supported interface * combination for this radio. Attribute may be present multiple times * and contains attributes defined in &enum nl80211_if_combination_attrs. + * @NL80211_WIPHY_RADIO_ATTR_ANTENNA_MASK: bitmask (u32) of antennas + * connected to this radio. * * @__NL80211_WIPHY_RADIO_ATTR_LAST: Internal * @NL80211_WIPHY_RADIO_ATTR_MAX: Highest attribute @@ -8046,6 +8048,7 @@ enum nl80211_wiphy_radio_attrs { NL80211_WIPHY_RADIO_ATTR_INDEX, NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE, NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION, + NL80211_WIPHY_RADIO_ATTR_ANTENNA_MASK, /* keep last */ __NL80211_WIPHY_RADIO_ATTR_LAST, -- cgit v1.2.3 From 006a97ceb6732c861c0e2fa3b6a34512caac9354 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 9 Oct 2024 10:25:46 +0200 Subject: wifi: mac80211: remove status->ampdu_delimiter_crc This was never used by any driver, so remove it to free up some space. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/e6fee6eed49b105261830db1c74f13841fb9616c.1728462320.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 32bdda90a4ac..d4f23224eff9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1463,8 +1463,6 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_AMPDU_IS_LAST: this subframe is the last subframe of the A-MPDU * @RX_FLAG_AMPDU_DELIM_CRC_ERROR: A delimiter CRC error has been detected * on this subframe - * @RX_FLAG_AMPDU_DELIM_CRC_KNOWN: The delimiter CRC field is known (the CRC - * is stored in the @ampdu_delimiter_crc field) * @RX_FLAG_MIC_STRIPPED: The mic was stripped of this packet. Decryption was * done by the hardware * @RX_FLAG_ONLY_MONITOR: Report frame only to monitor interfaces without @@ -1536,7 +1534,7 @@ enum mac80211_rx_flags { RX_FLAG_AMPDU_LAST_KNOWN = BIT(12), RX_FLAG_AMPDU_IS_LAST = BIT(13), RX_FLAG_AMPDU_DELIM_CRC_ERROR = BIT(14), - RX_FLAG_AMPDU_DELIM_CRC_KNOWN = BIT(15), + /* one free bit at 15 */ RX_FLAG_MACTIME = BIT(16) | BIT(17), RX_FLAG_MACTIME_PLCP_START = 1 << 16, RX_FLAG_MACTIME_START = 2 << 16, @@ -1633,7 +1631,6 @@ enum mac80211_rx_encoding { * @rx_flags: internal RX flags for mac80211 * @ampdu_reference: A-MPDU reference number, must be a different value for * each A-MPDU but the same for each subframe within one A-MPDU - * @ampdu_delimiter_crc: A-MPDU delimiter CRC * @zero_length_psdu_type: radiotap type of the 0-length PSDU * @link_valid: if the link which is identified by @link_id is valid. This flag * is set only when connection is MLO. @@ -1671,7 +1668,6 @@ struct ieee80211_rx_status { s8 signal; u8 chains; s8 chain_signal[IEEE80211_MAX_CHAINS]; - u8 ampdu_delimiter_crc; u8 zero_length_psdu_type; u8 link_valid:1, link_id:4; }; -- cgit v1.2.3 From 9c4f830927750a2bf9fd9426a5257f0fdce3b662 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 9 Oct 2024 10:25:47 +0200 Subject: wifi: cfg80211: pass net_device to .set_monitor_channel Preparation for allowing multiple monitor interfaces with different channels on a multi-radio wiphy. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/35fa652dbfebf93343f8b9a08fdef0467a2a02dc.1728462320.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 3100733f3e23..5feb93ba0400 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4694,6 +4694,7 @@ struct cfg80211_ops { struct ieee80211_channel *chan); int (*set_monitor_channel)(struct wiphy *wiphy, + struct net_device *dev, struct cfg80211_chan_def *chandef); int (*scan)(struct wiphy *wiphy, -- cgit v1.2.3 From 9d40f7e32774279be7e5a7a278d7a290872b2f81 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 9 Oct 2024 10:25:48 +0200 Subject: wifi: mac80211: add flag to opt out of virtual monitor support This is useful for multi-radio devices that are capable of monitoring on multiple channels simultanenously. When this flag is set, each monitor interface is passed to the driver individually and can have a configured channel. The vif mac address for non-active monitor interfaces is cleared, in order to allow the driver to tell them apart from active ones. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/3c55505ee0cf0a5f141fbcb30d1e8be8d9f40373.1728462320.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d4f23224eff9..b4f246cdcca4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2694,6 +2694,11 @@ struct ieee80211_txq { * a virtual monitor interface when monitor interfaces are the only * active interfaces. * + * @IEEE80211_HW_NO_VIRTUAL_MONITOR: The driver would like to be informed + * of any monitor interface, as well as their configured channel. + * This is useful for supporting multiple monitor interfaces on different + * channels. + * * @IEEE80211_HW_NO_AUTO_VIF: The driver would like for no wlanX to * be created. It is expected user-space will create vifs as * desired (and thus have them named as desired). @@ -2853,6 +2858,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_DYNAMIC_PS, IEEE80211_HW_MFP_CAPABLE, IEEE80211_HW_WANT_MONITOR_VIF, + IEEE80211_HW_NO_VIRTUAL_MONITOR, IEEE80211_HW_NO_AUTO_VIF, IEEE80211_HW_SW_CRYPTO_CONTROL, IEEE80211_HW_SUPPORT_FAST_XMIT, -- cgit v1.2.3 From a77e527b470cc38754c730bce1483711f643bb60 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 9 Oct 2024 10:25:49 +0200 Subject: wifi: cfg80211: add monitor SKIP_TX flag This can be used to indicate that the user is not interested in receiving locally sent packets on the monitor interface. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/f0c20f832eadd36c71fba9a2a16ba57d78389b6c.1728462320.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5feb93ba0400..8f9853b1a5d1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2267,6 +2267,7 @@ static inline int cfg80211_get_station(struct net_device *dev, * @MONITOR_FLAG_OTHER_BSS: disable BSSID filtering * @MONITOR_FLAG_COOK_FRAMES: report frames after processing * @MONITOR_FLAG_ACTIVE: active monitor, ACKs frames on its MAC address + * @MONITOR_FLAG_SKIP_TX: do not pass locally transmitted frames */ enum monitor_flags { MONITOR_FLAG_CHANGED = BIT(__NL80211_MNTR_FLAG_INVALID), @@ -2276,6 +2277,7 @@ enum monitor_flags { MONITOR_FLAG_OTHER_BSS = BIT(NL80211_MNTR_FLAG_OTHER_BSS), MONITOR_FLAG_COOK_FRAMES = BIT(NL80211_MNTR_FLAG_COOK_FRAMES), MONITOR_FLAG_ACTIVE = BIT(NL80211_MNTR_FLAG_ACTIVE), + MONITOR_FLAG_SKIP_TX = BIT(NL80211_MNTR_FLAG_SKIP_TX), }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1b8827f920ff..6d11437596b9 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4703,6 +4703,7 @@ enum nl80211_survey_info { * overrides all other flags. * @NL80211_MNTR_FLAG_ACTIVE: use the configured MAC address * and ACK incoming unicast packets. + * @NL80211_MNTR_FLAG_SKIP_TX: do not pass local tx packets * * @__NL80211_MNTR_FLAG_AFTER_LAST: internal use * @NL80211_MNTR_FLAG_MAX: highest possible monitor flag @@ -4715,6 +4716,7 @@ enum nl80211_mntr_flags { NL80211_MNTR_FLAG_OTHER_BSS, NL80211_MNTR_FLAG_COOK_FRAMES, NL80211_MNTR_FLAG_ACTIVE, + NL80211_MNTR_FLAG_SKIP_TX, /* keep last */ __NL80211_MNTR_FLAG_AFTER_LAST, -- cgit v1.2.3 From 8dc6d81c6b2acc434b00c4585f0594739031c4e4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 22 Oct 2024 15:18:34 +0200 Subject: debugfs: add small file operations for most files As struct file_operations is really big, but (most) debugfs files only use simple_open, read, write and perhaps seek, and don't need anything else, this wastes a lot of space for NULL pointers. Add a struct debugfs_short_fops and some bookkeeping code in debugfs so that users can use that with debugfs_create_file() using _Generic to figure out which function to use. Converting mac80211 to use it where possible saves quite a bit of space: 1010127 205064 1220 1216411 128f9b net/mac80211/mac80211.ko (before) 981199 205064 1220 1187483 121e9b net/mac80211/mac80211.ko (after) ------- -28928 = ~28KiB With a marginal space cost in debugfs: 8701 550 16 9267 2433 fs/debugfs/inode.o (before) 25233 325 32 25590 63f6 fs/debugfs/file.o (before) 8914 558 16 9488 2510 fs/debugfs/inode.o (after) 25380 325 32 25737 6489 fs/debugfs/file.o (after) --------------- +360 +8 (All on x86-64) A simple spatch suggests there are more than 300 instances, not even counting the ones hidden in macros like in mac80211, that could be trivially converted, for additional savings of about 240 bytes for each. Reviewed-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20241022151838.26f9925fb959.Ia80b55e934bbfc45ce0df42a3233d34b35508046@changeid Signed-off-by: Johannes Berg --- include/linux/debugfs.h | 62 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 0928a6c8ae1e..59444b495d49 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -71,9 +71,63 @@ typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); struct dentry *debugfs_lookup(const char *name, struct dentry *parent); -struct dentry *debugfs_create_file(const char *name, umode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fops); +struct debugfs_short_fops { + ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); + ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); + loff_t (*llseek) (struct file *, loff_t, int); +}; + +struct dentry *debugfs_create_file_full(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops); +struct dentry *debugfs_create_file_short(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct debugfs_short_fops *fops); + +/** + * debugfs_create_file - create a file in the debugfs filesystem + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have. + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is NULL, then the + * file will be created in the root of the debugfs filesystem. + * @data: a pointer to something that the caller will want to get to later + * on. The inode.i_private pointer will point to this value on + * the open() call. + * @fops: a pointer to a struct file_operations or struct debugfs_short_fops that + * should be used for this file. + * + * This is the basic "create a file" function for debugfs. It allows for a + * wide range of flexibility in creating a file, or a directory (if you want + * to create a directory, the debugfs_create_dir() function is + * recommended to be used instead.) + * + * This function will return a pointer to a dentry if it succeeds. This + * pointer must be passed to the debugfs_remove() function when the file is + * to be removed (no automatic cleanup happens if your module is unloaded, + * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be + * returned. + * + * If debugfs is not enabled in the kernel, the value -%ENODEV will be + * returned. + * + * If fops points to a struct debugfs_short_fops, then simple_open() will be + * used for the open, and only read/write/llseek are supported and are proxied, + * so no module reference or release are needed. + * + * NOTE: it's expected that most callers should _ignore_ the errors returned + * by this function. Other debugfs functions handle the fact that the "dentry" + * passed to them could be an error and they don't crash in that case. + * Drivers should generally work fine even if debugfs fails to init anyway. + */ +#define debugfs_create_file(name, mode, parent, data, fops) \ + _Generic(fops, \ + const struct file_operations *: debugfs_create_file_full, \ + const struct debugfs_short_fops *: debugfs_create_file_short, \ + struct file_operations *: debugfs_create_file_full, \ + struct debugfs_short_fops *: debugfs_create_file_short) \ + (name, mode, parent, data, fops) + struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); @@ -207,7 +261,7 @@ static inline struct dentry *debugfs_lookup(const char *name, static inline struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, - const struct file_operations *fops) + const void *fops) { return ERR_PTR(-ENODEV); } -- cgit v1.2.3 From 26ff1fb02991e1260481185bb5ccab1ee498d5e4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Oct 2024 09:19:29 -0700 Subject: rcu: Delete unused rcu_gp_might_be_stalled() function The rcu_gp_might_be_stalled() function is no longer used, so this commit removes it. Signed-off-by: Paul E. McKenney Reviewed-by: Joel Fernandes (Google) Signed-off-by: Frederic Weisbecker --- include/linux/rcutiny.h | 1 - include/linux/rcutree.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 0ee270b3f5ed..fe42315f667f 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -165,7 +165,6 @@ static inline bool rcu_inkernel_boot_has_ended(void) { return true; } static inline bool rcu_is_watching(void) { return true; } static inline void rcu_momentary_eqs(void) { } static inline void kfree_rcu_scheduler_running(void) { } -static inline bool rcu_gp_might_be_stalled(void) { return false; } /* Avoid RCU read-side critical sections leaking across. */ static inline void rcu_all_qs(void) { barrier(); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 90a684f94776..27d86d912781 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -40,7 +40,6 @@ void kvfree_rcu_barrier(void); void rcu_barrier(void); void rcu_momentary_eqs(void); void kfree_rcu_scheduler_running(void); -bool rcu_gp_might_be_stalled(void); struct rcu_gp_oldstate { unsigned long rgos_norm; -- cgit v1.2.3 From b3aba04883de872488e5dabda199427b2bfa0395 Mon Sep 17 00:00:00 2001 From: Manikanta Mylavarapu Date: Tue, 20 Aug 2024 11:26:16 +0530 Subject: dt-bindings: clock: qcom: gcc-ipq5332: remove q6 bring up clock macros Q6 firmware takes care of bringup clocks, so remove them. Signed-off-by: Manikanta Mylavarapu Reviewed-by: Krzysztof Kozlowski Signed-off-by: Gokul Sriram Palanisamy Link: https://lore.kernel.org/r/20240820055618.267554-4-quic_gokulsri@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,ipq5332-gcc.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,ipq5332-gcc.h b/include/dt-bindings/clock/qcom,ipq5332-gcc.h index 8a405a0a96d0..da9b507c30bf 100644 --- a/include/dt-bindings/clock/qcom,ipq5332-gcc.h +++ b/include/dt-bindings/clock/qcom,ipq5332-gcc.h @@ -96,15 +96,7 @@ #define GCC_PCNOC_BFDCD_CLK_SRC 87 #define GCC_PCNOC_LPASS_CLK 88 #define GCC_PRNG_AHB_CLK 89 -#define GCC_Q6_AHB_CLK 90 -#define GCC_Q6_AHB_S_CLK 91 -#define GCC_Q6_AXIM_CLK 92 #define GCC_Q6_AXIM_CLK_SRC 93 -#define GCC_Q6_AXIS_CLK 94 -#define GCC_Q6_TSCTR_1TO2_CLK 95 -#define GCC_Q6SS_ATBM_CLK 96 -#define GCC_Q6SS_PCLKDBG_CLK 97 -#define GCC_Q6SS_TRIG_CLK 98 #define GCC_QDSS_AT_CLK 99 #define GCC_QDSS_AT_CLK_SRC 100 #define GCC_QDSS_CFG_AHB_CLK 101 @@ -134,7 +126,6 @@ #define GCC_SNOC_PCIE3_2LANE_S_CLK 125 #define GCC_SNOC_USB_CLK 126 #define GCC_SYS_NOC_AT_CLK 127 -#define GCC_SYS_NOC_WCSS_AHB_CLK 128 #define GCC_SYSTEM_NOC_BFDCD_CLK_SRC 129 #define GCC_UNIPHY0_AHB_CLK 130 #define GCC_UNIPHY0_SYS_CLK 131 @@ -155,17 +146,6 @@ #define GCC_USB0_PIPE_CLK 146 #define GCC_USB0_SLEEP_CLK 147 #define GCC_WCSS_AHB_CLK_SRC 148 -#define GCC_WCSS_AXIM_CLK 149 -#define GCC_WCSS_AXIS_CLK 150 -#define GCC_WCSS_DBG_IFC_APB_BDG_CLK 151 -#define GCC_WCSS_DBG_IFC_APB_CLK 152 -#define GCC_WCSS_DBG_IFC_ATB_BDG_CLK 153 -#define GCC_WCSS_DBG_IFC_ATB_CLK 154 -#define GCC_WCSS_DBG_IFC_NTS_BDG_CLK 155 -#define GCC_WCSS_DBG_IFC_NTS_CLK 156 -#define GCC_WCSS_ECAHB_CLK 157 -#define GCC_WCSS_MST_ASYNC_BDG_CLK 158 -#define GCC_WCSS_SLV_ASYNC_BDG_CLK 159 #define GCC_XO_CLK 160 #define GCC_XO_CLK_SRC 161 #define GCC_XO_DIV4_CLK 162 -- cgit v1.2.3 From da040d56031976144740bddba942485999f6a16f Mon Sep 17 00:00:00 2001 From: Manikanta Mylavarapu Date: Tue, 20 Aug 2024 11:26:17 +0530 Subject: dt-bindings: clock: qcom: gcc-ipq9574: remove q6 bring up clock macros Q6 firmware takes care of bringup clocks, so remove them. Signed-off-by: Manikanta Mylavarapu Reviewed-by: Krzysztof Kozlowski Signed-off-by: Gokul Sriram Palanisamy Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240820055618.267554-5-quic_gokulsri@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,ipq9574-gcc.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,ipq9574-gcc.h b/include/dt-bindings/clock/qcom,ipq9574-gcc.h index 52123c5a09fa..f238aa4794a8 100644 --- a/include/dt-bindings/clock/qcom,ipq9574-gcc.h +++ b/include/dt-bindings/clock/qcom,ipq9574-gcc.h @@ -132,16 +132,8 @@ #define GCC_NSSNOC_SNOC_1_CLK 123 #define GCC_QDSS_ETR_USB_CLK 124 #define WCSS_AHB_CLK_SRC 125 -#define GCC_Q6_AHB_CLK 126 -#define GCC_Q6_AHB_S_CLK 127 -#define GCC_WCSS_ECAHB_CLK 128 -#define GCC_WCSS_ACMT_CLK 129 -#define GCC_SYS_NOC_WCSS_AHB_CLK 130 #define WCSS_AXI_M_CLK_SRC 131 -#define GCC_ANOC_WCSS_AXI_M_CLK 132 #define QDSS_AT_CLK_SRC 133 -#define GCC_Q6SS_ATBM_CLK 134 -#define GCC_WCSS_DBG_IFC_ATB_CLK 135 #define GCC_NSSNOC_ATB_CLK 136 #define GCC_QDSS_AT_CLK 137 #define GCC_SYS_NOC_AT_CLK 138 @@ -154,27 +146,18 @@ #define QDSS_TRACECLKIN_CLK_SRC 145 #define GCC_QDSS_TRACECLKIN_CLK 146 #define QDSS_TSCTR_CLK_SRC 147 -#define GCC_Q6_TSCTR_1TO2_CLK 148 -#define GCC_WCSS_DBG_IFC_NTS_CLK 149 #define GCC_QDSS_TSCTR_DIV2_CLK 150 #define GCC_QDSS_TS_CLK 151 #define GCC_QDSS_TSCTR_DIV4_CLK 152 #define GCC_NSS_TS_CLK 153 #define GCC_QDSS_TSCTR_DIV8_CLK 154 #define GCC_QDSS_TSCTR_DIV16_CLK 155 -#define GCC_Q6SS_PCLKDBG_CLK 156 -#define GCC_Q6SS_TRIG_CLK 157 -#define GCC_WCSS_DBG_IFC_APB_CLK 158 -#define GCC_WCSS_DBG_IFC_DAPBUS_CLK 159 #define GCC_QDSS_DAP_CLK 160 #define GCC_QDSS_APB2JTAG_CLK 161 #define GCC_QDSS_TSCTR_DIV3_CLK 162 #define QPIC_IO_MACRO_CLK_SRC 163 #define GCC_QPIC_IO_MACRO_CLK 164 #define Q6_AXI_CLK_SRC 165 -#define GCC_Q6_AXIM_CLK 166 -#define GCC_WCSS_Q6_TBU_CLK 167 -#define GCC_MEM_NOC_Q6_AXI_CLK 168 #define Q6_AXIM2_CLK_SRC 169 #define NSSNOC_MEMNOC_BFDCD_CLK_SRC 170 #define GCC_NSSNOC_MEMNOC_CLK 171 @@ -199,7 +182,6 @@ #define GCC_UNIPHY2_SYS_CLK 190 #define GCC_CMN_12GPLL_SYS_CLK 191 #define GCC_NSSNOC_XO_DCD_CLK 192 -#define GCC_Q6SS_BOOT_CLK 193 #define UNIPHY_SYS_CLK_SRC 194 #define NSS_TS_CLK_SRC 195 #define GCC_ANOC_PCIE0_1LANE_M_CLK 196 -- cgit v1.2.3 From da09a9e0c3eab164af950be44ee6bdea8527c3e5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 18 Oct 2024 22:22:51 +0200 Subject: uprobe: Add data pointer to consumer handlers Adding data pointer to both entry and exit consumer handlers and all its users. The functionality itself is coming in following change. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Oleg Nesterov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20241018202252.693462-2-jolsa@kernel.org --- include/linux/uprobes.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 2b294bf1881f..bb265a632b91 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -37,10 +37,10 @@ struct uprobe_consumer { * for the current process. If filter() is omitted or returns true, * UPROBE_HANDLER_REMOVE is effectively ignored. */ - int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); + int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data); int (*ret_handler)(struct uprobe_consumer *self, unsigned long func, - struct pt_regs *regs); + struct pt_regs *regs, __u64 *data); bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm); struct list_head cons_node; -- cgit v1.2.3 From 4d756095d3994cb41393817dc696b458938a6bd0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 18 Oct 2024 22:22:52 +0200 Subject: uprobe: Add support for session consumer This change allows the uprobe consumer to behave as session which means that 'handler' and 'ret_handler' callbacks are connected in a way that allows to: - control execution of 'ret_handler' from 'handler' callback - share data between 'handler' and 'ret_handler' callbacks The session concept fits to our common use case where we do filtering on entry uprobe and based on the result we decide to run the return uprobe (or not). It's also convenient to share the data between session callbacks. To achive this we are adding new return value the uprobe consumer can return from 'handler' callback: UPROBE_HANDLER_IGNORE - Ignore 'ret_handler' callback for this consumer. And store cookie and pass it to 'ret_handler' when consumer has both 'handler' and 'ret_handler' callbacks defined. We store shared data in the return_consumer object array as part of the return_instance object. This way the handle_uretprobe_chain can find related return_consumer and its shared data. We also store entry handler return value, for cases when there are multiple consumers on single uprobe and some of them are ignored and some of them not, in which case the return probe gets installed and we need to have a way to find out which consumer needs to be ignored. The tricky part is when consumer is registered 'after' the uprobe entry handler is hit. In such case this consumer's 'ret_handler' gets executed as well, but it won't have the proper data pointer set, so we can filter it out. Suggested-by: Oleg Nesterov Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20241018202252.693462-3-jolsa@kernel.org --- include/linux/uprobes.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index bb265a632b91..dbaf04189548 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -23,8 +23,17 @@ struct inode; struct notifier_block; struct page; +/* + * Allowed return values from uprobe consumer's handler callback + * with following meaning: + * + * UPROBE_HANDLER_REMOVE + * - Remove the uprobe breakpoint from current->mm. + * UPROBE_HANDLER_IGNORE + * - Ignore ret_handler callback for this consumer. + */ #define UPROBE_HANDLER_REMOVE 1 -#define UPROBE_HANDLER_MASK 1 +#define UPROBE_HANDLER_IGNORE 2 #define MAX_URETPROBE_DEPTH 64 @@ -44,6 +53,8 @@ struct uprobe_consumer { bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm); struct list_head cons_node; + + __u64 id; /* set when uprobe_consumer is registered */ }; #ifdef CONFIG_UPROBES @@ -83,14 +94,22 @@ struct uprobe_task { unsigned int depth; }; +struct return_consumer { + __u64 cookie; + __u64 id; +}; + struct return_instance { struct uprobe *uprobe; unsigned long func; unsigned long stack; /* stack pointer */ unsigned long orig_ret_vaddr; /* original return address */ bool chained; /* true, if instance is nested */ + int consumers_cnt; struct return_instance *next; /* keep as stack */ + + struct return_consumer consumers[] __counted_by(consumers_cnt); }; enum rp_check { -- cgit v1.2.3 From c8507a25cebd179db935dd266a33c51bef1b1e80 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Tue, 22 Oct 2024 13:03:47 -0700 Subject: drm/xe/oa/uapi: Define and parse OA sync properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have laid the groundwork, introduce OA sync properties in the uapi and parse the input xe_sync array as is done elsewhere in the driver. Also add DRM_XE_OA_CAPS_SYNCS bit in OA capabilities for userspace. v2: Fix and document DRM_XE_SYNC_TYPE_USER_FENCE for OA (Matt B) Add DRM_XE_OA_CAPS_SYNCS bit to OA capabilities (Jose) Acked-by: José Roberto de Souza Reviewed-by: Jonathan Cavitt Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20241022200352.1192560-3-ashutosh.dixit@intel.com --- include/uapi/drm/xe_drm.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index c4182e95a619..4a8a4a63e99c 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1485,6 +1485,7 @@ struct drm_xe_oa_unit { /** @capabilities: OA capabilities bit-mask */ __u64 capabilities; #define DRM_XE_OA_CAPS_BASE (1 << 0) +#define DRM_XE_OA_CAPS_SYNCS (1 << 1) /** @oa_timestamp_freq: OA timestamp freq */ __u64 oa_timestamp_freq; @@ -1634,6 +1635,22 @@ enum drm_xe_oa_property_id { * to be disabled for the stream exec queue. */ DRM_XE_OA_PROPERTY_NO_PREEMPT, + + /** + * @DRM_XE_OA_PROPERTY_NUM_SYNCS: Number of syncs in the sync array + * specified in @DRM_XE_OA_PROPERTY_SYNCS + */ + DRM_XE_OA_PROPERTY_NUM_SYNCS, + + /** + * @DRM_XE_OA_PROPERTY_SYNCS: Pointer to struct @drm_xe_sync array + * with array size specified via @DRM_XE_OA_PROPERTY_NUM_SYNCS. OA + * configuration will wait till input fences signal. Output fences + * will signal after the new OA configuration takes effect. For + * @DRM_XE_SYNC_TYPE_USER_FENCE, @addr is a user pointer, similar + * to the VM bind case. + */ + DRM_XE_OA_PROPERTY_SYNCS, }; /** -- cgit v1.2.3 From ee1251fc0c4e799a48025318f262739919deb977 Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Tue, 22 Oct 2024 11:49:45 +0000 Subject: cgroup/freezer: Reduce redundant traversal for cgroup_freeze MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Whether a cgroup is frozen is determined solely by whether it is set to to be frozen and whether its parent is frozen. Currently, when is cgroup is frozen or unfrozen, it iterates through the entire subtree to freeze or unfreeze its descentdants. However, this is unesessary for a cgroup that does not change its effective frozen status. This path aims to skip the subtree if its parent does not have a change in effective freeze. For an example, subtree like, a-b-c-d-e-f-g, when a is frozen, the entire tree is frozen. If we freeze b and c again, it is unesessary to iterate d, e, f and g. So does that If we unfreeze b/c. Reviewed-by: Michal Koutný Signed-off-by: Chen Ridong Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 0a80ef9191a6..1b20d2d8ef7c 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -398,7 +398,7 @@ struct cgroup_freezer_state { bool freeze; /* Should the cgroup actually be frozen? */ - int e_freeze; + bool e_freeze; /* Fields below are protected by css_set_lock */ -- cgit v1.2.3 From c0813ce2e5b0d1174782aff30d366509377abc7b Mon Sep 17 00:00:00 2001 From: Pengfei Li Date: Wed, 23 Oct 2024 11:46:48 -0700 Subject: dt-bindings: clock: imx93: Drop IMX93_CLK_END macro definition IMX93_CLK_END should be dropped as it is not part of the ABI. Signed-off-by: Pengfei Li Acked-by: Krzysztof Kozlowski Acked-by: Peng Fan Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20241023184651.381265-3-pengfei.li_1@nxp.com Signed-off-by: Abel Vesa --- include/dt-bindings/clock/imx93-clock.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/clock/imx93-clock.h b/include/dt-bindings/clock/imx93-clock.h index 787c9e74dc96..a1d0b326bb6b 100644 --- a/include/dt-bindings/clock/imx93-clock.h +++ b/include/dt-bindings/clock/imx93-clock.h @@ -204,6 +204,5 @@ #define IMX93_CLK_A55_SEL 199 #define IMX93_CLK_A55_CORE 200 #define IMX93_CLK_PDM_IPG 201 -#define IMX93_CLK_END 202 #endif -- cgit v1.2.3 From f029d870096fcd8565a2ee8c2d0078b9aaec4fdb Mon Sep 17 00:00:00 2001 From: Pengfei Li Date: Wed, 23 Oct 2024 11:46:49 -0700 Subject: dt-bindings: clock: Add i.MX91 clock support i.MX91 has similar Clock Control Module(CCM) design as i.MX93, only add few new clock compared to i.MX93. Add a new compatible string and some new clocks for i.MX91. Signed-off-by: Pengfei Li Reviewed-by: Frank Li Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241023184651.381265-4-pengfei.li_1@nxp.com Signed-off-by: Abel Vesa --- include/dt-bindings/clock/imx93-clock.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/imx93-clock.h b/include/dt-bindings/clock/imx93-clock.h index a1d0b326bb6b..6c685067288b 100644 --- a/include/dt-bindings/clock/imx93-clock.h +++ b/include/dt-bindings/clock/imx93-clock.h @@ -204,5 +204,10 @@ #define IMX93_CLK_A55_SEL 199 #define IMX93_CLK_A55_CORE 200 #define IMX93_CLK_PDM_IPG 201 +#define IMX91_CLK_ENET1_QOS_TSN 202 +#define IMX91_CLK_ENET_TIMER 203 +#define IMX91_CLK_ENET2_REGULAR 204 +#define IMX91_CLK_ENET2_REGULAR_GATE 205 +#define IMX91_CLK_ENET1_QOS_TSN_GATE 206 #endif -- cgit v1.2.3 From 04af8a399fa40310f831b4f1dc9f757085f41983 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 18 Oct 2024 17:47:48 +0300 Subject: PCI: Protect Link Control 2 Register with RMW locking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCIe Bandwidth Controller performs RMW accesses the Link Control 2 Register which can occur concurrently to other sources of Link Control 2 Register writes. Therefore, add Link Control 2 Register among the PCI Express Capability Registers that need RMW locking. Link: https://lore.kernel.org/r/20241018144755.7875-3-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Reviewed-by: Jonathan Cameron --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..be5ed534c39c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1274,6 +1274,7 @@ static inline int pcie_capability_clear_and_set_word(struct pci_dev *dev, { switch (pos) { case PCI_EXP_LNKCTL: + case PCI_EXP_LNKCTL2: case PCI_EXP_RTCTL: return pcie_capability_clear_and_set_word_locked(dev, pos, clear, set); -- cgit v1.2.3 From 52e0874fc16bd26e9ea1871e30ffb2c6dff187cf Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 Aug 2024 12:39:02 +0200 Subject: locking/rt: Add sparse annotation PREEMPT_RT's sleeping locks. The sleeping locks on PREEMPT_RT (rt_spin_lock() and friends) lack sparse annotation. Therefore a missing spin_unlock() won't be spotted by sparse in a PREEMPT_RT build while it is noticed on a !PREEMPT_RT build. Add the __acquires/__releases macros to the lock/ unlock functions. The trylock functions already use the __cond_lock() wrapper. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240812104200.2239232-2-bigeasy@linutronix.de --- include/linux/rwlock_rt.h | 10 +++++----- include/linux/spinlock_rt.h | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h index 8544ff05e594..7d81fc6918ee 100644 --- a/include/linux/rwlock_rt.h +++ b/include/linux/rwlock_rt.h @@ -24,13 +24,13 @@ do { \ __rt_rwlock_init(rwl, #rwl, &__key); \ } while (0) -extern void rt_read_lock(rwlock_t *rwlock); +extern void rt_read_lock(rwlock_t *rwlock) __acquires(rwlock); extern int rt_read_trylock(rwlock_t *rwlock); -extern void rt_read_unlock(rwlock_t *rwlock); -extern void rt_write_lock(rwlock_t *rwlock); -extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass); +extern void rt_read_unlock(rwlock_t *rwlock) __releases(rwlock); +extern void rt_write_lock(rwlock_t *rwlock) __acquires(rwlock); +extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass) __acquires(rwlock); extern int rt_write_trylock(rwlock_t *rwlock); -extern void rt_write_unlock(rwlock_t *rwlock); +extern void rt_write_unlock(rwlock_t *rwlock) __releases(rwlock); static __always_inline void read_lock(rwlock_t *rwlock) { diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h index 61c49b16f69a..babc3e028779 100644 --- a/include/linux/spinlock_rt.h +++ b/include/linux/spinlock_rt.h @@ -32,10 +32,10 @@ do { \ __rt_spin_lock_init(slock, #slock, &__key, true); \ } while (0) -extern void rt_spin_lock(spinlock_t *lock); -extern void rt_spin_lock_nested(spinlock_t *lock, int subclass); -extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock); -extern void rt_spin_unlock(spinlock_t *lock); +extern void rt_spin_lock(spinlock_t *lock) __acquires(lock); +extern void rt_spin_lock_nested(spinlock_t *lock, int subclass) __acquires(lock); +extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock) __acquires(lock); +extern void rt_spin_unlock(spinlock_t *lock) __releases(lock); extern void rt_spin_lock_unlock(spinlock_t *lock); extern int rt_spin_trylock_bh(spinlock_t *lock); extern int rt_spin_trylock(spinlock_t *lock); -- cgit v1.2.3 From b1f01f9e54b1aaadb6740f86017e8fabdee77fe2 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 Aug 2024 12:39:03 +0200 Subject: locking/rt: Remove one __cond_lock() in RT's spin_trylock_irqsave() spin_trylock_irqsave() has a __cond_lock() wrapper which points to __spin_trylock_irqsave(). The function then invokes spin_trylock() which has another __cond_lock() finally pointing to rt_spin_trylock(). The compiler has no problem to parse this but sparse does not recognise that users of spin_trylock_irqsave() acquire a conditional lock and complains. Remove one layer of __cond_lock() so that sparse recognises conditional locking. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240812104200.2239232-3-bigeasy@linutronix.de --- include/linux/spinlock_rt.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h index babc3e028779..f9f14e135be7 100644 --- a/include/linux/spinlock_rt.h +++ b/include/linux/spinlock_rt.h @@ -132,7 +132,7 @@ static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, #define spin_trylock_irq(lock) \ __cond_lock(lock, rt_spin_trylock(lock)) -#define __spin_trylock_irqsave(lock, flags) \ +#define spin_trylock_irqsave(lock, flags) \ ({ \ int __locked; \ \ @@ -142,9 +142,6 @@ static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, __locked; \ }) -#define spin_trylock_irqsave(lock, flags) \ - __cond_lock(lock, __spin_trylock_irqsave(lock, flags)) - #define spin_is_contended(lock) (((void)(lock), 0)) static inline int spin_is_locked(spinlock_t *lock) -- cgit v1.2.3 From cdda1f26e74bac732eca537a69f19f6a37b641be Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Thu, 10 Oct 2024 16:52:32 +0100 Subject: pidfd: add ioctl to retrieve pid info A common pattern when using pid fds is having to get information about the process, which currently requires /proc being mounted, resolving the fd to a pid, and then do manual string parsing of /proc/N/status and friends. This needs to be reimplemented over and over in all userspace projects (e.g.: I have reimplemented resolving in systemd, dbus, dbus-daemon, polkit so far), and requires additional care in checking that the fd is still valid after having parsed the data, to avoid races. Having a programmatic API that can be used directly removes all these requirements, including having /proc mounted. As discussed at LPC24, add an ioctl with an extensible struct so that more parameters can be added later if needed. Start with returning pid/tgid/ppid and creds unconditionally, and cgroupid optionally. Signed-off-by: Luca Boccassi Link: https://lore.kernel.org/r/20241010155401.2268522-1-luca.boccassi@gmail.com Signed-off-by: Christian Brauner --- include/uapi/linux/pidfd.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index 565fc0629fff..4540f6301b8c 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -16,6 +16,55 @@ #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) +/* Flags for pidfd_info. */ +#define PIDFD_INFO_PID (1UL << 0) /* Always returned, even if not requested */ +#define PIDFD_INFO_CREDS (1UL << 1) /* Always returned, even if not requested */ +#define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */ + +#define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */ + +struct pidfd_info { + /* + * This mask is similar to the request_mask in statx(2). + * + * Userspace indicates what extensions or expensive-to-calculate fields + * they want by setting the corresponding bits in mask. The kernel + * will ignore bits that it does not know about. + * + * When filling the structure, the kernel will only set bits + * corresponding to the fields that were actually filled by the kernel. + * This also includes any future extensions that might be automatically + * filled. If the structure size is too small to contain a field + * (requested or not), to avoid confusion the mask will not + * contain a bit for that field. + * + * As such, userspace MUST verify that mask contains the + * corresponding flags after the ioctl(2) returns to ensure that it is + * using valid data. + */ + __u64 mask; + /* + * The information contained in the following fields might be stale at the + * time it is received, as the target process might have exited as soon as + * the IOCTL was processed, and there is no way to avoid that. However, it + * is guaranteed that if the call was successful, then the information was + * correct and referred to the intended process at the time the work was + * performed. */ + __u64 cgroupid; + __u32 pid; + __u32 tgid; + __u32 ppid; + __u32 ruid; + __u32 rgid; + __u32 euid; + __u32 egid; + __u32 suid; + __u32 sgid; + __u32 fsuid; + __u32 fsgid; + __u32 spare0[1]; +}; + #define PIDFS_IOCTL_MAGIC 0xFF #define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) @@ -28,5 +77,6 @@ #define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) #define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) #define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) +#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info) #endif /* _UAPI_LINUX_PIDFD_H */ -- cgit v1.2.3 From 9a9f7e13952b2638bc57bc9b34e6bdd106509836 Mon Sep 17 00:00:00 2001 From: Victor Shih Date: Fri, 18 Oct 2024 18:53:18 +0800 Subject: mmc: core: Support UHS-II card control and access Embed UHS-II access/control functionality into the MMC request processing flow. Signed-off-by: Jason Lai Signed-off-by: Victor Shih Message-ID: <20241018105333.4569-2-victorshihgli@gmail.com> [Ulf: A couple of cleanups and fixed sd_uhs2_power_off()] Signed-off-by: Ulf Hansson --- include/linux/mmc/core.h | 17 +++++++++++++++++ include/linux/mmc/host.h | 15 +++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index a890a71288ef..56972bd78462 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -11,6 +11,20 @@ struct mmc_data; struct mmc_request; +#define UHS2_MAX_PAYLOAD_LEN 2 +#define UHS2_MAX_RESP_LEN 20 + +struct uhs2_command { + u16 header; + u16 arg; + __be32 payload[UHS2_MAX_PAYLOAD_LEN]; + u8 payload_len; + u8 packet_len; + u8 tmode_half_duplex; + u8 uhs2_resp[UHS2_MAX_RESP_LEN]; /* UHS2 native cmd resp */ + u8 uhs2_resp_len; /* UHS2 native cmd resp len */ +}; + struct mmc_command { u32 opcode; u32 arg; @@ -97,6 +111,8 @@ struct mmc_command { struct mmc_data *data; /* data segment associated with cmd */ struct mmc_request *mrq; /* associated request */ + struct uhs2_command *uhs2_cmd; /* UHS2 command */ + /* for SDUC */ bool has_ext_addr; u8 ext_addr; @@ -158,6 +174,7 @@ struct mmc_request { const struct bio_crypt_ctx *crypto_ctx; int crypto_key_slot; #endif + struct uhs2_command uhs2_cmd; }; struct mmc_card; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 0980d06ed419..f166d6611ddb 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -127,6 +127,13 @@ struct sd_uhs2_caps { }; enum sd_uhs2_operation { + UHS2_PHY_INIT = 0, + UHS2_SET_CONFIG, + UHS2_ENABLE_INT, + UHS2_DISABLE_INT, + UHS2_ENABLE_CLK, + UHS2_DISABLE_CLK, + UHS2_CHECK_DORMANT, UHS2_SET_IOS, }; @@ -453,6 +460,8 @@ struct mmc_host { #endif #define MMC_CAP2_ALT_GPT_TEGRA (1 << 28) /* Host with eMMC that has GPT entry at a non-standard location */ + bool uhs2_sd_tran; /* UHS-II flag for SD_TRAN state */ + bool uhs2_app_cmd; /* UHS-II flag for APP command */ struct sd_uhs2_caps uhs2_caps; /* Host UHS-II capabilities */ int fixed_drv_type; /* fixed driver type for non-removable media */ @@ -714,6 +723,12 @@ static inline void mmc_debugfs_err_stats_inc(struct mmc_host *host, host->err_stats[stat] += 1; } +static inline int mmc_card_uhs2_hd_mode(struct mmc_host *host) +{ + return host->ios.timing == MMC_TIMING_UHS2_SPEED_A_HD || + host->ios.timing == MMC_TIMING_UHS2_SPEED_B_HD; +} + int mmc_sd_switch(struct mmc_card *card, bool mode, int group, u8 value, u8 *resp); int mmc_send_status(struct mmc_card *card, u32 *status); -- cgit v1.2.3 From a5a98a786e5e31e85a69d30935254e8730734706 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Oct 2024 16:59:46 +0200 Subject: thermal: core: Add and use cooling device guard Add and use a special guard for cooling devices. This allows quite a few error code paths to be simplified among other things and brings in code size reduction for a good measure. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/5837621.DvuYhMxLoT@rjwysocki.net Reviewed-by: Lukasz Luba --- include/linux/thermal.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index bcaa92732e14..754802478b96 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -140,6 +140,9 @@ struct thermal_cooling_device { #endif }; +DEFINE_GUARD(cooling_dev, struct thermal_cooling_device *, mutex_lock(&_T->lock), + mutex_unlock(&_T->lock)) + /* Structure to define Thermal Zone parameters */ struct thermal_zone_params { const char *governor_name; -- cgit v1.2.3 From 1773572863c43a14a3e45f0591f28b7dec1ee52a Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 22 Oct 2024 17:51:42 +0200 Subject: thermal: netlink: Add the commands and the events for the thresholds The thresholds exist but there is no notification neither action code related to them yet. These changes implement the netlink for the notifications when the thresholds are crossed, added, deleted or flushed as well as the commands which allows to get the list of the thresholds, flush them, add and delete. Signed-off-by: Daniel Lezcano Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/20241022155147.463475-3-daniel.lezcano@linaro.org [ rjw: Use the thermal_zone guard for locking, subject edit ] Signed-off-by: Rafael J. Wysocki --- include/uapi/linux/thermal.h | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h index 2e6f60a36173..ba8604bdf206 100644 --- a/include/uapi/linux/thermal.h +++ b/include/uapi/linux/thermal.h @@ -20,7 +20,7 @@ enum thermal_trip_type { /* Adding event notification support elements */ #define THERMAL_GENL_FAMILY_NAME "thermal" -#define THERMAL_GENL_VERSION 0x01 +#define THERMAL_GENL_VERSION 0x02 #define THERMAL_GENL_SAMPLING_GROUP_NAME "sampling" #define THERMAL_GENL_EVENT_GROUP_NAME "event" @@ -30,6 +30,7 @@ enum thermal_genl_attr { THERMAL_GENL_ATTR_TZ, THERMAL_GENL_ATTR_TZ_ID, THERMAL_GENL_ATTR_TZ_TEMP, + THERMAL_GENL_ATTR_TZ_PREV_TEMP, THERMAL_GENL_ATTR_TZ_TRIP, THERMAL_GENL_ATTR_TZ_TRIP_ID, THERMAL_GENL_ATTR_TZ_TRIP_TYPE, @@ -50,6 +51,9 @@ enum thermal_genl_attr { THERMAL_GENL_ATTR_CPU_CAPABILITY_ID, THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE, THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY, + THERMAL_GENL_ATTR_THRESHOLD, + THERMAL_GENL_ATTR_THRESHOLD_TEMP, + THERMAL_GENL_ATTR_THRESHOLD_DIRECTION, __THERMAL_GENL_ATTR_MAX, }; #define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1) @@ -77,6 +81,11 @@ enum thermal_genl_event { THERMAL_GENL_EVENT_CDEV_STATE_UPDATE, /* Cdev state updated */ THERMAL_GENL_EVENT_TZ_GOV_CHANGE, /* Governor policy changed */ THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE, /* CPU capability changed */ + THERMAL_GENL_EVENT_THRESHOLD_ADD, /* A thresold has been added */ + THERMAL_GENL_EVENT_THRESHOLD_DELETE, /* A thresold has been deleted */ + THERMAL_GENL_EVENT_THRESHOLD_FLUSH, /* All thresolds have been deleted */ + THERMAL_GENL_EVENT_THRESHOLD_UP, /* A thresold has been crossed the way up */ + THERMAL_GENL_EVENT_THRESHOLD_DOWN, /* A thresold has been crossed the way down */ __THERMAL_GENL_EVENT_MAX, }; #define THERMAL_GENL_EVENT_MAX (__THERMAL_GENL_EVENT_MAX - 1) @@ -84,12 +93,16 @@ enum thermal_genl_event { /* Commands supported by the thermal_genl_family */ enum thermal_genl_cmd { THERMAL_GENL_CMD_UNSPEC, - THERMAL_GENL_CMD_TZ_GET_ID, /* List of thermal zones id */ - THERMAL_GENL_CMD_TZ_GET_TRIP, /* List of thermal trips */ - THERMAL_GENL_CMD_TZ_GET_TEMP, /* Get the thermal zone temperature */ - THERMAL_GENL_CMD_TZ_GET_GOV, /* Get the thermal zone governor */ - THERMAL_GENL_CMD_TZ_GET_MODE, /* Get the thermal zone mode */ - THERMAL_GENL_CMD_CDEV_GET, /* List of cdev id */ + THERMAL_GENL_CMD_TZ_GET_ID, /* List of thermal zones id */ + THERMAL_GENL_CMD_TZ_GET_TRIP, /* List of thermal trips */ + THERMAL_GENL_CMD_TZ_GET_TEMP, /* Get the thermal zone temperature */ + THERMAL_GENL_CMD_TZ_GET_GOV, /* Get the thermal zone governor */ + THERMAL_GENL_CMD_TZ_GET_MODE, /* Get the thermal zone mode */ + THERMAL_GENL_CMD_CDEV_GET, /* List of cdev id */ + THERMAL_GENL_CMD_THRESHOLD_GET, /* List of thresholds */ + THERMAL_GENL_CMD_THRESHOLD_ADD, /* Add a threshold */ + THERMAL_GENL_CMD_THRESHOLD_DELETE, /* Delete a threshold */ + THERMAL_GENL_CMD_THRESHOLD_FLUSH, /* Flush all the thresholds */ __THERMAL_GENL_CMD_MAX, }; #define THERMAL_GENL_CMD_MAX (__THERMAL_GENL_CMD_MAX - 1) -- cgit v1.2.3 From 68ed5c38b512b734caf3da1f87db4a99fcfe3002 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Oct 2024 11:31:33 -0700 Subject: phonet: Pass net and ifindex to phonet_address_notify(). Currently, phonet_address_notify() fetches netns and ifindex from dev. Once addr_doit() is converted to RCU, phonet_address_notify() will be called outside of RCU due to GFP_KERNEL, and dev will be unavailable there. Let's pass net and ifindex to phonet_address_notify(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/phonet/pn_dev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index e9dc8dca5817..6b2102b4ece3 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -38,7 +38,7 @@ int phonet_address_add(struct net_device *dev, u8 addr); int phonet_address_del(struct net_device *dev, u8 addr); u8 phonet_address_get(struct net_device *dev, u8 addr); int phonet_address_lookup(struct net *net, u8 addr); -void phonet_address_notify(int event, struct net_device *dev, u8 addr); +void phonet_address_notify(struct net *net, int event, u32 ifindex, u8 addr); int phonet_route_add(struct net_device *dev, u8 daddr); int phonet_route_del(struct net_device *dev, u8 daddr); -- cgit v1.2.3 From 42f5fe1dc4babad1c49bcc4121983fffccee3cd9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Oct 2024 11:31:34 -0700 Subject: phonet: Convert phonet_device_list.lock to spinlock_t. addr_doit() calls phonet_address_add() or phonet_address_del() for RTM_NEWADDR or RTM_DELADDR, respectively. Both functions only touch phonet_device_list(dev_net(dev)), which is currently protected by RTNL and its dedicated mutex, phonet_device_list.lock. We will convert addr_doit() to RCU and cannot use mutex inside RCU. Let's convert the mutex to spinlock_t. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/phonet/pn_dev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 6b2102b4ece3..ac0331d83a81 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -12,12 +12,13 @@ #include #include +#include struct net; struct phonet_device_list { struct list_head list; - struct mutex lock; + spinlock_t lock; }; struct phonet_device_list *phonet_device_list(struct net *net); -- cgit v1.2.3 From de51ad08b1177bbbb8b60cb7dd4c3c5dd50d262f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Oct 2024 11:31:38 -0700 Subject: phonet: Pass net and ifindex to rtm_phonet_notify(). Currently, rtm_phonet_notify() fetches netns and ifindex from dev. Once route_doit() is converted to RCU, rtm_phonet_notify() will be called outside of RCU due to GFP_KERNEL, and dev will be unavailable there. Let's pass net and ifindex to rtm_phonet_notify(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/phonet/pn_dev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index ac0331d83a81..021e524fd20a 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -43,7 +43,7 @@ void phonet_address_notify(struct net *net, int event, u32 ifindex, u8 addr); int phonet_route_add(struct net_device *dev, u8 daddr); int phonet_route_del(struct net_device *dev, u8 daddr); -void rtm_phonet_notify(int event, struct net_device *dev, u8 dst); +void rtm_phonet_notify(struct net *net, int event, u32 ifindex, u8 dst); struct net_device *phonet_route_get_rcu(struct net *net, u8 daddr); struct net_device *phonet_route_output(struct net *net, u8 daddr); -- cgit v1.2.3 From 3deec3b4afb4c767007eae1eeedbcf3da599395b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Oct 2024 11:31:39 -0700 Subject: phonet: Convert phonet_routes.lock to spinlock_t. route_doit() calls phonet_route_add() or phonet_route_del() for RTM_NEWROUTE or RTM_DELROUTE, respectively. Both functions only touch phonet_pernet(dev_net(dev))->routes, which is currently protected by RTNL and its dedicated mutex, phonet_routes.lock. We will convert route_doit() to RCU and cannot use mutex inside RCU. Let's convert the mutex to spinlock_t. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/phonet/pn_dev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 021e524fd20a..37a3e83531c6 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -11,7 +11,6 @@ #define PN_DEV_H #include -#include #include struct net; -- cgit v1.2.3 From a6021aa24f6417416d93318bbfa022ab229c33c8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 11 Oct 2024 06:18:17 +0000 Subject: ACPI: EC: make EC support compile-time conditional The embedded controller code is mainly used on x86 laptops and cannot work without PC style I/O port access. Make this a user-visible configuration option that is default enabled on x86 but otherwise disabled, and that can never be enabled unless CONFIG_HAS_IOPORT is also available. The empty stubs in internal.h help ignore the EC code in configurations that don't support it. In order to see those stubs, the sbshc code also has to include this header and drop duplicate declarations. All the direct callers of ec_read/ec_write already had an x86 dependency and now also need to depend on APCI_EC. Signed-off-by: Arnd Bergmann Acked-by: Guenter Roeck Acked-by: Hans de Goede Link: https://patch.msgid.link/20241011061948.3211423-1-arnd@kernel.org [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4d5ee84c468b..7dd24acd9ffe 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1164,8 +1164,6 @@ int acpi_subsys_suspend_noirq(struct device *dev); int acpi_subsys_suspend(struct device *dev); int acpi_subsys_freeze(struct device *dev); int acpi_subsys_poweroff(struct device *dev); -void acpi_ec_mark_gpe_for_wake(void); -void acpi_ec_set_gpe_wake_mask(u8 action); int acpi_subsys_restore_early(struct device *dev); #else static inline int acpi_subsys_prepare(struct device *dev) { return 0; } @@ -1176,6 +1174,12 @@ static inline int acpi_subsys_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_freeze(struct device *dev) { return 0; } static inline int acpi_subsys_poweroff(struct device *dev) { return 0; } static inline int acpi_subsys_restore_early(struct device *dev) { return 0; } +#endif + +#if defined(CONFIG_ACPI_EC) && defined(CONFIG_PM_SLEEP) +void acpi_ec_mark_gpe_for_wake(void); +void acpi_ec_set_gpe_wake_mask(u8 action); +#else static inline void acpi_ec_mark_gpe_for_wake(void) {} static inline void acpi_ec_set_gpe_wake_mask(u8 action) {} #endif -- cgit v1.2.3 From 1cb80d9e93f861018fabe81a69ea0ded20f5a2d0 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 23 Oct 2024 16:47:48 -0700 Subject: bpf: Support __uptr type tag in BTF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces the "__uptr" type tag to BTF. It is to define a pointer pointing to the user space memory. This patch adds BTF logic to pass the "__uptr" type tag. btf_find_kptr() is reused for the "__uptr" tag. The "__uptr" will only be supported in the map_value of the task storage map. However, btf_parse_struct_meta() also uses btf_find_kptr() but it is not interested in "__uptr". This patch adds a "field_mask" argument to btf_find_kptr() which will return BTF_FIELD_IGNORE if the caller is not interested in a “__uptr” field. btf_parse_kptr() is also reused to parse the uptr. The btf_check_and_fixup_fields() is changed to do extra checks on the uptr to ensure that its struct size is not larger than PAGE_SIZE. It is not clear how a uptr pointing to a CO-RE supported kernel struct will be used, so it is also not allowed now. Signed-off-by: Kui-Feng Lee Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20241023234759.860539-2-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0c216e71cec7..bb31bc6d0c4d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -203,6 +203,7 @@ enum btf_field_type { BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD, BPF_REFCOUNT = (1 << 9), BPF_WORKQUEUE = (1 << 10), + BPF_UPTR = (1 << 11), }; typedef void (*btf_dtor_kfunc_t)(void *); @@ -322,6 +323,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) return "kptr"; case BPF_KPTR_PERCPU: return "percpu_kptr"; + case BPF_UPTR: + return "uptr"; case BPF_LIST_HEAD: return "bpf_list_head"; case BPF_LIST_NODE: @@ -350,6 +353,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: return sizeof(u64); case BPF_LIST_HEAD: return sizeof(struct bpf_list_head); @@ -379,6 +383,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: return __alignof__(u64); case BPF_LIST_HEAD: return __alignof__(struct bpf_list_head); -- cgit v1.2.3 From b9a5a07aeaa2a903fb1306eb422880b2fa5f937f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 23 Oct 2024 16:47:50 -0700 Subject: bpf: Add "bool swap_uptrs" arg to bpf_local_storage_update() and bpf_selem_alloc() In a later patch, the task local storage will only accept uptr from the syscall update_elem and will not accept uptr from the bpf prog. The reason is the bpf prog does not have a way to provide a valid user space address. bpf_local_storage_update() and bpf_selem_alloc() are used by both bpf prog bpf_task_storage_get(BPF_LOCAL_STORAGE_GET_F_CREATE) and bpf syscall update_elem. "bool swap_uptrs" arg is added to bpf_local_storage_update() and bpf_selem_alloc() to tell if it is called by the bpf prog or by the bpf syscall. When swap_uptrs==true, it is called by the syscall. The arg is named (swap_)uptrs because the later patch will swap the uptrs between the newly allocated selem and the user space provided map_value. It will make error handling easier in case map->ops->map_update_elem() fails and the caller can decide if it needs to unpin the uptr in the user space provided map_value or the bpf_local_storage_update() has already taken the uptr ownership and will take care of unpinning it also. Only swap_uptrs==false is passed now. The logic to handle the true case will be added in a later patch. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20241023234759.860539-4-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index dcddb0aef7d8..0c7216c065d5 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -181,7 +181,7 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap, struct bpf_local_storage_elem * bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value, - bool charge_mem, gfp_t gfp_flags); + bool charge_mem, bool swap_uptrs, gfp_t gfp_flags); void bpf_selem_free(struct bpf_local_storage_elem *selem, struct bpf_local_storage_map *smap, @@ -195,7 +195,7 @@ bpf_local_storage_alloc(void *owner, struct bpf_local_storage_data * bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, - void *value, u64 map_flags, gfp_t gfp_flags); + void *value, u64 map_flags, bool swap_uptrs, gfp_t gfp_flags); u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map); -- cgit v1.2.3 From 5bd5bab76669b1e1551f03f5fcbc165f3fa8d269 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 23 Oct 2024 16:47:51 -0700 Subject: bpf: Postpone bpf_selem_free() in bpf_selem_unlink_storage_nolock() In a later patch, bpf_selem_free() will call unpin_user_page() through bpf_obj_free_fields(). unpin_user_page() may take spin_lock. However, some bpf_selem_free() call paths have held a raw_spin_lock. Like this: raw_spin_lock_irqsave() bpf_selem_unlink_storage_nolock() bpf_selem_free() unpin_user_page() spin_lock() To avoid spinlock nested in raw_spinlock, bpf_selem_free() should be done after releasing the raw_spinlock. The "bool reuse_now" arg is replaced with "struct hlist_head *free_selem_list" in bpf_selem_unlink_storage_nolock(). The bpf_selem_unlink_storage_nolock() will append the to-be-free selem at the free_selem_list. The caller of bpf_selem_unlink_storage_nolock() will need to call the new bpf_selem_free_list(free_selem_list, reuse_now) to free the selem after releasing the raw_spinlock. Note that the selem->snode cannot be reused for linking to the free_selem_list because the selem->snode is protected by the raw_spinlock that we want to avoid holding. A new "struct hlist_node free_node;" is union-ized with the rcu_head. Only the first one successfully hlist_del_init_rcu(&selem->snode) will be able to use the free_node. After succeeding hlist_del_init_rcu(&selem->snode), the free_node and rcu_head usage is serialized such that they can share the 16 bytes in a union. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20241023234759.860539-5-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 0c7216c065d5..ab7244d8108f 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -77,7 +77,13 @@ struct bpf_local_storage_elem { struct hlist_node map_node; /* Linked to bpf_local_storage_map */ struct hlist_node snode; /* Linked to bpf_local_storage */ struct bpf_local_storage __rcu *local_storage; - struct rcu_head rcu; + union { + struct rcu_head rcu; + struct hlist_node free_node; /* used to postpone + * bpf_selem_free + * after raw_spin_unlock + */ + }; /* 8 bytes hole */ /* The data is stored in another cacheline to minimize * the number of cachelines access during a cache hit. -- cgit v1.2.3 From ba512b00e5efbf7e19cfb7fa9f66ce82669b7077 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 23 Oct 2024 16:47:53 -0700 Subject: bpf: Add uptr support in the map_value of the task local storage. This patch adds uptr support in the map_value of the task local storage. struct map_value { struct user_data __uptr *uptr; }; struct { __uint(type, BPF_MAP_TYPE_TASK_STORAGE); __uint(map_flags, BPF_F_NO_PREALLOC); __type(key, int); __type(value, struct value_type); } datamap SEC(".maps"); A new bpf_obj_pin_uptrs() is added to pin the user page and also stores the kernel address back to the uptr for the bpf prog to use later. It currently does not support the uptr pointing to a user struct across two pages. It also excludes PageHighMem support to keep it simple. As of now, the 32bit bpf jit is missing other more crucial bpf features. For example, many important bpf features depend on bpf kfunc now but so far only one arch (x86-32) supports it which was added by me as an example when kfunc was first introduced to bpf. The uptr can only be stored to the task local storage by the syscall update_elem. Meaning the uptr will not be considered if it is provided by the bpf prog through bpf_task_storage_get(BPF_LOCAL_STORAGE_GET_F_CREATE). This is enforced by only calling bpf_local_storage_update(swap_uptrs==true) in bpf_pid_task_storage_update_elem. Everywhere else will have swap_uptrs==false. This will pump down to bpf_selem_alloc(swap_uptrs==true). It is the only case that bpf_selem_alloc() will take the uptr value when updating the newly allocated selem. bpf_obj_swap_uptrs() is added to swap the uptr between the SDATA(selem)->data and the user provided map_value in "void *value". bpf_obj_swap_uptrs() makes the SDATA(selem)->data takes the ownership of the uptr and the user space provided map_value will have NULL in the uptr. The bpf_obj_unpin_uptrs() is called after map->ops->map_update_elem() returning error. If the map->ops->map_update_elem has reached a state that the local storage has taken the uptr ownership, the bpf_obj_unpin_uptrs() will be a no op because the uptr is NULL. A "__"bpf_obj_unpin_uptrs is added to make this error path unpin easier such that it does not have to check the map->record is NULL or not. BPF_F_LOCK is not supported when the map_value has uptr. This can be revisited later if there is a use case. A similar swap_uptrs idea can be considered. The final bit is to do unpin_user_page in the bpf_obj_free_fields(). The earlier patch has ensured that the bpf_obj_free_fields() has gone through the rcu gp when needed. Cc: linux-mm@kvack.org Cc: Shakeel Butt Signed-off-by: Martin KaFai Lau Acked-by: Shakeel Butt Link: https://lore.kernel.org/r/20241023234759.860539-7-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bb31bc6d0c4d..8888689aa917 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -424,6 +424,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: break; default: WARN_ON_ONCE(1); @@ -512,6 +513,25 @@ static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src bpf_obj_memcpy(map->record, dst, src, map->value_size, true); } +static inline void bpf_obj_swap_uptrs(const struct btf_record *rec, void *dst, void *src) +{ + unsigned long *src_uptr, *dst_uptr; + const struct btf_field *field; + int i; + + if (!btf_record_has_field(rec, BPF_UPTR)) + return; + + for (i = 0, field = rec->fields; i < rec->cnt; i++, field++) { + if (field->type != BPF_UPTR) + continue; + + src_uptr = src + field->offset; + dst_uptr = dst + field->offset; + swap(*src_uptr, *dst_uptr); + } +} + static inline void bpf_obj_memzero(struct btf_record *rec, void *dst, u32 size) { u32 curr_off = 0; -- cgit v1.2.3 From 3ced1c68751299c0cdf6a1ceeafdbe77db7d4956 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 19 Oct 2024 00:49:12 +0300 Subject: drm/display: bridge_connector: handle ycbcr_420_allowed Follow the interlace_allowed example and calculate drm_connector's ycbcr_420_allowed flag as AND of all drm_bridge's ycbcr_420_allowed flags in a chain. This is one of the gaps between several bridge-specific connector implementations and drm_bridge_connector. Reviewed-by: Neil Armstrong Reviewed-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20241019-bridge-yuv420-v1-1-d74efac9e4e6@linaro.org Signed-off-by: Dmitry Baryshkov --- include/drm/drm_bridge.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 75019d16be64..e8d735b7f6a4 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -802,6 +802,11 @@ struct drm_bridge { * modes. */ bool interlace_allowed; + /** + * @ycbcr_420_allowed: Indicate that the bridge can handle YCbCr 420 + * output. + */ + bool ycbcr_420_allowed; /** * @pre_enable_prev_first: The bridge requires that the prev * bridge @pre_enable function is called before its @pre_enable, -- cgit v1.2.3 From 09d6775f503b393d0457c7126aa43208e1724004 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 16 Oct 2024 13:27:45 -0700 Subject: riscv: Add support for userspace pointer masking RISC-V supports pointer masking with a variable number of tag bits (which is called "PMLEN" in the specification) and which is configured at the next higher privilege level. Wire up the PR_SET_TAGGED_ADDR_CTRL and PR_GET_TAGGED_ADDR_CTRL prctls so userspace can request a lower bound on the number of tag bits and determine the actual number of tag bits. As with arm64's PR_TAGGED_ADDR_ENABLE, the pointer masking configuration is thread-scoped, inherited on clone() and fork() and cleared on execve(). Reviewed-by: Charlie Jenkins Tested-by: Charlie Jenkins Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20241016202814.4061541-5-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- include/uapi/linux/prctl.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 35791791a879..cefd656ebf43 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -230,7 +230,7 @@ struct prctl_mm_map { # define PR_PAC_APDBKEY (1UL << 3) # define PR_PAC_APGAKEY (1UL << 4) -/* Tagged user address controls for arm64 */ +/* Tagged user address controls for arm64 and RISC-V */ #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) @@ -244,6 +244,9 @@ struct prctl_mm_map { # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Unused; kept only for source compatibility */ # define PR_MTE_TCF_SHIFT 1 +/* RISC-V pointer masking tag length */ +# define PR_PMLEN_SHIFT 24 +# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 -- cgit v1.2.3 From 78844482a1c939a972681842f8ee2a8ddb202441 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 16 Oct 2024 13:27:47 -0700 Subject: riscv: Allow ptrace control of the tagged address ABI This allows a tracer to control the ABI of the tracee, as on arm64. Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20241016202814.4061541-7-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- include/uapi/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b9935988da5c..a920cf8934dc 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -450,6 +450,7 @@ typedef struct elf64_shdr { #define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */ #define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */ #define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */ +#define NT_RISCV_TAGGED_ADDR_CTRL 0x902 /* RISC-V tagged address control (prctl()) */ #define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers */ #define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and status registers */ #define NT_LOONGARCH_LSX 0xa02 /* LoongArch Loongson SIMD Extension registers */ -- cgit v1.2.3 From 02ac5f9d6caec96071103f7c62b5117526e47b64 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 24 Oct 2024 02:20:02 +0000 Subject: of: property: add of_graph_get_next_port() We have endpoint base functions - of_graph_get_next_endpoint() - of_graph_get_endpoint_count() - for_each_endpoint_of_node() Here, for_each_endpoint_of_node() loop finds each endpoints ports { port@0 { (1) endpoint {...}; }; port@1 { (2) endpoint {...}; }; ... }; In above case, it finds endpoint as (1) -> (2) -> ... Basically, user/driver knows which port is used for what, but not in all cases. For example on flexible/generic driver case, how many ports are used is not fixed. For example Sound Generic Card driver which is very flexible/generic and used from many venders can't know how many ports are used, and used for what, because it depends on each vender SoC and/or its used board. And more, the port can have multi endpoints. For example Generic Sound Card case, it supports many type of connection between CPU / Codec, and some of them uses multi endpoint in one port. see below. ports { (A) port@0 { (1) endpoint@0 {...}; (2) endpoint@1 {...}; }; (B) port@1 { (3) endpoint {...}; }; ... }; Generic Sound Card want to handle each connection via "port" base instead of "endpoint" base. But, it is very difficult to handle each "port" via existing for_each_endpoint_of_node(). Because getting each "port" via of_get_parent() from each "endpoint" doesn't work. For example in above case, both (1) (2) endpoint has same "port" (= A). Add "port" base functions. Signed-off-by: Kuninori Morimoto Link: https://lore.kernel.org/r/87ldyeb5t9.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Rob Herring (Arm) --- include/linux/of_graph.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include') diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h index a4bea62bfa29..44518f3583a4 100644 --- a/include/linux/of_graph.h +++ b/include/linux/of_graph.h @@ -11,6 +11,7 @@ #ifndef __LINUX_OF_GRAPH_H #define __LINUX_OF_GRAPH_H +#include #include #include @@ -37,14 +38,29 @@ struct of_endpoint { for (child = of_graph_get_next_endpoint(parent, NULL); child != NULL; \ child = of_graph_get_next_endpoint(parent, child)) +/** + * for_each_of_graph_port - iterate over every port in a device or ports node + * @parent: parent device or ports node containing port + * @child: loop variable pointing to the current port node + * + * When breaking out of the loop, and continue to use the @child, you need to + * use return_ptr(@child) or no_free_ptr(@child) not to call __free() for it. + */ +#define for_each_of_graph_port(parent, child) \ + for (struct device_node *child __free(device_node) = of_graph_get_next_port(parent, NULL);\ + child != NULL; child = of_graph_get_next_port(parent, child)) + #ifdef CONFIG_OF bool of_graph_is_present(const struct device_node *node); int of_graph_parse_endpoint(const struct device_node *node, struct of_endpoint *endpoint); unsigned int of_graph_get_endpoint_count(const struct device_node *np); +unsigned int of_graph_get_port_count(struct device_node *np); struct device_node *of_graph_get_port_by_id(struct device_node *node, u32 id); struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, struct device_node *previous); +struct device_node *of_graph_get_next_port(const struct device_node *parent, + struct device_node *port); struct device_node *of_graph_get_endpoint_by_regs( const struct device_node *parent, int port_reg, int reg); struct device_node *of_graph_get_remote_endpoint( @@ -73,6 +89,11 @@ static inline unsigned int of_graph_get_endpoint_count(const struct device_node return 0; } +static inline unsigned int of_graph_get_port_count(struct device_node *np) +{ + return 0; +} + static inline struct device_node *of_graph_get_port_by_id( struct device_node *node, u32 id) { @@ -86,6 +107,13 @@ static inline struct device_node *of_graph_get_next_endpoint( return NULL; } +static inline struct device_node *of_graph_get_next_port( + const struct device_node *parent, + struct device_node *previous) +{ + return NULL; +} + static inline struct device_node *of_graph_get_endpoint_by_regs( const struct device_node *parent, int port_reg, int reg) { -- cgit v1.2.3 From 58fe47d6ac7413172e1fa88324fb1b4c0eb2c0a2 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 24 Oct 2024 02:20:06 +0000 Subject: of: property: add of_graph_get_next_port_endpoint() We already have of_graph_get_next_endpoint(), but it is not intuitive to use in some case. (X) node { (Y) ports { (P0) port@0 { endpoint { remote-endpoint = ...; };}; (P10) port@1 { endpoint { remote-endpoint = ...; }; (P11) endpoint { remote-endpoint = ...; };}; (P2) port@2 { endpoint { remote-endpoint = ...; };}; }; }; For example, if I want to handle port@1's 2 endpoints (= P10, P11), I want to use like below P10 = of_graph_get_next_endpoint(port1, NULL); P11 = of_graph_get_next_endpoint(port1, P10); But 1st one will be error, because of_graph_get_next_endpoint() requested 1st parameter is "node" (X) or "ports" (Y), not but "port". Below works well, but it will get P0 P0 = of_graph_get_next_endpoint(node, NULL); P0 = of_graph_get_next_endpoint(ports, NULL); In other words, we can't handle P10/P11 directly via of_graph_get_next_endpoint(). There is another non intuitive behavior on of_graph_get_next_endpoint(). In case of if I could get P10 pointer for some way, and if I want to handle port@1 things by loop, I would like use it like below /* * "ep" is now P10, and handle port1 things here, * but we don't know how many endpoints port1 have. * * Because "ep" is non NULL now, we can use port1 * as of_graph_get_next_endpoint(port1, xxx) */ do { /* do something for port1 specific things here */ } while (ep = of_graph_get_next_endpoint(port1, ep)) But it also not worked as I expected. I expect it will be P10 -> P11 -> NULL, but it will be P10 -> P11 -> P2, because of_graph_get_next_endpoint() will fetch "endpoint" beyond the "port". It is not useful for generic driver. To handle endpoint more intuitive, create of_graph_get_next_port_endpoint() of_graph_get_next_port_endpoint(port1, NULL); // P10 of_graph_get_next_port_endpoint(port1, P10); // P11 of_graph_get_next_port_endpoint(port1, P11); // NULL Signed-off-by: Kuninori Morimoto Link: https://lore.kernel.org/r/87jzdyb5t5.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Rob Herring (Arm) --- include/linux/of_graph.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h index 44518f3583a4..a692d9d979a6 100644 --- a/include/linux/of_graph.h +++ b/include/linux/of_graph.h @@ -50,6 +50,18 @@ struct of_endpoint { for (struct device_node *child __free(device_node) = of_graph_get_next_port(parent, NULL);\ child != NULL; child = of_graph_get_next_port(parent, child)) +/** + * for_each_of_graph_port_endpoint - iterate over every endpoint in a port node + * @parent: parent port node + * @child: loop variable pointing to the current endpoint node + * + * When breaking out of the loop, and continue to use the @child, you need to + * use return_ptr(@child) or no_free_ptr(@child) not to call __free() for it. + */ +#define for_each_of_graph_port_endpoint(parent, child) \ + for (struct device_node *child __free(device_node) = of_graph_get_next_port_endpoint(parent, NULL);\ + child != NULL; child = of_graph_get_next_port_endpoint(parent, child)) + #ifdef CONFIG_OF bool of_graph_is_present(const struct device_node *node); int of_graph_parse_endpoint(const struct device_node *node, @@ -61,6 +73,8 @@ struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, struct device_node *previous); struct device_node *of_graph_get_next_port(const struct device_node *parent, struct device_node *port); +struct device_node *of_graph_get_next_port_endpoint(const struct device_node *port, + struct device_node *prev); struct device_node *of_graph_get_endpoint_by_regs( const struct device_node *parent, int port_reg, int reg); struct device_node *of_graph_get_remote_endpoint( @@ -114,6 +128,13 @@ static inline struct device_node *of_graph_get_next_port( return NULL; } +static inline struct device_node *of_graph_get_next_port_endpoint( + const struct device_node *parent, + struct device_node *previous) +{ + return NULL; +} + static inline struct device_node *of_graph_get_endpoint_by_regs( const struct device_node *parent, int port_reg, int reg) { -- cgit v1.2.3 From 78cd57bbb4529690d152a5dce409dd91fa1a8b9b Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 24 Oct 2024 02:20:35 +0000 Subject: fbdev: omapfb: use new of_graph functions Now we can use new port related functions for port parsing. Use it. Signed-off-by: Kuninori Morimoto Acked-by: Helge Deller Link: https://lore.kernel.org/r/87bjzab5sd.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Rob Herring (Arm) --- include/video/omapfb_dss.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/video/omapfb_dss.h b/include/video/omapfb_dss.h index a8c0c3eeeb5b..d133190e3143 100644 --- a/include/video/omapfb_dss.h +++ b/include/video/omapfb_dss.h @@ -811,14 +811,6 @@ static inline bool omapdss_device_is_enabled(struct omap_dss_device *dssdev) return dssdev->state == OMAP_DSS_DISPLAY_ACTIVE; } -struct device_node * -omapdss_of_get_next_port(const struct device_node *parent, - struct device_node *prev); - -struct device_node * -omapdss_of_get_next_endpoint(const struct device_node *parent, - struct device_node *prev); - struct omap_dss_device * omapdss_of_find_source_for_first_ep(struct device_node *node); #else -- cgit v1.2.3 From 2f2d46959808e9b039ecb241ff13d50be2d6e231 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 19 Oct 2024 18:15:42 +0100 Subject: firmware/psci: Add definitions for PSCI v1.3 specification The v1.3 PSCI spec (https://developer.arm.com/documentation/den0022) adds the SYSTEM_OFF2 function. Add definitions for it and its hibernation type parameter. Signed-off-by: David Woodhouse Reviewed-by: Miguel Luis Link: https://lore.kernel.org/r/20241019172459.2241939-2-dwmw2@infradead.org Signed-off-by: Oliver Upton --- include/uapi/linux/psci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index 42a40ad3fb62..81759ff385e6 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -59,6 +59,7 @@ #define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) #define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19) #define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(20) +#define PSCI_1_3_FN_SYSTEM_OFF2 PSCI_0_2_FN(21) #define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12) #define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13) @@ -68,6 +69,7 @@ #define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) #define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(20) +#define PSCI_1_3_FN64_SYSTEM_OFF2 PSCI_0_2_FN64(21) /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff @@ -100,6 +102,9 @@ #define PSCI_1_1_RESET_TYPE_SYSTEM_WARM_RESET 0 #define PSCI_1_1_RESET_TYPE_VENDOR_START 0x80000000U +/* PSCI v1.3 hibernate type for SYSTEM_OFF2 */ +#define PSCI_1_3_OFF_TYPE_HIBERNATE_OFF BIT(0) + /* PSCI version decoding (independent of PSCI version) */ #define PSCI_VERSION_MAJOR_SHIFT 16 #define PSCI_VERSION_MINOR_MASK \ -- cgit v1.2.3 From 8be82d536a9f8d9974cf746d235c1f1c24dd31ed Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 19 Oct 2024 18:15:44 +0100 Subject: KVM: arm64: Add support for PSCI v1.2 and v1.3 As with PSCI v1.1 in commit 512865d83fd9 ("KVM: arm64: Bump guest PSCI version to 1.1"), expose v1.3 to the guest by default. The SYSTEM_OFF2 call which is exposed by doing so is compatible for userspace because it's just a new flag in the event that KVM raises, in precisely the same way that SYSTEM_RESET2 was compatible when v1.1 was enabled by default. Signed-off-by: David Woodhouse Reviewed-by: Miguel Luis Link: https://lore.kernel.org/r/20241019172459.2241939-4-dwmw2@infradead.org Signed-off-by: Oliver Upton --- include/kvm/arm_psci.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h index e8fb624013d1..cbaec804eb83 100644 --- a/include/kvm/arm_psci.h +++ b/include/kvm/arm_psci.h @@ -14,8 +14,10 @@ #define KVM_ARM_PSCI_0_2 PSCI_VERSION(0, 2) #define KVM_ARM_PSCI_1_0 PSCI_VERSION(1, 0) #define KVM_ARM_PSCI_1_1 PSCI_VERSION(1, 1) +#define KVM_ARM_PSCI_1_2 PSCI_VERSION(1, 2) +#define KVM_ARM_PSCI_1_3 PSCI_VERSION(1, 3) -#define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_1 +#define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_3 static inline int kvm_psci_version(struct kvm_vcpu *vcpu) { -- cgit v1.2.3 From f730fd535fc51573f982fad629f2fc6b4a0cde2f Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 19 Aug 2024 09:41:15 +0200 Subject: cleanup: Remove address space of returned pointer Guard functions in local_lock.h are defined using DEFINE_GUARD() and DEFINE_LOCK_GUARD_1() macros having lock type defined as pointer in the percpu address space. The functions, defined by these macros return value in generic address space, causing: cleanup.h:157:18: error: return from pointer to non-enclosed address space and cleanup.h:214:18: error: return from pointer to non-enclosed address space when strict percpu checks are enabled. Add explicit casts to remove address space of the returned pointer. Found by GCC's named address space checks. Fixes: e4ab322fbaaa ("cleanup: Add conditional guard support") Signed-off-by: Uros Bizjak Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20240819074124.143565-1-ubizjak@gmail.com --- include/linux/cleanup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 038b2d523bf8..518bd1fd86fb 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -290,7 +290,7 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ #define DEFINE_GUARD(_name, _type, _lock, _unlock) \ DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ - { return *_T; } + { return (void *)(__force unsigned long)*_T; } #define DEFINE_GUARD_COND(_name, _ext, _condlock) \ EXTEND_CLASS(_name, _ext, \ @@ -347,7 +347,7 @@ static inline void class_##_name##_destructor(class_##_name##_t *_T) \ \ static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \ { \ - return _T->lock; \ + return (void *)(__force unsigned long)_T->lock; \ } -- cgit v1.2.3 From fcc22ac5baf06dd17193de44b60dbceea6461983 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Fri, 18 Oct 2024 13:38:14 +0200 Subject: cleanup: Adjust scoped_guard() macros to avoid potential warning Change scoped_guard() and scoped_cond_guard() macros to make reasoning about them easier for static analysis tools (smatch, compiler diagnostics), especially to enable them to tell if the given usage of scoped_guard() is with a conditional lock class (interruptible-locks, try-locks) or not (like simple mutex_lock()). Add compile-time error if scoped_cond_guard() is used for non-conditional lock class. Beyond easier tooling and a little shrink reported by bloat-o-meter this patch enables developer to write code like: int foo(struct my_drv *adapter) { scoped_guard(spinlock, &adapter->some_spinlock) return adapter->spinlock_protected_var; } Current scoped_guard() implementation does not support that, due to compiler complaining: error: control reaches end of non-void function [-Werror=return-type] Technical stuff about the change: scoped_guard() macro uses common idiom of using "for" statement to declare a scoped variable. Unfortunately, current logic is too hard for compiler diagnostics to be sure that there is exactly one loop step; fix that. To make any loop so trivial that there is no above warning, it must not depend on any non-const variable to tell if there are more steps. There is no obvious solution for that in C, but one could use the compound statement expression with "goto" jumping past the "loop", effectively leaving only the subscope part of the loop semantics. More impl details: one more level of macro indirection is now needed to avoid duplicating label names; I didn't spot any other place that is using the "for (...; goto label) if (0) label: break;" idiom, so it's not packed for reuse beyond scoped_guard() family, what makes actual macros code cleaner. There was also a need to introduce const true/false variable per lock class, it is used to aid compiler diagnostics reasoning about "exactly 1 step" loops (note that converting that to function would undo the whole benefit). Big thanks to Andy Shevchenko for help on this patch, both internal and public, ranging from whitespace/formatting, through commit message clarifications, general improvements, ending with presenting alternative approaches - all despite not even liking the idea. Big thanks to Dmitry Torokhov for the idea of compile-time check for scoped_cond_guard() (to use it only with conditional locsk), and general improvements for the patch. Big thanks to David Lechner for idea to cover also scoped_cond_guard(). Signed-off-by: Przemek Kitszel Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Torokhov Link: https://lkml.kernel.org/r/20241018113823.171256-1-przemyslaw.kitszel@intel.com --- include/linux/cleanup.h | 52 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 518bd1fd86fb..0cc66f8d28e7 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -285,14 +285,20 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ * similar to scoped_guard(), except it does fail when the lock * acquire fails. * + * Only for conditional locks. */ +#define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond) \ +static __maybe_unused const bool class_##_name##_is_conditional = _is_cond + #define DEFINE_GUARD(_name, _type, _lock, _unlock) \ + __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ { return (void *)(__force unsigned long)*_T; } #define DEFINE_GUARD_COND(_name, _ext, _condlock) \ + __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ EXTEND_CLASS(_name, _ext, \ ({ void *_t = _T; if (_T && !(_condlock)) _t = NULL; _t; }), \ class_##_name##_t _T) \ @@ -303,17 +309,40 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ CLASS(_name, __UNIQUE_ID(guard)) #define __guard_ptr(_name) class_##_name##_lock_ptr +#define __is_cond_ptr(_name) class_##_name##_is_conditional -#define scoped_guard(_name, args...) \ - for (CLASS(_name, scope)(args), \ - *done = NULL; __guard_ptr(_name)(&scope) && !done; done = (void *)1) - -#define scoped_cond_guard(_name, _fail, args...) \ - for (CLASS(_name, scope)(args), \ - *done = NULL; !done; done = (void *)1) \ - if (!__guard_ptr(_name)(&scope)) _fail; \ - else - +/* + * Helper macro for scoped_guard(). + * + * Note that the "!__is_cond_ptr(_name)" part of the condition ensures that + * compiler would be sure that for the unconditional locks the body of the + * loop (caller-provided code glued to the else clause) could not be skipped. + * It is needed because the other part - "__guard_ptr(_name)(&scope)" - is too + * hard to deduce (even if could be proven true for unconditional locks). + */ +#define __scoped_guard(_name, _label, args...) \ + for (CLASS(_name, scope)(args); \ + __guard_ptr(_name)(&scope) || !__is_cond_ptr(_name); \ + ({ goto _label; })) \ + if (0) { \ +_label: \ + break; \ + } else + +#define scoped_guard(_name, args...) \ + __scoped_guard(_name, __UNIQUE_ID(label), args) + +#define __scoped_cond_guard(_name, _fail, _label, args...) \ + for (CLASS(_name, scope)(args); true; ({ goto _label; })) \ + if (!__guard_ptr(_name)(&scope)) { \ + BUILD_BUG_ON(!__is_cond_ptr(_name)); \ + _fail; \ +_label: \ + break; \ + } else + +#define scoped_cond_guard(_name, _fail, args...) \ + __scoped_cond_guard(_name, _fail, __UNIQUE_ID(label), args) /* * Additional helper macros for generating lock guards with types, either for * locks that don't have a native type (eg. RCU, preempt) or those that need a @@ -369,14 +398,17 @@ static inline class_##_name##_t class_##_name##_constructor(void) \ } #define DEFINE_LOCK_GUARD_1(_name, _type, _lock, _unlock, ...) \ +__DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ __DEFINE_UNLOCK_GUARD(_name, _type, _unlock, __VA_ARGS__) \ __DEFINE_LOCK_GUARD_1(_name, _type, _lock) #define DEFINE_LOCK_GUARD_0(_name, _lock, _unlock, ...) \ +__DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ __DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \ __DEFINE_LOCK_GUARD_0(_name, _lock) #define DEFINE_LOCK_GUARD_1_COND(_name, _ext, _condlock) \ + __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ EXTEND_CLASS(_name, _ext, \ ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\ if (_T->lock && !(_condlock)) _T->lock = NULL; \ -- cgit v1.2.3 From 36c2cf88808d47e926d11b98734f154fe4a9f50f Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 1 Oct 2024 17:30:18 -0500 Subject: cleanup: Add conditional guard helper Add a new if_not_guard() macro to cleanup.h for handling conditional guards such as mutext_trylock(). This is more ergonomic than scoped_guard() for most use cases. Instead of hiding the error handling statement in the macro args, it works like a normal if statement and allow the error path to be indented while the normal code flow path is not indented. And it avoid unwanted side-effect from hidden for loop in scoped_guard(). Signed-off-by: David Lechner Co-developed-by: Fabio M. De Francesco Signed-off-by: Fabio M. De Francesco Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dan Williams Link: https://lkml.kernel.org/r/20241001-cleanup-if_not_cond_guard-v1-1-7753810b0f7a@baylibre.com --- include/linux/cleanup.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 0cc66f8d28e7..e859f79b9d2d 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -273,6 +273,12 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ * an anonymous instance of the (guard) class, not recommended for * conditional locks. * + * if_not_guard(name, args...) { }: + * convenience macro for conditional guards that calls the statement that + * follows only if the lock was not acquired (typically an error return). + * + * Only for conditional locks. + * * scoped_guard (name, args...) { }: * similar to CLASS(name, scope)(args), except the variable (with the * explicit name 'scope') is declard in a for-loop such that its scope is @@ -343,6 +349,15 @@ _label: \ #define scoped_cond_guard(_name, _fail, args...) \ __scoped_cond_guard(_name, _fail, __UNIQUE_ID(label), args) + +#define __if_not_guard(_name, _id, args...) \ + BUILD_BUG_ON(!__is_cond_ptr(_name)); \ + CLASS(_name, _id)(args); \ + if (!__guard_ptr(_name)(&_id)) + +#define if_not_guard(_name, args...) \ + __if_not_guard(_name, __UNIQUE_ID(guard), args) + /* * Additional helper macros for generating lock guards with types, either for * locks that don't have a native type (eg. RCU, preempt) or those that need a -- cgit v1.2.3 From 49991cca67d584a59cb10d48825cce3d11f7d843 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Sat, 19 Oct 2024 11:47:27 +0300 Subject: dt-bindings: clock: r9a08g045-cpg: Add power domain ID for RTC The RTC and VBATTB don't share the MSTOP control bit (but only the bus clock and the reset signal). As the MSTOP control is modeled though power domains add power domain ID for the RTC device available on the Renesas RZ/G3S SoC. Signed-off-by: Claudiu Beznea Acked-by: Krzysztof Kozlowski Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20241019084738.3370489-2-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/r9a08g045-cpg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/r9a08g045-cpg.h b/include/dt-bindings/clock/r9a08g045-cpg.h index 8281e9caf3a9..311521fe4b59 100644 --- a/include/dt-bindings/clock/r9a08g045-cpg.h +++ b/include/dt-bindings/clock/r9a08g045-cpg.h @@ -308,5 +308,6 @@ #define R9A08G045_PD_DDR 64 #define R9A08G045_PD_TZCDDR 65 #define R9A08G045_PD_OTFDE_DDR 66 +#define R9A08G045_PD_RTC 67 #endif /* __DT_BINDINGS_CLOCK_R9A08G045_CPG_H__ */ -- cgit v1.2.3 From ec24643bdd625971933451f22b8e33d364920f6e Mon Sep 17 00:00:00 2001 From: Vibhore Vardhan Date: Mon, 7 Oct 2024 08:08:56 +0200 Subject: firmware: ti_sci: Add system suspend and resume call Introduce system suspend call that enables the ti_sci driver to support low power mode when the user space issues a suspend to mem. The following power management operations defined in the TISCI Low Power Mode API [1] are implemented to support suspend and resume: 1) TISCI_MSG_PREPARE_SLEEP Prepare the SOC for entering into a low power mode and provide details to firmware about the state being entered. 2) TISCI_MSG_SET_IO_ISOLATION Control the IO isolation for Low Power Mode. Also, write a ti_sci_prepare_system_suspend call to be used in the driver suspend handler to allow the system to identify the low power mode being entered and if necessary, send TISCI_MSG_PREPARE_SLEEP with information about the mode being entered. Sysfw version >= 10.00.04 support LPM_DM_MANAGED capability [2], where Device Mgr firmware now manages which low power mode is chosen. Going forward, this is the default configuration supported for TI AM62 family of devices. The state chosen by the DM can be influenced by sending constraints using the new LPM constraint APIs. In case the firmware does not support LPM_DM_MANAGED mode, the mode selection logic can be extended as needed. If no suspend-to-RAM modes are supported, return without taking any action. We're using "pm_suspend_target_state" to map the kernel's target suspend state to SysFW low power mode. Make sure this is available only when CONFIG_SUSPEND is enabled. Suspend has to be split into two parts, ti_sci_suspend() will send the prepare sleep message to prepare suspend. ti_sci_suspend_noirq() sets IO isolation which needs to be done as late as possible to avoid any issues. On resume this has to be done as early as possible. [1] https://software-dl.ti.com/tisci/esd/latest/2_tisci_msgs/pm/lpm.html Co-developed-by: Dave Gerlach Signed-off-by: Dave Gerlach Signed-off-by: Georgi Vlaev Signed-off-by: Dhruva Gole Signed-off-by: Vibhore Vardhan Signed-off-by: Kevin Hilman Tested-by: Dhruva Gole Signed-off-by: Markus Schneider-Pargmann Tested-by: Roger Quadros Link: https://lore.kernel.org/r/20241007-tisci-syssuspendresume-v13-3-ed54cd659a49@baylibre.com Signed-off-by: Nishanth Menon --- include/linux/soc/ti/ti_sci_protocol.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index bd0d11af76c5..1f1871e23f76 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -195,6 +195,10 @@ struct ti_sci_clk_ops { u64 *current_freq); }; +/* TISCI LPM IO isolation control values */ +#define TISCI_MSG_VALUE_IO_ENABLE 1 +#define TISCI_MSG_VALUE_IO_DISABLE 0 + /** * struct ti_sci_resource_desc - Description of TI SCI resource instance range. * @start: Start index of the first resource range. -- cgit v1.2.3 From 60357991f6b9d4bd4dc442a368da3f468cfa4903 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Mon, 7 Oct 2024 08:08:57 +0200 Subject: firmware: ti_sci: Introduce Power Management Ops Introduce power management ops supported by the TISCI Low Power Mode API [1]. 1) TISCI_MSG_LPM_WAKE_REASON Get which wake up source woke the SoC from Low Power Mode. The wake up source IDs will be common for all K3 platforms. 2) TISCI_MSG_LPM_SET_DEVICE_CONSTRAINT Set LPM constraint on behalf of a device. By setting a constraint, the device ensures that it will not be powered off or reset in the selected mode. 3) TISCI_MSG_LPM_SET_LATENCY_CONSTRAINT Set LPM resume latency constraint. By setting a constraint, the host ensures that the resume time from selected mode will be less than the constraint value. [1] https://software-dl.ti.com/tisci/esd/latest/2_tisci_msgs/pm/lpm.html Signed-off-by: Dave Gerlach [g-vlaev@ti.com: LPM_WAKE_REASON and IO_ISOLATION support] Signed-off-by: Georgi Vlaev [a-kaur@ti.com: SET_DEVICE_CONSTRAINT support] Signed-off-by: Akashdeep Kaur [vibhore@ti.com: SET_LATENCY_CONSTRAINT support] Signed-off-by: Vibhore Vardhan Signed-off-by: Kevin Hilman Reviewed-by: Akashdeep Kaur Tested-by: Dhruva Gole Signed-off-by: Markus Schneider-Pargmann Tested-by: Kevin Hilman Tested-by: Roger Quadros Acked-by: Dhruva Gole Link: https://lore.kernel.org/r/20241007-tisci-syssuspendresume-v13-4-ed54cd659a49@baylibre.com Signed-off-by: Nishanth Menon --- include/linux/soc/ti/ti_sci_protocol.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 1f1871e23f76..fd104b666836 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -199,6 +199,31 @@ struct ti_sci_clk_ops { #define TISCI_MSG_VALUE_IO_ENABLE 1 #define TISCI_MSG_VALUE_IO_DISABLE 0 +/* TISCI LPM constraint state values */ +#define TISCI_MSG_CONSTRAINT_SET 1 +#define TISCI_MSG_CONSTRAINT_CLR 0 + +/** + * struct ti_sci_pm_ops - Low Power Mode (LPM) control operations + * @lpm_wake_reason: Get the wake up source that woke the SoC from LPM + * - source: The wake up source that woke soc from LPM. + * - timestamp: Timestamp at which soc woke. + * @set_device_constraint: Set LPM constraint on behalf of a device + * - id: Device Identifier + * - state: The desired state of device constraint: set or clear. + * @set_latency_constraint: Set LPM resume latency constraint + * - latency: maximum acceptable latency to wake up from low power mode + * - state: The desired state of latency constraint: set or clear. + */ +struct ti_sci_pm_ops { + int (*lpm_wake_reason)(const struct ti_sci_handle *handle, + u32 *source, u64 *timestamp, u8 *pin, u8 *mode); + int (*set_device_constraint)(const struct ti_sci_handle *handle, + u32 id, u8 state); + int (*set_latency_constraint)(const struct ti_sci_handle *handle, + u16 latency, u8 state); +}; + /** * struct ti_sci_resource_desc - Description of TI SCI resource instance range. * @start: Start index of the first resource range. @@ -543,6 +568,7 @@ struct ti_sci_ops { struct ti_sci_core_ops core_ops; struct ti_sci_dev_ops dev_ops; struct ti_sci_clk_ops clk_ops; + struct ti_sci_pm_ops pm_ops; struct ti_sci_rm_core_ops rm_core_ops; struct ti_sci_rm_irq_ops rm_irq_ops; struct ti_sci_rm_ringacc_ops rm_ring_ops; -- cgit v1.2.3 From 3ae80b375739495e36fc6143ff27716fe390a13e Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 23 Oct 2024 16:15:31 +0200 Subject: drm/sched: warn about drm_sched_job_init()'s partial init drm_sched_job_init()'s name suggests that after the function succeeded, parameter "job" will be fully initialized. This is not the case; some members are only later set, notably drm_sched_job.sched by drm_sched_job_arm(). Document that drm_sched_job_init() does not set all struct members. Document the lifetime of drm_sched_job.sched. Reviewed-by: Matthew Brost Signed-off-by: Philipp Stanner Link: https://patchwork.freedesktop.org/patch/msgid/20241023141530.113370-2-pstanner@redhat.com --- include/drm/gpu_scheduler.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index ab161289d1bf..95e17504e46a 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -340,6 +340,14 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); struct drm_sched_job { struct spsc_node queue_node; struct list_head list; + + /** + * @sched: + * + * The scheduler this job is or will be scheduled on. Gets set by + * drm_sched_job_arm(). Valid until drm_sched_backend_ops.free_job() + * has finished. + */ struct drm_gpu_scheduler *sched; struct drm_sched_fence *s_fence; -- cgit v1.2.3 From 037bc38b298c9a8de64f84b253c0b472228bbb10 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Oct 2024 11:23:03 -0700 Subject: KVM: Drop KVM_ERR_PTR_BAD_PAGE and instead return NULL to indicate an error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove KVM_ERR_PTR_BAD_PAGE and instead return NULL, as "bad page" is just a leftover bit of weirdness from days of old when KVM stuffed a "bad" page into the guest instead of actually handling missing pages. See commit cea7bb21280e ("KVM: MMU: Make gfn_to_page() always safe"). Reviewed-by: Alex Bennée Tested-by: Alex Bennée Signed-off-by: Sean Christopherson Tested-by: Dmitry Osipenko Signed-off-by: Paolo Bonzini Message-ID: <20241010182427.1434605-2-seanjc@google.com> --- include/linux/kvm_host.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 45be36e5285f..0b280e5bad0a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -153,13 +153,6 @@ static inline bool kvm_is_error_gpa(gpa_t gpa) return gpa == INVALID_GPA; } -#define KVM_ERR_PTR_BAD_PAGE (ERR_PTR(-ENOENT)) - -static inline bool is_error_page(struct page *page) -{ - return IS_ERR(page); -} - #define KVM_REQUEST_MASK GENMASK(7,0) #define KVM_REQUEST_NO_WAKEUP BIT(8) #define KVM_REQUEST_WAIT BIT(9) -- cgit v1.2.3 From 3af91068b7e10dba438f70eae94d877f20842fa1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Oct 2024 11:23:05 -0700 Subject: KVM: Add kvm_release_page_unused() API to put pages that KVM never consumes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an API to release an unused page, i.e. to put a page without marking it accessed or dirty. The API will be used when KVM faults-in a page but bails before installing the guest mapping (and other similar flows). Reviewed-by: Alex Bennée Tested-by: Alex Bennée Signed-off-by: Sean Christopherson Tested-by: Dmitry Osipenko Signed-off-by: Paolo Bonzini Message-ID: <20241010182427.1434605-4-seanjc@google.com> --- include/linux/kvm_host.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0b280e5bad0a..8de9acb0b35e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1216,6 +1216,15 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, bool *writable); + +static inline void kvm_release_page_unused(struct page *page) +{ + if (!page) + return; + + put_page(page); +} + void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); -- cgit v1.2.3 From 6419bc52072b928acb764766968c672d5fede802 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Oct 2024 11:23:13 -0700 Subject: KVM: Rename gfn_to_page_many_atomic() to kvm_prefetch_pages() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename gfn_to_page_many_atomic() to kvm_prefetch_pages() to try and communicate its true purpose, as the "atomic" aspect is essentially a side effect of the fact that x86 uses the API while holding mmu_lock. E.g. even if mmu_lock weren't held, KVM wouldn't want to fault-in pages, as the goal is to opportunistically grab surrounding pages that have already been accessed and/or dirtied by the host, and to do so quickly. Tested-by: Alex Bennée Signed-off-by: Sean Christopherson Tested-by: Dmitry Osipenko Signed-off-by: Paolo Bonzini Message-ID: <20241010182427.1434605-12-seanjc@google.com> --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8de9acb0b35e..6a3976c1a218 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1207,8 +1207,8 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm); void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); -int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn, - struct page **pages, int nr_pages); +int kvm_prefetch_pages(struct kvm_memory_slot *slot, gfn_t gfn, + struct page **pages, int nr_pages); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); -- cgit v1.2.3 From e2d2ca71ac03c748dbc44e0dd7dc1557befb1ab6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Oct 2024 11:23:14 -0700 Subject: KVM: Drop @atomic param from gfn=>pfn and hva=>pfn APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop @atomic from the myriad "to_pfn" APIs now that all callers pass "false", and remove a comment blurb about KVM running only the "GUP fast" part in atomic context. No functional change intended. Reviewed-by: Alex Bennée Tested-by: Alex Bennée Signed-off-by: Sean Christopherson Tested-by: Dmitry Osipenko Signed-off-by: Paolo Bonzini Message-ID: <20241010182427.1434605-13-seanjc@google.com> --- include/linux/kvm_host.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6a3976c1a218..32e23e05a8c3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1232,9 +1232,8 @@ kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable); kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn); -kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn); kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn, - bool atomic, bool interruptible, bool *async, + bool interruptible, bool *async, bool write_fault, bool *writable, hva_t *hva); void kvm_release_pfn_clean(kvm_pfn_t pfn); -- cgit v1.2.3 From 6769d1bcd3509ad2d2ee04da122c465a11a165b4 Mon Sep 17 00:00:00 2001 From: David Stevens Date: Thu, 10 Oct 2024 11:23:18 -0700 Subject: KVM: Replace "async" pointer in gfn=>pfn with "no_wait" and error code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a pfn error code to communicate that hva_to_pfn() failed because I/O was needed and disallowed, and convert @async to a constant @no_wait boolean. This will allow eliminating the @no_wait param by having callers pass in FOLL_NOWAIT along with other FOLL_* flags. Tested-by: Alex Bennée Signed-off-by: David Stevens Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Tested-by: Dmitry Osipenko Signed-off-by: Paolo Bonzini Message-ID: <20241010182427.1434605-17-seanjc@google.com> --- include/linux/kvm_host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 32e23e05a8c3..dc15a9a64408 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -97,6 +97,7 @@ #define KVM_PFN_ERR_HWPOISON (KVM_PFN_ERR_MASK + 1) #define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 2) #define KVM_PFN_ERR_SIGPENDING (KVM_PFN_ERR_MASK + 3) +#define KVM_PFN_ERR_NEEDS_IO (KVM_PFN_ERR_MASK + 4) /* * error pfns indicate that the gfn is in slot but faild to @@ -1233,7 +1234,7 @@ kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable); kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn); kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn, - bool interruptible, bool *async, + bool interruptible, bool no_wait, bool write_fault, bool *writable, hva_t *hva); void kvm_release_pfn_clean(kvm_pfn_t pfn); -- cgit v1.2.3 From cccefb0a0d3b4f7b41b1921538087dd7031876ac Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Oct 2024 11:23:20 -0700 Subject: KVM: Drop unused "hva" pointer from __gfn_to_pfn_memslot() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop @hva from __gfn_to_pfn_memslot() now that all callers pass NULL. No functional change intended. Tested-by: Alex Bennée Signed-off-by: Sean Christopherson Tested-by: Dmitry Osipenko Signed-off-by: Paolo Bonzini Message-ID: <20241010182427.1434605-19-seanjc@google.com> --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index dc15a9a64408..2c9eb472f059 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1235,7 +1235,7 @@ kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn); kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn, bool interruptible, bool no_wait, - bool write_fault, bool *writable, hva_t *hva); + bool write_fault, bool *writable); void kvm_release_pfn_clean(kvm_pfn_t pfn); void kvm_release_pfn_dirty(kvm_pfn_t pfn); -- cgit v1.2.3 From ef7db98e477f4b379fac3131bf94c33774c4a211 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Oct 2024 11:23:24 -0700 Subject: KVM: Use NULL for struct page pointer to indicate mremapped memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop yet another unnecessary magic page value from KVM, as there's zero reason to use a poisoned pointer to indicate "no page". If KVM uses a NULL page pointer, the kernel will explode just as quickly as if KVM uses a poisoned pointer. Never mind the fact that such usage would be a blatant and egregious KVM bug. Tested-by: Alex Bennée Signed-off-by: Sean Christopherson Tested-by: Dmitry Osipenko Signed-off-by: Paolo Bonzini Message-ID: <20241010182427.1434605-23-seanjc@google.com> --- include/linux/kvm_host.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2c9eb472f059..cd6f5cc1930f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -273,16 +273,12 @@ enum { READING_SHADOW_PAGE_TABLES, }; -#define KVM_UNMAPPED_PAGE ((void *) 0x500 + POISON_POINTER_DELTA) - struct kvm_host_map { /* * Only valid if the 'pfn' is managed by the host kernel (i.e. There is * a 'struct page' for it. When using mem= kernel parameter some memory * can be used as guest memory but they are not managed by host * kernel). - * If 'pfn' is not managed by the host kernel, this field is - * initialized to KVM_UNMAPPED_PAGE. */ struct page *page; void *hva; -- cgit v1.2.3 From 2ff072ba7ad2deb1c3b2d231faa0ac3a25e2451b Mon Sep 17 00:00:00 2001 From: David Stevens Date: Thu, 10 Oct 2024 11:23:32 -0700 Subject: KVM: Migrate kvm_vcpu_map() to kvm_follow_pfn() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrate kvm_vcpu_map() to kvm_follow_pfn(), and have it track whether or not the map holds a refcounted struct page. Precisely tracking struct page references will eventually allow removing kvm_pfn_to_refcounted_page() and its various wrappers. Signed-off-by: David Stevens [sean: use a pointer instead of a boolean] Tested-by: Alex Bennée Signed-off-by: Sean Christopherson Tested-by: Dmitry Osipenko Signed-off-by: Paolo Bonzini Message-ID: <20241010182427.1434605-31-seanjc@google.com> --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cd6f5cc1930f..35e1beb017dd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -280,6 +280,7 @@ struct kvm_host_map { * can be used as guest memory but they are not managed by host * kernel). */ + struct page *refcounted_page; struct page *page; void *hva; kvm_pfn_t pfn; @@ -1238,7 +1239,6 @@ void kvm_release_pfn_dirty(kvm_pfn_t pfn); void kvm_set_pfn_dirty(kvm_pfn_t pfn); void kvm_set_pfn_accessed(kvm_pfn_t pfn); -void kvm_release_pfn(kvm_pfn_t pfn, bool dirty); int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int len); int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); -- cgit v1.2.3 From 2bcb52a3602bf4cbc55d8fb4da00c930f83d7789 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Oct 2024 11:23:33 -0700 Subject: KVM: Pin (as in FOLL_PIN) pages during kvm_vcpu_map() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pin, as in FOLL_PIN, pages when mapping them for direct access by KVM. As per Documentation/core-api/pin_user_pages.rst, writing to a page that was gotten via FOLL_GET is explicitly disallowed. Correct (uses FOLL_PIN calls): pin_user_pages() write to the data within the pages unpin_user_pages() INCORRECT (uses FOLL_GET calls): get_user_pages() write to the data within the pages put_page() Unfortunately, FOLL_PIN is a "private" flag, and so kvm_follow_pfn must use a one-off bool instead of being able to piggyback the "flags" field. Link: https://lwn.net/Articles/930667 Link: https://lore.kernel.org/all/cover.1683044162.git.lstoakes@gmail.com Tested-by: Alex Bennée Signed-off-by: Sean Christopherson Tested-by: Dmitry Osipenko Signed-off-by: Paolo Bonzini Message-ID: <20241010182427.1434605-32-seanjc@google.com> --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 35e1beb017dd..b4c541fa5a1f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -280,7 +280,7 @@ struct kvm_host_map { * can be used as guest memory but they are not managed by host * kernel). */ - struct page *refcounted_page; + struct page *pinned_page; struct page *page; void *hva; kvm_pfn_t pfn; -- cgit v1.2.3 From 365e319208442a0807a96e9ea4d0b1fa338f1929 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Oct 2024 11:23:35 -0700 Subject: KVM: Pass in write/dirty to kvm_vcpu_map(), not kvm_vcpu_unmap() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that all kvm_vcpu_{,un}map() users pass "true" for @dirty, have them pass "true" as a @writable param to kvm_vcpu_map(), and thus create a read-only mapping when possible. Note, creating read-only mappings can be theoretically slower, as they don't play nice with fast GUP due to the need to break CoW before mapping the underlying PFN. But practically speaking, creating a mapping isn't a super hot path, and getting a writable mapping for reading is weird and confusing. Tested-by: Alex Bennée Signed-off-by: Sean Christopherson Tested-by: Dmitry Osipenko Signed-off-by: Paolo Bonzini Message-ID: <20241010182427.1434605-34-seanjc@google.com> --- include/linux/kvm_host.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b4c541fa5a1f..101dbf2be1ce 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -285,6 +285,7 @@ struct kvm_host_map { void *hva; kvm_pfn_t pfn; kvm_pfn_t gfn; + bool writable; }; /* @@ -1311,8 +1312,23 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn); struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn); -int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map); -void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty); + +int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map, + bool writable); +void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map); + +static inline int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, + struct kvm_host_map *map) +{ + return __kvm_vcpu_map(vcpu, gpa, map, true); +} + +static inline int kvm_vcpu_map_readonly(struct kvm_vcpu *vcpu, gpa_t gpa, + struct kvm_host_map *map) +{ + return __kvm_vcpu_map(vcpu, gpa, map, false); +} + unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable); int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset, -- cgit v1.2.3 From 21dd877060d49f1f57901f929189653fc42ac37a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Oct 2024 11:23:44 -0700 Subject: KVM: Move declarations of memslot accessors up in kvm_host.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the memslot lookup helpers further up in kvm_host.h so that they can be used by inlined "to pfn" wrappers. No functional change intended. Tested-by: Alex Bennée Signed-off-by: Sean Christopherson Tested-by: Dmitry Osipenko Signed-off-by: Paolo Bonzini Message-ID: <20241010182427.1434605-43-seanjc@google.com> --- include/linux/kvm_host.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 101dbf2be1ce..8d35a36e7707 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1168,6 +1168,10 @@ static inline bool kvm_memslot_iter_is_valid(struct kvm_memslot_iter *iter, gfn_ kvm_memslot_iter_is_valid(iter, end); \ kvm_memslot_iter_next(iter)) +struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); +struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); +struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn); + /* * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: * - create a new memory slot @@ -1303,16 +1307,12 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, }) int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); -struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); -struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); -struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn); - int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map, bool writable); void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map); -- cgit v1.2.3 From 1c7b627e930624dd64ee906df554c8f2bad628ff Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Oct 2024 11:23:45 -0700 Subject: KVM: Add kvm_faultin_pfn() to specifically service guest page faults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new dedicated API, kvm_faultin_pfn(), for servicing guest page faults, i.e. for getting pages/pfns that will be mapped into the guest via an mmu_notifier-protected KVM MMU. Keep struct kvm_follow_pfn buried in internal code, as having __kvm_faultin_pfn() take "out" params is actually cleaner for several architectures, e.g. it allows the caller to have its own "page fault" structure without having to marshal data to/from kvm_follow_pfn. Long term, common KVM would ideally provide a kvm_page_fault structure, a la x86's struct of the same name. But all architectures need to be converted to a common API before that can happen. Tested-by: Alex Bennée Signed-off-by: Sean Christopherson Tested-by: Dmitry Osipenko Signed-off-by: Paolo Bonzini Message-ID: <20241010182427.1434605-44-seanjc@google.com> --- include/linux/kvm_host.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8d35a36e7707..a63b0325d3e2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1231,6 +1231,18 @@ static inline void kvm_release_page_unused(struct page *page) void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); +kvm_pfn_t __kvm_faultin_pfn(const struct kvm_memory_slot *slot, gfn_t gfn, + unsigned int foll, bool *writable, + struct page **refcounted_page); + +static inline kvm_pfn_t kvm_faultin_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, + bool write, bool *writable, + struct page **refcounted_page) +{ + return __kvm_faultin_pfn(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, + write ? FOLL_WRITE : 0, writable, refcounted_page); +} + kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable); -- cgit v1.2.3 From 1fbee5b01a0fd27db571eed757682a7c20045107 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Oct 2024 11:23:48 -0700 Subject: KVM: guest_memfd: Provide "struct page" as output from kvm_gmem_get_pfn() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide the "struct page" associated with a guest_memfd pfn as an output from __kvm_gmem_get_pfn() so that KVM guest page fault handlers can directly put the page instead of having to rely on kvm_pfn_to_refcounted_page(). Tested-by: Alex Bennée Signed-off-by: Sean Christopherson Tested-by: Dmitry Osipenko Signed-off-by: Paolo Bonzini Message-ID: <20241010182427.1434605-47-seanjc@google.com> --- include/linux/kvm_host.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a63b0325d3e2..6efdc00b4254 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2487,11 +2487,13 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) #ifdef CONFIG_KVM_PRIVATE_MEM int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, - gfn_t gfn, kvm_pfn_t *pfn, int *max_order); + gfn_t gfn, kvm_pfn_t *pfn, struct page **page, + int *max_order); #else static inline int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, - kvm_pfn_t *pfn, int *max_order) + kvm_pfn_t *pfn, struct page **page, + int *max_order) { KVM_BUG_ON(1, kvm); return -EIO; -- cgit v1.2.3 From dc06193532af4ba88ed20daeef88f22b053ebb91 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Oct 2024 11:23:51 -0700 Subject: KVM: Move x86's API to release a faultin page to common KVM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move KVM x86's helper that "finishes" the faultin process to common KVM so that the logic can be shared across all architectures. Note, not all architectures implement a fast page fault path, but the gist of the comment applies to all architectures. Tested-by: Alex Bennée Signed-off-by: Sean Christopherson Tested-by: Dmitry Osipenko Signed-off-by: Paolo Bonzini Message-ID: <20241010182427.1434605-50-seanjc@google.com> --- include/linux/kvm_host.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6efdc00b4254..3e06393e5f1e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1231,6 +1231,32 @@ static inline void kvm_release_page_unused(struct page *page) void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); +static inline void kvm_release_faultin_page(struct kvm *kvm, struct page *page, + bool unused, bool dirty) +{ + lockdep_assert_once(lockdep_is_held(&kvm->mmu_lock) || unused); + + if (!page) + return; + + /* + * If the page that KVM got from the *primary MMU* is writable, and KVM + * installed or reused a SPTE, mark the page/folio dirty. Note, this + * may mark a folio dirty even if KVM created a read-only SPTE, e.g. if + * the GFN is write-protected. Folios can't be safely marked dirty + * outside of mmu_lock as doing so could race with writeback on the + * folio. As a result, KVM can't mark folios dirty in the fast page + * fault handler, and so KVM must (somewhat) speculatively mark the + * folio dirty if KVM could locklessly make the SPTE writable. + */ + if (unused) + kvm_release_page_unused(page); + else if (dirty) + kvm_release_page_dirty(page); + else + kvm_release_page_clean(page); +} + kvm_pfn_t __kvm_faultin_pfn(const struct kvm_memory_slot *slot, gfn_t gfn, unsigned int foll, bool *writable, struct page **refcounted_page); -- cgit v1.2.3 From f42e289a2095f61755e6ca5fd1370d441bf589d5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Oct 2024 11:24:18 -0700 Subject: KVM: Add support for read-only usage of gfn_to_page() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rework gfn_to_page() to support read-only accesses so that it can be used by arm64 to get MTE tags out of guest memory. Opportunistically rewrite the comment to be even more stern about using gfn_to_page(), as there are very few scenarios where requiring a struct page is actually the right thing to do (though there are such scenarios). Add a FIXME to call out that KVM probably should be pinning pages, not just getting pages. Tested-by: Alex Bennée Signed-off-by: Sean Christopherson Tested-by: Dmitry Osipenko Signed-off-by: Paolo Bonzini Message-ID: <20241010182427.1434605-77-seanjc@google.com> --- include/linux/kvm_host.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3e06393e5f1e..96cf9e5660c3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1213,7 +1213,12 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, int kvm_prefetch_pages(struct kvm_memory_slot *slot, gfn_t gfn, struct page **pages, int nr_pages); -struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); +struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn, bool write); +static inline struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) +{ + return __gfn_to_page(kvm, gfn, true); +} + unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); -- cgit v1.2.3 From 06cdaff80e50e3fb74e5e3101e1d5d7aa8b68da6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Oct 2024 11:24:21 -0700 Subject: KVM: Drop gfn_to_pfn() APIs now that all users are gone MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop gfn_to_pfn() and all its variants now that all users are gone. No functional change intended. Tested-by: Alex Bennée Signed-off-by: Sean Christopherson Tested-by: Dmitry Osipenko Signed-off-by: Paolo Bonzini Message-ID: <20241010182427.1434605-80-seanjc@google.com> --- include/linux/kvm_host.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 96cf9e5660c3..4a1eaa40a215 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1274,14 +1274,6 @@ static inline kvm_pfn_t kvm_faultin_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, write ? FOLL_WRITE : 0, writable, refcounted_page); } -kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); -kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, - bool *writable); -kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn); -kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn, - bool interruptible, bool no_wait, - bool write_fault, bool *writable); - void kvm_release_pfn_clean(kvm_pfn_t pfn); void kvm_release_pfn_dirty(kvm_pfn_t pfn); void kvm_set_pfn_dirty(kvm_pfn_t pfn); -- cgit v1.2.3 From 93b7da404f5b0b02a4211bbb784889f001d27953 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Oct 2024 11:24:26 -0700 Subject: KVM: Drop APIs that manipulate "struct page" via pfns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove all kvm_{release,set}_pfn_*() APIs now that all users are gone. No functional change intended. Reviewed-by: Alex Bennée Tested-by: Alex Bennée Signed-off-by: Sean Christopherson Tested-by: Dmitry Osipenko Signed-off-by: Paolo Bonzini Message-ID: <20241010182427.1434605-85-seanjc@google.com> --- include/linux/kvm_host.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4a1eaa40a215..d045f8310a48 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1274,11 +1274,6 @@ static inline kvm_pfn_t kvm_faultin_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, write ? FOLL_WRITE : 0, writable, refcounted_page); } -void kvm_release_pfn_clean(kvm_pfn_t pfn); -void kvm_release_pfn_dirty(kvm_pfn_t pfn); -void kvm_set_pfn_dirty(kvm_pfn_t pfn); -void kvm_set_pfn_accessed(kvm_pfn_t pfn); - int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int len); int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); -- cgit v1.2.3 From 8b15c3764c05ed8766709711d2054d96349dee8e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Oct 2024 11:24:27 -0700 Subject: KVM: Don't grab reference on VM_MIXEDMAP pfns that have a "struct page" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that KVM no longer relies on an ugly heuristic to find its struct page references, i.e. now that KVM can't get false positives on VM_MIXEDMAP pfns, remove KVM's hack to elevate the refcount for pfns that happen to have a valid struct page. In addition to removing a long-standing wart in KVM, this allows KVM to map non-refcounted struct page memory into the guest, e.g. for exposing GPU TTM buffers to KVM guests. Tested-by: Alex Bennée Signed-off-by: Sean Christopherson Tested-by: Dmitry Osipenko Signed-off-by: Paolo Bonzini Message-ID: <20241010182427.1434605-86-seanjc@google.com> --- include/linux/kvm_host.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d045f8310a48..02f0206fd2dc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1730,9 +1730,6 @@ void kvm_arch_sync_events(struct kvm *kvm); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); -struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn); -bool kvm_is_zone_device_page(struct page *page); - struct kvm_irq_ack_notifier { struct hlist_node link; unsigned gsi; -- cgit v1.2.3 From 6860d28ccb2390b4eeda32ab2ce7eb10f71921e1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Oct 2024 12:08:39 +0200 Subject: timekeeping: Reorder struct timekeeper struct timekeeper is ordered suboptimal vs. cachelines. The layout, including the preceding seqcount (see struct tk_core in timekeeper.c) is: cacheline 0: seqcount, tkr_mono cacheline 1: tkr_raw, xtime_sec cacheline 2: ktime_sec ... tai_offset, internal variables cacheline 3: next_leap_ktime, raw_sec, internal variables cacheline 4: internal variables So any access to via ktime_get*() except for access to CLOCK_MONOTONIC_RAW will use either cachelines 0 + 1 or cachelines 0 + 2. Access to CLOCK_MONOTONIC_RAW uses cachelines 0 + 1 + 3. Reorder the members so that the result is more efficient: cacheline 0: seqcount, tkr_mono cacheline 1: xtime_sec, ktime_sec ... tai_offset cacheline 2: tkr_raw, raw_sec cacheline 3: internal variables cacheline 4: internal variables That means ktime_get*() will access cacheline 0 + 1 and CLOCK_MONOTONIC_RAW access will use cachelines 0 + 2. Update kernel-doc and fix formatting issues while at it. Also fix a typo in struct tk_read_base kernel-doc. Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20241015100839.12702-1-anna-maria@linutronix.de --- include/linux/timekeeper_internal.h | 106 ++++++++++++++++++++++-------------- 1 file changed, 65 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 902c20ef495a..a3b6380a7777 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -26,7 +26,7 @@ * occupies a single 64byte cache line. * * The struct is separate from struct timekeeper as it is also used - * for a fast NMI safe accessors. + * for the fast NMI safe accessors. * * @base_real is for the fast NMI safe accessor to allow reading clock * realtime from any context. @@ -44,33 +44,41 @@ struct tk_read_base { /** * struct timekeeper - Structure holding internal timekeeping values. - * @tkr_mono: The readout base structure for CLOCK_MONOTONIC - * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW - * @xtime_sec: Current CLOCK_REALTIME time in seconds - * @ktime_sec: Current CLOCK_MONOTONIC time in seconds - * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset - * @offs_real: Offset clock monotonic -> clock realtime - * @offs_boot: Offset clock monotonic -> clock boottime - * @offs_tai: Offset clock monotonic -> clock tai - * @tai_offset: The current UTC to TAI offset in seconds - * @clock_was_set_seq: The sequence number of clock was set events - * @cs_was_changed_seq: The sequence number of clocksource change events - * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second - * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds - * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset - * @cycle_interval: Number of clock cycles in one NTP interval - * @xtime_interval: Number of clock shifted nano seconds in one NTP - * interval. - * @xtime_remainder: Shifted nano seconds left over when rounding - * @cycle_interval - * @raw_interval: Shifted raw nano seconds accumulated per NTP interval. - * @ntp_error: Difference between accumulated time and NTP time in ntp - * shifted nano seconds. - * @ntp_error_shift: Shift conversion between clock shifted nano seconds and - * ntp shifted nano seconds. - * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING) - * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING) - * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING) + * @tkr_mono: The readout base structure for CLOCK_MONOTONIC + * @xtime_sec: Current CLOCK_REALTIME time in seconds + * @ktime_sec: Current CLOCK_MONOTONIC time in seconds + * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset + * @offs_real: Offset clock monotonic -> clock realtime + * @offs_boot: Offset clock monotonic -> clock boottime + * @offs_tai: Offset clock monotonic -> clock tai + * @tai_offset: The current UTC to TAI offset in seconds + * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW + * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds + * @clock_was_set_seq: The sequence number of clock was set events + * @cs_was_changed_seq: The sequence number of clocksource change events + * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset + * @cycle_interval: Number of clock cycles in one NTP interval + * @xtime_interval: Number of clock shifted nano seconds in one NTP + * interval. + * @xtime_remainder: Shifted nano seconds left over when rounding + * @cycle_interval + * @raw_interval: Shifted raw nano seconds accumulated per NTP interval. + * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second + * @ntp_tick: The ntp_tick_length() value currently being + * used. This cached copy ensures we consistently + * apply the tick length for an entire tick, as + * ntp_tick_length may change mid-tick, and we don't + * want to apply that new value to the tick in + * progress. + * @ntp_error: Difference between accumulated time and NTP time in ntp + * shifted nano seconds. + * @ntp_error_shift: Shift conversion between clock shifted nano seconds and + * ntp shifted nano seconds. + * @ntp_err_mult: Multiplication factor for scaled math conversion + * @skip_second_overflow: Flag used to avoid updating NTP twice with same second + * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING) + * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING) + * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING) * * Note: For timespec(64) based interfaces wall_to_monotonic is what * we need to add to xtime (or xtime corrected for sub jiffy times) @@ -88,10 +96,28 @@ struct tk_read_base { * * @monotonic_to_boottime is a timespec64 representation of @offs_boot to * accelerate the VDSO update for CLOCK_BOOTTIME. + * + * The cacheline ordering of the structure is optimized for in kernel usage of + * the ktime_get() and ktime_get_ts64() family of time accessors. Struct + * timekeeper is prepended in the core timekeeping code with a sequence count, + * which results in the following cacheline layout: + * + * 0: seqcount, tkr_mono + * 1: xtime_sec ... tai_offset + * 2: tkr_raw, raw_sec + * 3,4: Internal variables + * + * Cacheline 0,1 contain the data which is used for accessing + * CLOCK_MONOTONIC/REALTIME/BOOTTIME/TAI, while cacheline 2 contains the + * data for accessing CLOCK_MONOTONIC_RAW. Cacheline 3,4 are internal + * variables which are only accessed during timekeeper updates once per + * tick. */ struct timekeeper { + /* Cacheline 0 (together with prepended seqcount of timekeeper core): */ struct tk_read_base tkr_mono; - struct tk_read_base tkr_raw; + + /* Cacheline 1: */ u64 xtime_sec; unsigned long ktime_sec; struct timespec64 wall_to_monotonic; @@ -99,31 +125,29 @@ struct timekeeper { ktime_t offs_boot; ktime_t offs_tai; s32 tai_offset; + + /* Cacheline 2: */ + struct tk_read_base tkr_raw; + u64 raw_sec; + + /* Cachline 3 and 4 (timekeeping internal variables): */ unsigned int clock_was_set_seq; u8 cs_was_changed_seq; - ktime_t next_leap_ktime; - u64 raw_sec; + struct timespec64 monotonic_to_boot; - /* The following members are for timekeeping internal use */ u64 cycle_interval; u64 xtime_interval; s64 xtime_remainder; u64 raw_interval; - /* The ntp_tick_length() value currently being used. - * This cached copy ensures we consistently apply the tick - * length for an entire tick, as ntp_tick_length may change - * mid-tick, and we don't want to apply that new value to - * the tick in progress. - */ + + ktime_t next_leap_ktime; u64 ntp_tick; - /* Difference between accumulated time and NTP time in ntp - * shifted nano seconds. */ s64 ntp_error; u32 ntp_error_shift; u32 ntp_err_mult; - /* Flag used to avoid updating NTP twice with same second */ u32 skip_second_overflow; + #ifdef CONFIG_DEBUG_TIMEKEEPING long last_warning; /* -- cgit v1.2.3 From 92b043fd995a63a57aae29ff85a39b6f30cd440c Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 25 Oct 2024 13:01:41 +0200 Subject: time: Fix references to _msecs_to_jiffies() handling of values The details about the handling of the "normal" values were moved to the _msecs_to_jiffies() helpers in commit ca42aaf0c861 ("time: Refactor msecs_to_jiffies"). However, the same commit still mentioned __msecs_to_jiffies() in the added documentation. Thus point to _msecs_to_jiffies() instead. Fixes: ca42aaf0c861 ("time: Refactor msecs_to_jiffies") Signed-off-by: Miguel Ojeda Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241025110141.157205-2-ojeda@kernel.org --- include/linux/jiffies.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 1220f0fbe5bf..5d21dacd62bc 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -502,7 +502,7 @@ static inline unsigned long _msecs_to_jiffies(const unsigned int m) * - all other values are converted to jiffies by either multiplying * the input value by a factor or dividing it with a factor and * handling any 32-bit overflows. - * for the details see __msecs_to_jiffies() + * for the details see _msecs_to_jiffies() * * msecs_to_jiffies() checks for the passed in value being a constant * via __builtin_constant_p() allowing gcc to eliminate most of the -- cgit v1.2.3 From 78bc671bd1501e2f6c571e063301a4fdc5db53b2 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 18 Oct 2024 12:47:39 -0700 Subject: scsi: ufs: core: Make DMA mask configuration more flexible Replace UFSHCD_QUIRK_BROKEN_64BIT_ADDRESS with ufs_hba_variant_ops::set_dma_mask. Update the Renesas driver accordingly. This patch enables supporting other configurations than 32-bit or 64-bit DMA addresses, e.g. 36-bit DMA addresses. Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20241018194753.775074-1-bvanassche@acm.org Reviewed-by: Avri Altman Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index a95282b9f743..a2d600ff4372 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -299,6 +299,8 @@ struct ufs_pwr_mode_info { * @max_num_rtt: maximum RTT supported by the host * @init: called when the driver is initialized * @exit: called to cleanup everything done in init + * @set_dma_mask: For setting another DMA mask than indicated by the 64AS + * capability bit. * @get_ufs_hci_version: called to get UFS HCI version * @clk_scale_notify: notifies that clks are scaled up/down * @setup_clocks: called before touching any of the controller registers @@ -341,6 +343,7 @@ struct ufs_hba_variant_ops { int (*init)(struct ufs_hba *); void (*exit)(struct ufs_hba *); u32 (*get_ufs_hci_version)(struct ufs_hba *); + int (*set_dma_mask)(struct ufs_hba *); int (*clk_scale_notify)(struct ufs_hba *, bool, enum ufs_notify_change_status); int (*setup_clocks)(struct ufs_hba *, bool, @@ -623,12 +626,6 @@ enum ufshcd_quirks { */ UFSHCD_QUIRK_SKIP_PH_CONFIGURATION = 1 << 16, - /* - * This quirk needs to be enabled if the host controller has - * 64-bit addressing supported capability but it doesn't work. - */ - UFSHCD_QUIRK_BROKEN_64BIT_ADDRESS = 1 << 17, - /* * This quirk needs to be enabled if the host controller has * auto-hibernate capability but it's FASTAUTO only. -- cgit v1.2.3 From a085e03758b87ee5aea45de27c811576574d795b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 22 Oct 2024 12:30:57 -0700 Subject: scsi: ufs: core: Move the ufshcd_mcq_enable_esi() definition Move the ufshcd_mcq_enable_esi() definition such that it occurs immediately before the ufshcd_mcq_config_esi() definition. Reviewed-by: Peter Wang Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20241022193130.2733293-2-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index a2d600ff4372..d2dad221c12b 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1318,8 +1318,8 @@ void ufshcd_mcq_write_cqis(struct ufs_hba *hba, u32 val, int i); unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba, struct ufs_hw_queue *hwq); void ufshcd_mcq_make_queues_operational(struct ufs_hba *hba); -void ufshcd_mcq_enable_esi(struct ufs_hba *hba); void ufshcd_mcq_enable(struct ufs_hba *hba); +void ufshcd_mcq_enable_esi(struct ufs_hba *hba); void ufshcd_mcq_config_esi(struct ufs_hba *hba, struct msi_msg *msg); int ufshcd_opp_config_clks(struct device *dev, struct opp_table *opp_table, -- cgit v1.2.3 From 2a36646012fc58e6262435ff5d2c8c97456c253f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 22 Oct 2024 12:31:01 -0700 Subject: scsi: ufs: core: Simplify ufshcd_err_handling_prepare() Use blk_mq_quiesce_tagset() instead of ufshcd_scsi_block_requests() and blk_mq_wait_quiesce_done(). Since this patch removes the last callers of ufshcd_scsi_block_requests() and ufshcd_scsi_unblock_requests(), remove these functions. Reviewed-by: Avri Altman Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20241022193130.2733293-6-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index d2dad221c12b..9ea2a7411bb5 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -925,7 +925,6 @@ enum ufshcd_mcq_opr { * @wb_mutex: used to serialize devfreq and sysfs write booster toggling * @clk_scaling_lock: used to serialize device commands and clock scaling * @desc_size: descriptor sizes reported by device - * @scsi_block_reqs_cnt: reference counting for scsi block requests * @bsg_dev: struct device associated with the BSG queue * @bsg_queue: BSG queue associated with the UFS controller * @rpm_dev_flush_recheck_work: used to suspend from RPM (runtime power @@ -1086,7 +1085,6 @@ struct ufs_hba { struct mutex wb_mutex; struct rw_semaphore clk_scaling_lock; - atomic_t scsi_block_reqs_cnt; struct device bsg_dev; struct request_queue *bsg_queue; -- cgit v1.2.3 From a81dca057273c32b8554b3bc562480d4b3816155 Mon Sep 17 00:00:00 2001 From: Ivaylo Ivanov Date: Wed, 23 Oct 2024 12:01:34 +0300 Subject: dt-bindings: clock: samsung: Add Exynos8895 SoC Provide dt-schema documentation for Samsung Exynos8895 SoC clock controller CMU blocks: - CMU_FSYS0/1 - CMU_PERIC0/1 - CMU_PERIS - CMU_TOP Signed-off-by: Ivaylo Ivanov Link: https://lore.kernel.org/r/20241023090136.537395-2-ivo.ivanov.ivanov1@gmail.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/samsung,exynos8895.h | 453 +++++++++++++++++++++++++ 1 file changed, 453 insertions(+) create mode 100644 include/dt-bindings/clock/samsung,exynos8895.h (limited to 'include') diff --git a/include/dt-bindings/clock/samsung,exynos8895.h b/include/dt-bindings/clock/samsung,exynos8895.h new file mode 100644 index 000000000000..27998c53f929 --- /dev/null +++ b/include/dt-bindings/clock/samsung,exynos8895.h @@ -0,0 +1,453 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (C) 2024 Ivaylo Ivanov + * Author: Ivaylo Ivanov + * + * Device Tree binding constants for Exynos8895 clock controller. + */ + +#ifndef _DT_BINDINGS_CLOCK_EXYNOS8895_H +#define _DT_BINDINGS_CLOCK_EXYNOS8895_H + +/* CMU_TOP */ +#define CLK_FOUT_SHARED0_PLL 1 +#define CLK_FOUT_SHARED1_PLL 2 +#define CLK_FOUT_SHARED2_PLL 3 +#define CLK_FOUT_SHARED3_PLL 4 +#define CLK_FOUT_SHARED4_PLL 5 +#define CLK_MOUT_PLL_SHARED0 6 +#define CLK_MOUT_PLL_SHARED1 7 +#define CLK_MOUT_PLL_SHARED2 8 +#define CLK_MOUT_PLL_SHARED3 9 +#define CLK_MOUT_PLL_SHARED4 10 +#define CLK_MOUT_CP2AP_MIF_CLK_USER 11 +#define CLK_MOUT_CMU_ABOX_CPUABOX 12 +#define CLK_MOUT_CMU_APM_BUS 13 +#define CLK_MOUT_CMU_BUS1_BUS 14 +#define CLK_MOUT_CMU_BUSC_BUS 15 +#define CLK_MOUT_CMU_BUSC_BUSPHSI2C 16 +#define CLK_MOUT_CMU_CAM_BUS 17 +#define CLK_MOUT_CMU_CAM_TPU0 18 +#define CLK_MOUT_CMU_CAM_TPU1 19 +#define CLK_MOUT_CMU_CAM_VRA 20 +#define CLK_MOUT_CMU_CIS_CLK0 21 +#define CLK_MOUT_CMU_CIS_CLK1 22 +#define CLK_MOUT_CMU_CIS_CLK2 23 +#define CLK_MOUT_CMU_CIS_CLK3 24 +#define CLK_MOUT_CMU_CORE_BUS 25 +#define CLK_MOUT_CMU_CPUCL0_SWITCH 26 +#define CLK_MOUT_CMU_CPUCL1_SWITCH 27 +#define CLK_MOUT_CMU_DBG_BUS 28 +#define CLK_MOUT_CMU_DCAM_BUS 29 +#define CLK_MOUT_CMU_DCAM_IMGD 30 +#define CLK_MOUT_CMU_DPU_BUS 31 +#define CLK_MOUT_CMU_DROOPDETECTOR 32 +#define CLK_MOUT_CMU_DSP_BUS 33 +#define CLK_MOUT_CMU_FSYS0_BUS 34 +#define CLK_MOUT_CMU_FSYS0_DPGTC 35 +#define CLK_MOUT_CMU_FSYS0_MMC_EMBD 36 +#define CLK_MOUT_CMU_FSYS0_UFS_EMBD 37 +#define CLK_MOUT_CMU_FSYS0_USBDRD30 38 +#define CLK_MOUT_CMU_FSYS1_BUS 39 +#define CLK_MOUT_CMU_FSYS1_MMC_CARD 40 +#define CLK_MOUT_CMU_FSYS1_PCIE 41 +#define CLK_MOUT_CMU_FSYS1_UFS_CARD 42 +#define CLK_MOUT_CMU_G2D_G2D 43 +#define CLK_MOUT_CMU_G2D_JPEG 44 +#define CLK_MOUT_CMU_HPM 45 +#define CLK_MOUT_CMU_IMEM_BUS 46 +#define CLK_MOUT_CMU_ISPHQ_BUS 47 +#define CLK_MOUT_CMU_ISPLP_BUS 48 +#define CLK_MOUT_CMU_IVA_BUS 49 +#define CLK_MOUT_CMU_MFC_BUS 50 +#define CLK_MOUT_CMU_MIF_SWITCH 51 +#define CLK_MOUT_CMU_PERIC0_BUS 52 +#define CLK_MOUT_CMU_PERIC0_UART_DBG 53 +#define CLK_MOUT_CMU_PERIC0_USI00 54 +#define CLK_MOUT_CMU_PERIC0_USI01 55 +#define CLK_MOUT_CMU_PERIC0_USI02 56 +#define CLK_MOUT_CMU_PERIC0_USI03 57 +#define CLK_MOUT_CMU_PERIC1_BUS 58 +#define CLK_MOUT_CMU_PERIC1_SPEEDY2 59 +#define CLK_MOUT_CMU_PERIC1_SPI_CAM0 60 +#define CLK_MOUT_CMU_PERIC1_SPI_CAM1 61 +#define CLK_MOUT_CMU_PERIC1_UART_BT 62 +#define CLK_MOUT_CMU_PERIC1_USI04 63 +#define CLK_MOUT_CMU_PERIC1_USI05 64 +#define CLK_MOUT_CMU_PERIC1_USI06 65 +#define CLK_MOUT_CMU_PERIC1_USI07 66 +#define CLK_MOUT_CMU_PERIC1_USI08 67 +#define CLK_MOUT_CMU_PERIC1_USI09 68 +#define CLK_MOUT_CMU_PERIC1_USI10 69 +#define CLK_MOUT_CMU_PERIC1_USI11 70 +#define CLK_MOUT_CMU_PERIC1_USI12 71 +#define CLK_MOUT_CMU_PERIC1_USI13 72 +#define CLK_MOUT_CMU_PERIS_BUS 73 +#define CLK_MOUT_CMU_SRDZ_BUS 74 +#define CLK_MOUT_CMU_SRDZ_IMGD 75 +#define CLK_MOUT_CMU_VPU_BUS 76 +#define CLK_DOUT_CMU_ABOX_CPUABOX 77 +#define CLK_DOUT_CMU_APM_BUS 78 +#define CLK_DOUT_CMU_BUS1_BUS 79 +#define CLK_DOUT_CMU_BUSC_BUS 80 +#define CLK_DOUT_CMU_BUSC_BUSPHSI2C 81 +#define CLK_DOUT_CMU_CAM_BUS 82 +#define CLK_DOUT_CMU_CAM_TPU0 83 +#define CLK_DOUT_CMU_CAM_TPU1 84 +#define CLK_DOUT_CMU_CAM_VRA 85 +#define CLK_DOUT_CMU_CIS_CLK0 86 +#define CLK_DOUT_CMU_CIS_CLK1 87 +#define CLK_DOUT_CMU_CIS_CLK2 88 +#define CLK_DOUT_CMU_CIS_CLK3 89 +#define CLK_DOUT_CMU_CORE_BUS 90 +#define CLK_DOUT_CMU_CPUCL0_SWITCH 91 +#define CLK_DOUT_CMU_CPUCL1_SWITCH 92 +#define CLK_DOUT_CMU_DBG_BUS 93 +#define CLK_DOUT_CMU_DCAM_BUS 94 +#define CLK_DOUT_CMU_DCAM_IMGD 95 +#define CLK_DOUT_CMU_DPU_BUS 96 +#define CLK_DOUT_CMU_DSP_BUS 97 +#define CLK_DOUT_CMU_FSYS0_BUS 98 +#define CLK_DOUT_CMU_FSYS0_DPGTC 99 +#define CLK_DOUT_CMU_FSYS0_MMC_EMBD 100 +#define CLK_DOUT_CMU_FSYS0_UFS_EMBD 101 +#define CLK_DOUT_CMU_FSYS0_USBDRD30 102 +#define CLK_DOUT_CMU_FSYS1_BUS 103 +#define CLK_DOUT_CMU_FSYS1_MMC_CARD 104 +#define CLK_DOUT_CMU_FSYS1_UFS_CARD 105 +#define CLK_DOUT_CMU_G2D_G2D 106 +#define CLK_DOUT_CMU_G2D_JPEG 107 +#define CLK_DOUT_CMU_G3D_SWITCH 108 +#define CLK_DOUT_CMU_HPM 109 +#define CLK_DOUT_CMU_IMEM_BUS 110 +#define CLK_DOUT_CMU_ISPHQ_BUS 111 +#define CLK_DOUT_CMU_ISPLP_BUS 112 +#define CLK_DOUT_CMU_IVA_BUS 113 +#define CLK_DOUT_CMU_MFC_BUS 114 +#define CLK_DOUT_CMU_MODEM_SHARED0 115 +#define CLK_DOUT_CMU_MODEM_SHARED1 116 +#define CLK_DOUT_CMU_PERIC0_BUS 117 +#define CLK_DOUT_CMU_PERIC0_UART_DBG 118 +#define CLK_DOUT_CMU_PERIC0_USI00 119 +#define CLK_DOUT_CMU_PERIC0_USI01 120 +#define CLK_DOUT_CMU_PERIC0_USI02 121 +#define CLK_DOUT_CMU_PERIC0_USI03 122 +#define CLK_DOUT_CMU_PERIC1_BUS 123 +#define CLK_DOUT_CMU_PERIC1_SPEEDY2 124 +#define CLK_DOUT_CMU_PERIC1_SPI_CAM0 125 +#define CLK_DOUT_CMU_PERIC1_SPI_CAM1 126 +#define CLK_DOUT_CMU_PERIC1_UART_BT 127 +#define CLK_DOUT_CMU_PERIC1_USI04 128 +#define CLK_DOUT_CMU_PERIC1_USI05 129 +#define CLK_DOUT_CMU_PERIC1_USI06 130 +#define CLK_DOUT_CMU_PERIC1_USI07 131 +#define CLK_DOUT_CMU_PERIC1_USI08 132 +#define CLK_DOUT_CMU_PERIC1_USI09 133 +#define CLK_DOUT_CMU_PERIC1_USI10 134 +#define CLK_DOUT_CMU_PERIC1_USI11 135 +#define CLK_DOUT_CMU_PERIC1_USI12 136 +#define CLK_DOUT_CMU_PERIC1_USI13 137 +#define CLK_DOUT_CMU_PERIS_BUS 138 +#define CLK_DOUT_CMU_SRDZ_BUS 139 +#define CLK_DOUT_CMU_SRDZ_IMGD 140 +#define CLK_DOUT_CMU_VPU_BUS 141 +#define CLK_DOUT_CMU_SHARED0_DIV2 142 +#define CLK_DOUT_CMU_SHARED0_DIV4 143 +#define CLK_DOUT_CMU_SHARED1_DIV2 144 +#define CLK_DOUT_CMU_SHARED1_DIV4 145 +#define CLK_DOUT_CMU_SHARED2_DIV2 146 +#define CLK_DOUT_CMU_SHARED3_DIV2 147 +#define CLK_DOUT_CMU_SHARED4_DIV2 148 +#define CLK_DOUT_CMU_FSYS1_PCIE 149 +#define CLK_DOUT_CMU_CP2AP_MIF_CLK_DIV2 150 +#define CLK_DOUT_CMU_CMU_OTP 151 +#define CLK_GOUT_CMU_DROOPDETECTOR 152 +#define CLK_GOUT_CMU_MIF_SWITCH 153 +#define CLK_GOUT_CMU_ABOX_CPUABOX 154 +#define CLK_GOUT_CMU_APM_BUS 155 +#define CLK_GOUT_CMU_BUS1_BUS 156 +#define CLK_GOUT_CMU_BUSC_BUS 157 +#define CLK_GOUT_CMU_BUSC_BUSPHSI2C 158 +#define CLK_GOUT_CMU_CAM_BUS 159 +#define CLK_GOUT_CMU_CAM_TPU0 160 +#define CLK_GOUT_CMU_CAM_TPU1 161 +#define CLK_GOUT_CMU_CAM_VRA 162 +#define CLK_GOUT_CMU_CIS_CLK0 163 +#define CLK_GOUT_CMU_CIS_CLK1 164 +#define CLK_GOUT_CMU_CIS_CLK2 165 +#define CLK_GOUT_CMU_CIS_CLK3 166 +#define CLK_GOUT_CMU_CORE_BUS 167 +#define CLK_GOUT_CMU_CPUCL0_SWITCH 168 +#define CLK_GOUT_CMU_CPUCL1_SWITCH 169 +#define CLK_GOUT_CMU_DBG_BUS 170 +#define CLK_GOUT_CMU_DCAM_BUS 171 +#define CLK_GOUT_CMU_DCAM_IMGD 172 +#define CLK_GOUT_CMU_DPU_BUS 173 +#define CLK_GOUT_CMU_DSP_BUS 174 +#define CLK_GOUT_CMU_FSYS0_BUS 175 +#define CLK_GOUT_CMU_FSYS0_DPGTC 176 +#define CLK_GOUT_CMU_FSYS0_MMC_EMBD 177 +#define CLK_GOUT_CMU_FSYS0_UFS_EMBD 178 +#define CLK_GOUT_CMU_FSYS0_USBDRD30 179 +#define CLK_GOUT_CMU_FSYS1_BUS 180 +#define CLK_GOUT_CMU_FSYS1_MMC_CARD 181 +#define CLK_GOUT_CMU_FSYS1_PCIE 182 +#define CLK_GOUT_CMU_FSYS1_UFS_CARD 183 +#define CLK_GOUT_CMU_G2D_G2D 184 +#define CLK_GOUT_CMU_G2D_JPEG 185 +#define CLK_GOUT_CMU_G3D_SWITCH 186 +#define CLK_GOUT_CMU_HPM 187 +#define CLK_GOUT_CMU_IMEM_BUS 188 +#define CLK_GOUT_CMU_ISPHQ_BUS 189 +#define CLK_GOUT_CMU_ISPLP_BUS 190 +#define CLK_GOUT_CMU_IVA_BUS 191 +#define CLK_GOUT_CMU_MFC_BUS 192 +#define CLK_GOUT_CMU_MODEM_SHARED0 193 +#define CLK_GOUT_CMU_MODEM_SHARED1 194 +#define CLK_GOUT_CMU_PERIC0_BUS 195 +#define CLK_GOUT_CMU_PERIC0_UART_DBG 196 +#define CLK_GOUT_CMU_PERIC0_USI00 197 +#define CLK_GOUT_CMU_PERIC0_USI01 198 +#define CLK_GOUT_CMU_PERIC0_USI02 199 +#define CLK_GOUT_CMU_PERIC0_USI03 200 +#define CLK_GOUT_CMU_PERIC1_BUS 201 +#define CLK_GOUT_CMU_PERIC1_SPEEDY2 202 +#define CLK_GOUT_CMU_PERIC1_SPI_CAM0 203 +#define CLK_GOUT_CMU_PERIC1_SPI_CAM1 204 +#define CLK_GOUT_CMU_PERIC1_UART_BT 205 +#define CLK_GOUT_CMU_PERIC1_USI04 206 +#define CLK_GOUT_CMU_PERIC1_USI05 207 +#define CLK_GOUT_CMU_PERIC1_USI06 208 +#define CLK_GOUT_CMU_PERIC1_USI07 209 +#define CLK_GOUT_CMU_PERIC1_USI08 210 +#define CLK_GOUT_CMU_PERIC1_USI09 211 +#define CLK_GOUT_CMU_PERIC1_USI10 212 +#define CLK_GOUT_CMU_PERIC1_USI11 213 +#define CLK_GOUT_CMU_PERIC1_USI12 214 +#define CLK_GOUT_CMU_PERIC1_USI13 215 +#define CLK_GOUT_CMU_PERIS_BUS 216 +#define CLK_GOUT_CMU_SRDZ_BUS 217 +#define CLK_GOUT_CMU_SRDZ_IMGD 218 +#define CLK_GOUT_CMU_VPU_BUS 219 + +/* CMU_PERIS */ +#define CLK_MOUT_PERIS_BUS_USER 1 +#define CLK_MOUT_PERIS_GIC 2 +#define CLK_GOUT_PERIS_CMU_PERIS_PCLK 3 +#define CLK_GOUT_PERIS_AD_AXI_P_PERIS_ACLKM 4 +#define CLK_GOUT_PERIS_AD_AXI_P_PERIS_ACLKS 5 +#define CLK_GOUT_PERIS_AXI2APB_PERISP0_ACLK 6 +#define CLK_GOUT_PERIS_AXI2APB_PERISP1_ACLK 7 +#define CLK_GOUT_PERIS_BUSIF_TMU_PCLK 8 +#define CLK_GOUT_PERIS_GIC_CLK 9 +#define CLK_GOUT_PERIS_LHM_AXI_P_PERIS_I_CLK 10 +#define CLK_GOUT_PERIS_MCT_PCLK 11 +#define CLK_GOUT_PERIS_OTP_CON_BIRA_PCLK 12 +#define CLK_GOUT_PERIS_OTP_CON_TOP_PCLK 13 +#define CLK_GOUT_PERIS_PMU_PERIS_PCLK 14 +#define CLK_GOUT_PERIS_RSTNSYNC_CLK_PERIS_BUSP_CLK 15 +#define CLK_GOUT_PERIS_RSTNSYNC_CLK_PERIS_GIC_CLK 16 +#define CLK_GOUT_PERIS_SYSREG_PERIS_PCLK 17 +#define CLK_GOUT_PERIS_TZPC00_PCLK 18 +#define CLK_GOUT_PERIS_TZPC01_PCLK 19 +#define CLK_GOUT_PERIS_TZPC02_PCLK 20 +#define CLK_GOUT_PERIS_TZPC03_PCLK 21 +#define CLK_GOUT_PERIS_TZPC04_PCLK 22 +#define CLK_GOUT_PERIS_TZPC05_PCLK 23 +#define CLK_GOUT_PERIS_TZPC06_PCLK 24 +#define CLK_GOUT_PERIS_TZPC07_PCLK 25 +#define CLK_GOUT_PERIS_TZPC08_PCLK 26 +#define CLK_GOUT_PERIS_TZPC09_PCLK 27 +#define CLK_GOUT_PERIS_TZPC10_PCLK 28 +#define CLK_GOUT_PERIS_TZPC11_PCLK 29 +#define CLK_GOUT_PERIS_TZPC12_PCLK 30 +#define CLK_GOUT_PERIS_TZPC13_PCLK 31 +#define CLK_GOUT_PERIS_TZPC14_PCLK 32 +#define CLK_GOUT_PERIS_TZPC15_PCLK 33 +#define CLK_GOUT_PERIS_WDT_CLUSTER0_PCLK 34 +#define CLK_GOUT_PERIS_WDT_CLUSTER1_PCLK 35 +#define CLK_GOUT_PERIS_XIU_P_PERIS_ACLK 36 + +/* CMU_FSYS0 */ +#define CLK_MOUT_FSYS0_BUS_USER 1 +#define CLK_MOUT_FSYS0_DPGTC_USER 2 +#define CLK_MOUT_FSYS0_MMC_EMBD_USER 3 +#define CLK_MOUT_FSYS0_UFS_EMBD_USER 4 +#define CLK_MOUT_FSYS0_USBDRD30_USER 5 +#define CLK_GOUT_FSYS0_FSYS0_CMU_FSYS0_PCLK 6 +#define CLK_GOUT_FSYS0_AHBBR_FSYS0_HCLK 7 +#define CLK_GOUT_FSYS0_AXI2AHB_FSYS0_ACLK 8 +#define CLK_GOUT_FSYS0_AXI2AHB_USB_FSYS0_ACLK 9 +#define CLK_GOUT_FSYS0_AXI2APB_FSYS0_ACLK 10 +#define CLK_GOUT_FSYS0_BTM_FSYS0_I_ACLK 11 +#define CLK_GOUT_FSYS0_BTM_FSYS0_I_PCLK 12 +#define CLK_GOUT_FSYS0_DP_LINK_I_GTC_EXT_CLK 13 +#define CLK_GOUT_FSYS0_DP_LINK_I_PCLK 14 +#define CLK_GOUT_FSYS0_ETR_MIU_I_ACLK 15 +#define CLK_GOUT_FSYS0_ETR_MIU_I_PCLK 16 +#define CLK_GOUT_FSYS0_GPIO_FSYS0_PCLK 17 +#define CLK_GOUT_FSYS0_LHM_AXI_D_USBTV_I_CLK 18 +#define CLK_GOUT_FSYS0_LHM_AXI_G_ETR_I_CLK 19 +#define CLK_GOUT_FSYS0_LHM_AXI_P_FSYS0_I_CLK 20 +#define CLK_GOUT_FSYS0_LHS_ACEL_D_FSYS0_I_CLK 21 +#define CLK_GOUT_FSYS0_MMC_EMBD_I_ACLK 22 +#define CLK_GOUT_FSYS0_MMC_EMBD_SDCLKIN 23 +#define CLK_GOUT_FSYS0_PMU_FSYS0_PCLK 24 +#define CLK_GOUT_FSYS0_BCM_FSYS0_ACLK 25 +#define CLK_GOUT_FSYS0_BCM_FSYS0_PCLK 26 +#define CLK_GOUT_FSYS0_RSTNSYNC_CLK_FSYS0_BUS_CLK 27 +#define CLK_GOUT_FSYS0_SYSREG_FSYS0_PCLK 28 +#define CLK_GOUT_FSYS0_UFS_EMBD_I_ACLK 29 +#define CLK_GOUT_FSYS0_UFS_EMBD_I_CLK_UNIPRO 30 +#define CLK_GOUT_FSYS0_UFS_EMBD_I_FMP_CLK 31 +#define CLK_GOUT_FSYS0_USBTV_I_USB30DRD_ACLK 32 +#define CLK_GOUT_FSYS0_USBTV_I_USB30DRD_REF_CLK 33 +#define CLK_GOUT_FSYS0_USBTV_I_USB30DRD_SUSPEND_CLK 34 +#define CLK_GOUT_FSYS0_USBTV_I_USBTVH_AHB_CLK 35 +#define CLK_GOUT_FSYS0_USBTV_I_USBTVH_CORE_CLK 36 +#define CLK_GOUT_FSYS0_USBTV_I_USBTVH_XIU_CLK 37 +#define CLK_GOUT_FSYS0_US_D_FSYS0_USB_ACLK 38 +#define CLK_GOUT_FSYS0_XIU_D_FSYS0_ACLK 39 +#define CLK_GOUT_FSYS0_XIU_D_FSYS0_USB_ACLK 40 +#define CLK_GOUT_FSYS0_XIU_P_FSYS0_ACLK 41 + +/* CMU_FSYS1 */ +#define CLK_MOUT_FSYS1_BUS_USER 1 +#define CLK_MOUT_FSYS1_MMC_CARD_USER 2 +#define CLK_MOUT_FSYS1_PCIE_USER 3 +#define CLK_MOUT_FSYS1_UFS_CARD_USER 4 +#define CLK_GOUT_FSYS1_PCIE_PHY_REF_CLK_IN 5 +#define CLK_GOUT_FSYS1_ADM_AHB_SSS_HCLKM 6 +#define CLK_GOUT_FSYS1_AHBBR_FSYS1_HCLK 7 +#define CLK_GOUT_FSYS1_AXI2AHB_FSYS1_ACLK 8 +#define CLK_GOUT_FSYS1_AXI2APB_FSYS1P0_ACLK 9 +#define CLK_GOUT_FSYS1_AXI2APB_FSYS1P1_ACLK 10 +#define CLK_GOUT_FSYS1_BTM_FSYS1_I_ACLK 11 +#define CLK_GOUT_FSYS1_BTM_FSYS1_I_PCLK 12 +#define CLK_GOUT_FSYS1_FSYS1_CMU_FSYS1_PCLK 13 +#define CLK_GOUT_FSYS1_GPIO_FSYS1_PCLK 14 +#define CLK_GOUT_FSYS1_LHM_AXI_P_FSYS1_I_CLK 15 +#define CLK_GOUT_FSYS1_LHS_ACEL_D_FSYS1_I_CLK 16 +#define CLK_GOUT_FSYS1_MMC_CARD_I_ACLK 17 +#define CLK_GOUT_FSYS1_MMC_CARD_SDCLKIN 18 +#define CLK_GOUT_FSYS1_PCIE_DBI_ACLK_0 19 +#define CLK_GOUT_FSYS1_PCIE_DBI_ACLK_1 20 +#define CLK_GOUT_FSYS1_PCIE_IEEE1500_WRAPPER_FOR_PCIE_PHY_LC_X2_INST_0_I_SCL_APB_PCLK 21 +#define CLK_GOUT_FSYS1_PCIE_MSTR_ACLK_0 22 +#define CLK_GOUT_FSYS1_PCIE_MSTR_ACLK_1 23 +#define CLK_GOUT_FSYS1_PCIE_PCIE_SUB_CTRL_INST_0_I_DRIVER_APB_CLK 24 +#define CLK_GOUT_FSYS1_PCIE_PCIE_SUB_CTRL_INST_1_I_DRIVER_APB_CLK 25 +#define CLK_GOUT_FSYS1_PCIE_PIPE2_DIGITAL_X2_WRAP_INST_0_I_APB_PCLK_SCL 26 +#define CLK_GOUT_FSYS1_PCIE_SLV_ACLK_0 27 +#define CLK_GOUT_FSYS1_PCIE_SLV_ACLK_1 28 +#define CLK_GOUT_FSYS1_PMU_FSYS1_PCLK 29 +#define CLK_GOUT_FSYS1_BCM_FSYS1_ACLK 30 +#define CLK_GOUT_FSYS1_BCM_FSYS1_PCLK 31 +#define CLK_GOUT_FSYS1_RSTNSYNC_CLK_FSYS1_BUS_CLK 32 +#define CLK_GOUT_FSYS1_RTIC_I_ACLK 33 +#define CLK_GOUT_FSYS1_RTIC_I_PCLK 34 +#define CLK_GOUT_FSYS1_SSS_I_ACLK 35 +#define CLK_GOUT_FSYS1_SSS_I_PCLK 36 +#define CLK_GOUT_FSYS1_SYSREG_FSYS1_PCLK 37 +#define CLK_GOUT_FSYS1_TOE_WIFI0_I_CLK 38 +#define CLK_GOUT_FSYS1_TOE_WIFI1_I_CLK 39 +#define CLK_GOUT_FSYS1_UFS_CARD_I_ACLK 40 +#define CLK_GOUT_FSYS1_UFS_CARD_I_CLK_UNIPRO 41 +#define CLK_GOUT_FSYS1_UFS_CARD_I_FMP_CLK 42 +#define CLK_GOUT_FSYS1_XIU_D_FSYS1_ACLK 43 +#define CLK_GOUT_FSYS1_XIU_P_FSYS1_ACLK 44 + +/* CMU_PERIC0 */ +#define CLK_MOUT_PERIC0_BUS_USER 1 +#define CLK_MOUT_PERIC0_UART_DBG_USER 2 +#define CLK_MOUT_PERIC0_USI00_USER 3 +#define CLK_MOUT_PERIC0_USI01_USER 4 +#define CLK_MOUT_PERIC0_USI02_USER 5 +#define CLK_MOUT_PERIC0_USI03_USER 6 +#define CLK_GOUT_PERIC0_PERIC0_CMU_PERIC0_PCLK 7 +#define CLK_GOUT_PERIC0_AXI2APB_PERIC0_ACLK 8 +#define CLK_GOUT_PERIC0_GPIO_PERIC0_PCLK 9 +#define CLK_GOUT_PERIC0_LHM_AXI_P_PERIC0_I_CLK 10 +#define CLK_GOUT_PERIC0_PMU_PERIC0_PCLK 11 +#define CLK_GOUT_PERIC0_PWM_I_PCLK_S0 12 +#define CLK_GOUT_PERIC0_RSTNSYNC_CLK_PERIC0_BUSP_CLK 13 +#define CLK_GOUT_PERIC0_SPEEDY2_TSP_CLK 14 +#define CLK_GOUT_PERIC0_SYSREG_PERIC0_PCLK 15 +#define CLK_GOUT_PERIC0_UART_DBG_EXT_UCLK 16 +#define CLK_GOUT_PERIC0_UART_DBG_PCLK 17 +#define CLK_GOUT_PERIC0_USI00_I_PCLK 18 +#define CLK_GOUT_PERIC0_USI00_I_SCLK_USI 19 +#define CLK_GOUT_PERIC0_USI01_I_PCLK 20 +#define CLK_GOUT_PERIC0_USI01_I_SCLK_USI 21 +#define CLK_GOUT_PERIC0_USI02_I_PCLK 22 +#define CLK_GOUT_PERIC0_USI02_I_SCLK_USI 23 +#define CLK_GOUT_PERIC0_USI03_I_PCLK 24 +#define CLK_GOUT_PERIC0_USI03_I_SCLK_USI 25 + +/* CMU_PERIC1 */ +#define CLK_MOUT_PERIC1_BUS_USER 1 +#define CLK_MOUT_PERIC1_SPEEDY2_USER 2 +#define CLK_MOUT_PERIC1_SPI_CAM0_USER 3 +#define CLK_MOUT_PERIC1_SPI_CAM1_USER 4 +#define CLK_MOUT_PERIC1_UART_BT_USER 5 +#define CLK_MOUT_PERIC1_USI04_USER 6 +#define CLK_MOUT_PERIC1_USI05_USER 7 +#define CLK_MOUT_PERIC1_USI06_USER 8 +#define CLK_MOUT_PERIC1_USI07_USER 9 +#define CLK_MOUT_PERIC1_USI08_USER 10 +#define CLK_MOUT_PERIC1_USI09_USER 11 +#define CLK_MOUT_PERIC1_USI10_USER 12 +#define CLK_MOUT_PERIC1_USI11_USER 13 +#define CLK_MOUT_PERIC1_USI12_USER 14 +#define CLK_MOUT_PERIC1_USI13_USER 15 +#define CLK_GOUT_PERIC1_PERIC1_CMU_PERIC1_PCLK 16 +#define CLK_GOUT_PERIC1_RSTNSYNC_CLK_PERIC1_SPEEDY2_CLK 17 +#define CLK_GOUT_PERIC1_AXI2APB_PERIC1P0_ACLK 18 +#define CLK_GOUT_PERIC1_AXI2APB_PERIC1P1_ACLK 19 +#define CLK_GOUT_PERIC1_AXI2APB_PERIC1P2_ACLK 20 +#define CLK_GOUT_PERIC1_GPIO_PERIC1_PCLK 21 +#define CLK_GOUT_PERIC1_HSI2C_CAM0_IPCLK 22 +#define CLK_GOUT_PERIC1_HSI2C_CAM1_IPCLK 23 +#define CLK_GOUT_PERIC1_HSI2C_CAM2_IPCLK 24 +#define CLK_GOUT_PERIC1_HSI2C_CAM3_IPCLK 25 +#define CLK_GOUT_PERIC1_LHM_AXI_P_PERIC1_I_CLK 26 +#define CLK_GOUT_PERIC1_PMU_PERIC1_PCLK 27 +#define CLK_GOUT_PERIC1_RSTNSYNC_CLK_PERIC1_BUSP_CLK 28 +#define CLK_GOUT_PERIC1_SPEEDY2_DDI1_CLK 29 +#define CLK_GOUT_PERIC1_SPEEDY2_DDI1_SCLK 30 +#define CLK_GOUT_PERIC1_SPEEDY2_DDI2_CLK 31 +#define CLK_GOUT_PERIC1_SPEEDY2_DDI2_SCLK 32 +#define CLK_GOUT_PERIC1_SPEEDY2_DDI_CLK 33 +#define CLK_GOUT_PERIC1_SPEEDY2_DDI_SCLK 34 +#define CLK_GOUT_PERIC1_SPEEDY2_TSP1_CLK 35 +#define CLK_GOUT_PERIC1_SPEEDY2_TSP2_CLK 36 +#define CLK_GOUT_PERIC1_SPI_CAM0_PCLK 37 +#define CLK_GOUT_PERIC1_SPI_CAM0_SPI_EXT_CLK 38 +#define CLK_GOUT_PERIC1_SPI_CAM1_PCLK 39 +#define CLK_GOUT_PERIC1_SPI_CAM1_SPI_EXT_CLK 40 +#define CLK_GOUT_PERIC1_SYSREG_PERIC1_PCLK 41 +#define CLK_GOUT_PERIC1_UART_BT_EXT_UCLK 42 +#define CLK_GOUT_PERIC1_UART_BT_PCLK 43 +#define CLK_GOUT_PERIC1_USI04_I_PCLK 44 +#define CLK_GOUT_PERIC1_USI04_I_SCLK_USI 45 +#define CLK_GOUT_PERIC1_USI05_I_PCLK 46 +#define CLK_GOUT_PERIC1_USI05_I_SCLK_USI 47 +#define CLK_GOUT_PERIC1_USI06_I_PCLK 48 +#define CLK_GOUT_PERIC1_USI06_I_SCLK_USI 49 +#define CLK_GOUT_PERIC1_USI07_I_PCLK 50 +#define CLK_GOUT_PERIC1_USI07_I_SCLK_USI 51 +#define CLK_GOUT_PERIC1_USI08_I_PCLK 52 +#define CLK_GOUT_PERIC1_USI08_I_SCLK_USI 53 +#define CLK_GOUT_PERIC1_USI09_I_PCLK 54 +#define CLK_GOUT_PERIC1_USI09_I_SCLK_USI 55 +#define CLK_GOUT_PERIC1_USI10_I_PCLK 56 +#define CLK_GOUT_PERIC1_USI10_I_SCLK_USI 57 +#define CLK_GOUT_PERIC1_USI11_I_PCLK 58 +#define CLK_GOUT_PERIC1_USI11_I_SCLK_USI 59 +#define CLK_GOUT_PERIC1_USI12_I_PCLK 60 +#define CLK_GOUT_PERIC1_USI12_I_SCLK_USI 61 +#define CLK_GOUT_PERIC1_USI13_I_PCLK 62 +#define CLK_GOUT_PERIC1_USI13_I_SCLK_USI 63 +#define CLK_GOUT_PERIC1_XIU_P_PERIC1_ACLK 64 + +#endif /* _DT_BINDINGS_CLOCK_EXYNOS8895_H */ -- cgit v1.2.3 From 8acdd0e7bfadda6b5103f2960d293581954454ed Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 25 Oct 2024 08:37:18 +0800 Subject: blk-mq: add non_owner variant of start_freeze/unfreeze queue APIs Add non_owner variant of start_freeze/unfreeze queue APIs, so that the caller knows that what they are doing, and we can skip lockdep support for non_owner variant in per-call level. Prepare for supporting lockdep for freezing/unfreezing queue. Reviewed-by: Christoph Hellwig Suggested-by: Christoph Hellwig Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20241025003722.3630252-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 59e9adf815a4..2035fad3131f 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -919,6 +919,8 @@ void blk_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout); +void blk_mq_unfreeze_queue_non_owner(struct request_queue *q); +void blk_freeze_queue_start_non_owner(struct request_queue *q); void blk_mq_map_queues(struct blk_mq_queue_map *qmap); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); -- cgit v1.2.3 From f1be1788a32e8fa63416ad4518bbd1a85a825c9d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 25 Oct 2024 08:37:20 +0800 Subject: block: model freeze & enter queue as lock for supporting lockdep Recently we got several deadlock report[1][2][3] caused by blk_mq_freeze_queue and blk_enter_queue(). Turns out the two are just like acquiring read/write lock, so model them as read/write lock for supporting lockdep: 1) model q->q_usage_counter as two locks(io and queue lock) - queue lock covers sync with blk_enter_queue() - io lock covers sync with bio_enter_queue() 2) make the lockdep class/key as per-queue: - different subsystem has very different lock use pattern, shared lock class causes false positive easily - freeze_queue degrades to no lock in case that disk state becomes DEAD because bio_enter_queue() won't be blocked any more - freeze_queue degrades to no lock in case that request queue becomes dying because blk_enter_queue() won't be blocked any more 3) model blk_mq_freeze_queue() as acquire_exclusive & try_lock - it is exclusive lock, so dependency with blk_enter_queue() is covered - it is trylock because blk_mq_freeze_queue() are allowed to run concurrently 4) model blk_enter_queue() & bio_enter_queue() as acquire_read() - nested blk_enter_queue() are allowed - dependency with blk_mq_freeze_queue() is covered - blk_queue_exit() is often called from other contexts(such as irq), and it can't be annotated as lock_release(), so simply do it in blk_enter_queue(), this way still covered cases as many as possible With lockdep support, such kind of reports may be reported asap and needn't wait until the real deadlock is triggered. For example, lockdep report can be triggered in the report[3] with this patch applied. [1] occasional block layer hang when setting 'echo noop > /sys/block/sda/queue/scheduler' https://bugzilla.kernel.org/show_bug.cgi?id=219166 [2] del_gendisk() vs blk_queue_enter() race condition https://lore.kernel.org/linux-block/20241003085610.GK11458@google.com/ [3] queue_freeze & queue_enter deadlock in scsi https://lore.kernel.org/linux-block/ZxG38G9BuFdBpBHZ@fedora/T/#u Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20241025003722.3630252-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 55bec14fe55f..d0a52ed05e60 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -25,6 +25,7 @@ #include #include #include +#include struct module; struct request_queue; @@ -474,6 +475,11 @@ struct request_queue { struct xarray hctx_table; struct percpu_ref q_usage_counter; + struct lock_class_key io_lock_cls_key; + struct lockdep_map io_lockdep_map; + + struct lock_class_key q_lock_cls_key; + struct lockdep_map q_lockdep_map; struct request *last_merge; -- cgit v1.2.3 From 30dac24e14b52e1787572d1d4e06eeabe8a63630 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Thu, 26 Sep 2024 16:01:21 +0200 Subject: fs/writeback: convert wbc_account_cgroup_owner to take a folio Most of the callers of wbc_account_cgroup_owner() are converting a folio to page before calling the function. wbc_account_cgroup_owner() is converting the page back to a folio to call mem_cgroup_css_from_folio(). Convert wbc_account_cgroup_owner() to take a folio instead of a page, and convert all callers to pass a folio directly except f2fs. Convert the page to folio for all the callers from f2fs as they were the only callers calling wbc_account_cgroup_owner() with a page. As f2fs is already in the process of converting to folios, these call sites might also soon be calling wbc_account_cgroup_owner() with a folio directly in the future. No functional changes. Only compile tested. Signed-off-by: Pankaj Raghav Link: https://lore.kernel.org/r/20240926140121.203821-1-kernel@pankajraghav.com Acked-by: David Sterba Acked-by: Tejun Heo Signed-off-by: Christian Brauner --- include/linux/writeback.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d6db822e4bb3..641a057e0413 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -217,7 +217,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc, struct inode *inode) __releases(&inode->i_lock); void wbc_detach_inode(struct writeback_control *wbc); -void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, +void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio, size_t bytes); int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, enum wb_reason reason, struct wb_completion *done); @@ -324,7 +324,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) } static inline void wbc_account_cgroup_owner(struct writeback_control *wbc, - struct page *page, size_t bytes) + struct folio *folio, size_t bytes) { } -- cgit v1.2.3 From 0e152beb5aa1ccdac9aae9fa570a9e039aff7a03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Mon, 21 Oct 2024 13:37:17 -0300 Subject: libfs: Create the helper function generic_ci_validate_strict_name() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create a helper function for filesystems do the checks required for casefold directories and strict encoding. Suggested-by: Gabriel Krisman Bertazi Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: André Almeida Link: https://lore.kernel.org/r/20241021-tonyk-tmpfs-v8-1-f443d5814194@igalia.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3559446279c1..403ee5d54c60 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -3456,6 +3457,50 @@ extern int generic_ci_match(const struct inode *parent, const struct qstr *folded_name, const u8 *de_name, u32 de_name_len); +#if IS_ENABLED(CONFIG_UNICODE) +/** + * generic_ci_validate_strict_name - Check if a given name is suitable + * for a directory + * + * This functions checks if the proposed filename is valid for the + * parent directory. That means that only valid UTF-8 filenames will be + * accepted for casefold directories from filesystems created with the + * strict encoding flag. That also means that any name will be + * accepted for directories that doesn't have casefold enabled, or + * aren't being strict with the encoding. + * + * @dir: inode of the directory where the new file will be created + * @name: name of the new file + * + * Return: + * * True if the filename is suitable for this directory. It can be + * true if a given name is not suitable for a strict encoding + * directory, but the directory being used isn't strict + * * False if the filename isn't suitable for this directory. This only + * happens when a directory is casefolded and the filesystem is strict + * about its encoding. + */ +static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name) +{ + if (!IS_CASEFOLDED(dir) || !sb_has_strict_encoding(dir->i_sb)) + return true; + + /* + * A casefold dir must have a encoding set, unless the filesystem + * is corrupted + */ + if (WARN_ON_ONCE(!dir->i_sb->s_encoding)) + return true; + + return !utf8_validate(dir->i_sb->s_encoding, name); +} +#else +static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name) +{ + return true; +} +#endif + static inline bool sb_has_encoding(const struct super_block *sb) { #if IS_ENABLED(CONFIG_UNICODE) -- cgit v1.2.3 From 04dad6c6d37d741bad9946a92171bfa637e989f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Mon, 21 Oct 2024 13:37:19 -0300 Subject: unicode: Export latest available UTF-8 version number MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export latest available UTF-8 version number so filesystems can easily load the newest one. Signed-off-by: André Almeida Link: https://lore.kernel.org/r/20241021-tonyk-tmpfs-v8-3-f443d5814194@igalia.com Acked-by: Gabriel Krisman Bertazi Signed-off-by: Christian Brauner --- include/linux/unicode.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 4d39e6e11a95..0c0ab04e84ee 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -16,6 +16,8 @@ struct utf8data_table; ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \ ((unsigned int)(REV))) +#define UTF8_LATEST UNICODE_AGE(12, 1, 0) + static inline u8 unicode_major(unsigned int age) { return (age >> UNICODE_MAJ_SHIFT) & 0xff; -- cgit v1.2.3 From 142fa60f61f93805471012f24e029af6d113c5cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Mon, 21 Oct 2024 13:37:20 -0300 Subject: unicode: Recreate utf8_parse_version() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All filesystems that currently support UTF-8 casefold can fetch the UTF-8 version from the filesystem metadata stored on disk. They can get the data stored and directly match it to a integer, so they can skip the string parsing step, which motivated the removal of this function in the first place. However, for tmpfs, the only way to tell the kernel which UTF-8 version we are about to use is via mount options, using a string. Re-introduce utf8_parse_version() to be used by tmpfs. This version differs from the original by skipping the intermediate step of copying the version string to an auxiliary string before calling match_token(). This versions calls match_token() in the argument string. The paramenters are simpler now as well. utf8_parse_version() was created by 9d53690f0d4 ("unicode: implement higher level API for string handling") and later removed by 49bd03cc7e9 ("unicode: pass a UNICODE_AGE() tripple to utf8_load"). Signed-off-by: André Almeida Link: https://lore.kernel.org/r/20241021-tonyk-tmpfs-v8-4-f443d5814194@igalia.com Reviewed-by: Theodore Ts'o Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Christian Brauner --- include/linux/unicode.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 0c0ab04e84ee..5e6b212a2aed 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -78,4 +78,6 @@ int utf8_casefold_hash(const struct unicode_map *um, const void *salt, struct unicode_map *utf8_load(unsigned int version); void utf8_unload(struct unicode_map *um); +int utf8_parse_version(char *version); + #endif /* _LINUX_UNICODE_H */ -- cgit v1.2.3 From 458532c8dfeb24edd5e07467605a6484a728e5c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Mon, 21 Oct 2024 13:37:21 -0300 Subject: libfs: Export generic_ci_ dentry functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export generic_ci_ dentry functions so they can be used by case-insensitive filesystems that need something more custom than the default one set by `struct generic_ci_dentry_ops`. Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: André Almeida Link: https://lore.kernel.org/r/20241021-tonyk-tmpfs-v8-5-f443d5814194@igalia.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 403ee5d54c60..b277369672a1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3458,6 +3458,10 @@ extern int generic_ci_match(const struct inode *parent, const u8 *de_name, u32 de_name_len); #if IS_ENABLED(CONFIG_UNICODE) +int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str); +int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name); + /** * generic_ci_validate_strict_name - Check if a given name is suitable * for a directory -- cgit v1.2.3 From 5cd9aecbc72c07e8b83e900078669a7d0bc5f98f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Mon, 21 Oct 2024 13:37:23 -0300 Subject: tmpfs: Add flag FS_CASEFOLD_FL support for tmpfs dirs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable setting flag FS_CASEFOLD_FL for tmpfs directories, when tmpfs is mounted with casefold support. A special check is need for this flag, since it can't be set for non-empty directories. Reviewed-by: Gabriel Krisman Bertazi Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: André Almeida Link: https://lore.kernel.org/r/20241021-tonyk-tmpfs-v8-7-f443d5814194@igalia.com Signed-off-by: Christian Brauner --- include/linux/shmem_fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 515a9a6a3c6f..018da28c01e7 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -42,10 +42,10 @@ struct shmem_inode_info { struct inode vfs_inode; }; -#define SHMEM_FL_USER_VISIBLE FS_FL_USER_VISIBLE +#define SHMEM_FL_USER_VISIBLE (FS_FL_USER_VISIBLE | FS_CASEFOLD_FL) #define SHMEM_FL_USER_MODIFIABLE \ - (FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL) -#define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL) + (FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL | FS_CASEFOLD_FL) +#define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL | FS_CASEFOLD_FL) struct shmem_quota_limits { qsize_t usrquota_bhardlimit; /* Default user quota block hard limit */ -- cgit v1.2.3 From 9f070b1862f3411b8bcdfd51a8eaad25286f9deb Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 14 Oct 2024 16:52:41 +0200 Subject: media: v4l2-core: v4l2-dv-timings: check cvt/gtf result The v4l2_detect_cvt/gtf functions should check the result against the timing capabilities: these functions calculate the timings, so if they are out of bounds, they should be rejected. To do this, add the struct v4l2_dv_timings_cap as argument to those functions. This required updates to the adv7604 and adv7842 drivers since the prototype of these functions has now changed. The timings struct that is passed to v4l2_detect_cvt/gtf in those two drivers is filled with the timings detected by the hardware. The vivid driver was also updated, but an additional check was added: the width and height specified by VIDIOC_S_DV_TIMINGS has to match the calculated result, otherwise something went wrong. Note that vivid *emulates* hardware, so all the values passed to the v4l2_detect_cvt/gtf functions came from the timings struct that was filled by userspace and passed on to the driver via VIDIOC_S_DV_TIMINGS. So these fields can contain random data. Both the constraints check via struct v4l2_dv_timings_cap and the additional width/height check ensure that the resulting timings are sane and not messed up by the v4l2_detect_cvt/gtf calculations. Signed-off-by: Hans Verkuil Fixes: 2576415846bc ("[media] v4l2: move dv-timings related code to v4l2-dv-timings.c") Cc: stable@vger.kernel.org Reported-by: syzbot+a828133770f62293563e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-media/000000000000013050062127830a@google.com/ --- include/media/v4l2-dv-timings.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-dv-timings.h b/include/media/v4l2-dv-timings.h index 13830411bd6c..ff07dc6b103c 100644 --- a/include/media/v4l2-dv-timings.h +++ b/include/media/v4l2-dv-timings.h @@ -147,15 +147,18 @@ void v4l2_print_dv_timings(const char *dev_prefix, const char *prefix, * @polarities: the horizontal and vertical polarities (same as struct * v4l2_bt_timings polarities). * @interlaced: if this flag is true, it indicates interlaced format + * @cap: the v4l2_dv_timings_cap capabilities. * @fmt: the resulting timings. * * This function will attempt to detect if the given values correspond to a * valid CVT format. If so, then it will return true, and fmt will be filled * in with the found CVT timings. */ -bool v4l2_detect_cvt(unsigned frame_height, unsigned hfreq, unsigned vsync, - unsigned active_width, u32 polarities, bool interlaced, - struct v4l2_dv_timings *fmt); +bool v4l2_detect_cvt(unsigned int frame_height, unsigned int hfreq, + unsigned int vsync, unsigned int active_width, + u32 polarities, bool interlaced, + const struct v4l2_dv_timings_cap *cap, + struct v4l2_dv_timings *fmt); /** * v4l2_detect_gtf - detect if the given timings follow the GTF standard @@ -171,15 +174,18 @@ bool v4l2_detect_cvt(unsigned frame_height, unsigned hfreq, unsigned vsync, * image height, so it has to be passed explicitly. Usually * the native screen aspect ratio is used for this. If it * is not filled in correctly, then 16:9 will be assumed. + * @cap: the v4l2_dv_timings_cap capabilities. * @fmt: the resulting timings. * * This function will attempt to detect if the given values correspond to a * valid GTF format. If so, then it will return true, and fmt will be filled * in with the found GTF timings. */ -bool v4l2_detect_gtf(unsigned frame_height, unsigned hfreq, unsigned vsync, - u32 polarities, bool interlaced, struct v4l2_fract aspect, - struct v4l2_dv_timings *fmt); +bool v4l2_detect_gtf(unsigned int frame_height, unsigned int hfreq, + unsigned int vsync, u32 polarities, bool interlaced, + struct v4l2_fract aspect, + const struct v4l2_dv_timings_cap *cap, + struct v4l2_dv_timings *fmt); /** * v4l2_calc_aspect_ratio - calculate the aspect ratio based on bytes -- cgit v1.2.3 From d4a65302dd849fade9e2ca712826c35b8d068ecb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 24 Oct 2024 13:34:26 +0000 Subject: vdso: Change PAGE_MASK to signed on all 32-bit architectures With the introduction of an architecture-independent defintion of PAGE_MASK, we had to make a choice between defining it as 'unsigned long' as on 64-bit architectures, or as signed 'long' as required for architectures with a 64-bit phys_addr_t. To reduce the risk for regressions and minimize the changes in behavior, the result was using the signed value only when CONFIG_PHYS_ADDR_T_64BIT is set, but that ended up causing a regression after all in the early_init_dt_add_memory_arch() function that uses 64-bit integers for address calculation. Presumably the same regression also affects mips32 and powerpc32 when dealing with large amounts of memory on DT platforms: like arm32, they were using the signed version unconditionally. The two most sensible options for addressing the regression are either to go back to an architecture specific definition, using a signed constant on arm/powerpc/mips and unsigned on the others, or to use the same definition everywhere. Use the simpler of those two and change them all to the signed version, in the hope that this does not cause a different type of bug. Most of the other 32-bit architectures have no large physical address support and are rarely used, so it seems more likely that using the same definition helps than hurts here. In particular, x86-32 does have physical addressing extensions, so it already changed to the signed version after the previous patch, so it makes sense to use the same version on non-PAE as well. Fixes: efe8419ae78d ("vdso: Introduce vdso/page.h") Reported-by: Naresh Kamboju Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Tested-by: Anders Roxell Tested-by: Vincenzo Frascino Reviewed-by: Vincenzo Frascino Link: https://lore.kernel.org/all/20241024133447.3117273-1-arnd@kernel.org Link: https://lore.kernel.org/lkml/CA+G9fYt86bUAu_v5dXPWnDUwQNVipj+Wq3Djir1KUSKdr9QLNg@mail.gmail.com/ --- include/vdso/page.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/vdso/page.h b/include/vdso/page.h index 4ada1ba6bd1f..710ae2414e68 100644 --- a/include/vdso/page.h +++ b/include/vdso/page.h @@ -14,13 +14,14 @@ #define PAGE_SIZE (_AC(1,UL) << CONFIG_PAGE_SHIFT) -#if defined(CONFIG_PHYS_ADDR_T_64BIT) && !defined(CONFIG_64BIT) +#if !defined(CONFIG_64BIT) /* - * Applies only to 32-bit architectures with a 64-bit phys_addr_t. + * Applies only to 32-bit architectures. * * Subtle: (1 << CONFIG_PAGE_SHIFT) is an int, not an unsigned long. * So if we assign PAGE_MASK to a larger type it gets extended the - * way we want (i.e. with 1s in the high bits) + * way we want (i.e. with 1s in the high bits) while masking a + * 64-bit value such as phys_addr_t. */ #define PAGE_MASK (~((1 << CONFIG_PAGE_SHIFT) - 1)) #else -- cgit v1.2.3 From a2ad1b8101a36c927bcbd1317475b9576c352155 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Fri, 25 Oct 2024 06:11:51 -0700 Subject: mm/gup: Add folio_add_pins() Export a function that adds pins to an already-pinned huge-page folio. This allows any range of small pages within the folio to be unpinned later. For example, pages pinned via memfd_pin_folios and modified by folio_add_pins could be unpinned via unpin_user_page(s). Link: https://patch.msgid.link/r/1729861919-234514-2-git-send-email-steven.sistare@oracle.com Suggested-by: Jason Gunthorpe Suggested-by: David Hildenbrand Signed-off-by: Steve Sistare Reviewed-by: Jason Gunthorpe Acked-by: David Hildenbrand Signed-off-by: Jason Gunthorpe --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ecf63d2b0582..f9de33ac2add 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2524,6 +2524,7 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end, struct folio **folios, unsigned int max_folios, pgoff_t *offset); +int folio_add_pins(struct folio *folio, unsigned int pins); int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); -- cgit v1.2.3 From f4986a72d6e4be78ec0e4ee0e03531474621183f Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Fri, 25 Oct 2024 06:11:57 -0700 Subject: iommufd: Add IOMMU_IOAS_MAP_FILE Define the IOMMU_IOAS_MAP_FILE ioctl interface, which allows a user to register memory by passing a memfd plus offset and length. Implement it using the memfd_pin_folios() kAPI. Link: https://patch.msgid.link/r/1729861919-234514-8-git-send-email-steven.sistare@oracle.com Suggested-by: Jason Gunthorpe Signed-off-by: Steve Sistare Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 72010f71c5e4..41b1a01e9293 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -51,6 +51,7 @@ enum { IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c, IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d, IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, + IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f, }; /** @@ -213,6 +214,30 @@ struct iommu_ioas_map { }; #define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP) +/** + * struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE) + * @size: sizeof(struct iommu_ioas_map_file) + * @flags: same as for iommu_ioas_map + * @ioas_id: same as for iommu_ioas_map + * @fd: the memfd to map + * @start: byte offset from start of file to map from + * @length: same as for iommu_ioas_map + * @iova: same as for iommu_ioas_map + * + * Set an IOVA mapping from a memfd file. All other arguments and semantics + * match those of IOMMU_IOAS_MAP. + */ +struct iommu_ioas_map_file { + __u32 size; + __u32 flags; + __u32 ioas_id; + __s32 fd; + __aligned_u64 start; + __aligned_u64 length; + __aligned_u64 iova; +}; +#define IOMMU_IOAS_MAP_FILE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP_FILE) + /** * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY) * @size: sizeof(struct iommu_ioas_copy) -- cgit v1.2.3 From 66418687ac895717dc2f6ddffe24cf9b74cd0d3e Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 10 Oct 2024 10:24:42 -0500 Subject: kernel/range: Const-ify range_contains parameters range_contains() does not modify the range values. David suggested it is safer to keep those parameters as const.[1] Make range parameters const Link: https://lore.kernel.org/all/20241008161032.GB1609@twin.jikos.cz/ [1] Reviewed-by: Dan Williams Reviewed-by: David Sterba Link: https://patch.msgid.link/20241010-const-range-v1-1-afb6e4bfd8ce@intel.com Signed-off-by: Ira Weiny Signed-off-by: Dave Jiang --- include/linux/range.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/range.h b/include/linux/range.h index 6ad0b73cb7ad..7dc5e835e079 100644 --- a/include/linux/range.h +++ b/include/linux/range.h @@ -13,7 +13,8 @@ static inline u64 range_len(const struct range *range) return range->end - range->start + 1; } -static inline bool range_contains(struct range *r1, struct range *r2) +static inline bool range_contains(const struct range *r1, + const struct range *r2) { return r1->start <= r2->start && r1->end >= r2->end; } -- cgit v1.2.3 From 0bbff9ed81654d5f06bfca484681756ee407f924 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 2 Oct 2024 13:43:24 -0500 Subject: perf/arm_pmuv3: Add PMUv3.9 per counter EL0 access control Armv8.9/9.4 PMUv3.9 adds per counter EL0 access controls. Per counter access is enabled with the UEN bit in PMUSERENR_EL1 register. Individual counters are enabled/disabled in the PMUACR_EL1 register. When UEN is set, the CR/ER bits control EL0 write access and must be set to disable write access. With the access controls, the clearing of unused counters can be skipped. KVM also configures PMUSERENR_EL1 in order to trap to EL2. UEN does not need to be set for it since only PMUv3.5 is exposed to guests. Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20241002184326.1105499-1-robh@kernel.org Signed-off-by: Will Deacon --- include/linux/perf/arm_pmuv3.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 3372c1b56486..d698efba28a2 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -257,6 +257,7 @@ #define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */ #define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ #define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ +#define ARMV8_PMU_USERENR_UEN (1 << 4) /* Fine grained per counter access at EL0 */ /* Mask for writable bits */ #define ARMV8_PMU_USERENR_MASK (ARMV8_PMU_USERENR_EN | ARMV8_PMU_USERENR_SW | \ ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_ER) -- cgit v1.2.3 From e1dce56443a4a18978fe39ee4af663e5b6b31422 Mon Sep 17 00:00:00 2001 From: Gowthami Thiagarajan Date: Mon, 28 Oct 2024 11:23:09 +0530 Subject: perf/marvell: Marvell PEM performance monitor support PCI Express Interface PMU includes various performance counters to monitor the data that is transmitted over the PCIe link. The counters track various inbound and outbound transactions which includes separate counters for posted/non-posted/completion TLPs. Also, inbound and outbound memory read requests along with their latencies can also be monitored. Address Translation Services(ATS)events such as ATS Translation, ATS Page Request, ATS Invalidation along with their corresponding latencies are also supported. The performance counters are 64 bits wide. For instance, perf stat -e ib_tlp_pr tracks the inbound posted TLPs for the workload. Co-developed-by: Linu Cherian Signed-off-by: Linu Cherian Signed-off-by: Gowthami Thiagarajan Link: https://lore.kernel.org/r/20241028055309.17893-1-gthiagarajan@marvell.com Signed-off-by: Will Deacon --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 2361ed4d2b15..61d9a66d1807 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -227,6 +227,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE, CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE, CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE, + CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE, CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, -- cgit v1.2.3 From dc60de4eb0a431bde94e5abe55be6f646cdb435d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 24 Oct 2024 22:04:54 +0300 Subject: iio: acpi: Add iio_get_acpi_device_name_and_data() helper function A few drivers duplicate the code to retrieve ACPI device instance name. Some of them want an associated driver data as well. In order of deduplication introduce the common helper functions. Reviewed-by: Hans de Goede Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20241024191200.229894-6-andriy.shevchenko@linux.intel.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 3a9b57187a95..445d6666a291 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -831,6 +831,7 @@ int iio_device_resume_triggering(struct iio_dev *indio_dev); bool iio_read_acpi_mount_matrix(struct device *dev, struct iio_mount_matrix *orientation, char *acpi_method); +const char *iio_get_acpi_device_name_and_data(struct device *dev, const void **data); #else static inline bool iio_read_acpi_mount_matrix(struct device *dev, struct iio_mount_matrix *orientation, @@ -838,7 +839,16 @@ static inline bool iio_read_acpi_mount_matrix(struct device *dev, { return false; } +static inline const char * +iio_get_acpi_device_name_and_data(struct device *dev, const void **data) +{ + return NULL; +} #endif +static inline const char *iio_get_acpi_device_name(struct device *dev) +{ + return iio_get_acpi_device_name_and_data(dev, NULL); +} /** * iio_get_current_scan_type - Get the current scan type for a channel -- cgit v1.2.3 From cb67ff6272eceb5fcb2fe3b74f0293fa0706841a Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 22 Oct 2024 12:30:50 -0400 Subject: drm/amdkfd: flag per-queue reset support for gfx9 Flag KFD support for per-queue reset on GFX9 devices. Signed-off-by: Jonathan Kim Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_sysfs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h index 5e8d28617efa..859b8e91d4d3 100644 --- a/include/uapi/linux/kfd_sysfs.h +++ b/include/uapi/linux/kfd_sysfs.h @@ -60,7 +60,8 @@ #define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000 #define HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED 0x20000000 #define HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED 0x40000000 -#define HSA_CAP_RESERVED 0x800f8000 +#define HSA_CAP_PER_QUEUE_RESET_SUPPORTED 0x80000000 +#define HSA_CAP_RESERVED 0x000f8000 /* debug_prop bits in node properties */ #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_MASK 0x0000000f -- cgit v1.2.3 From 4261974701851630951e9ab31f0de4ade0faea53 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 25 Oct 2024 19:46:55 -0500 Subject: printf: Add print format (%pra) for struct range The use of struct range in the CXL subsystem is growing. In particular, the addition of Dynamic Capacity devices uses struct range in a number of places which are reported in debug and error messages. To wit requiring the printing of the start/end fields in each print became cumbersome. Dan Williams mentions in [1] that it might be time to have a print specifier for struct range similar to struct resource. A few alternatives were considered including '%par', '%r', and '%pn'. %pra follows that struct range is similar to struct resource (%p[rR]) but needs to be different. Based on discussions with Petr and Andy '%pra' was chosen.[2] Andy also suggested to keep the range prints similar to struct resource though combined code. Add hex_range() to handle printing for both pointer types. Finally introduce DEFINE_RANGE() as a parallel to DEFINE_RES_*() and use it in the tests. Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: open list Link: https://lore.kernel.org/all/663922b475e50_d54d72945b@dwillia2-xfh.jf.intel.com.notmuch/ [1] Link: https://lore.kernel.org/all/66cea3bf3332f_f937b29424@iweiny-mobl.notmuch/ [2] Suggested-by: Dan Williams Signed-off-by: Ira Weiny Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20241025-cxl-pra-v2-3-123a825daba2@intel.com Signed-off-by: Dave Jiang --- include/linux/range.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/range.h b/include/linux/range.h index 6ad0b73cb7ad..1358d4b1807a 100644 --- a/include/linux/range.h +++ b/include/linux/range.h @@ -31,4 +31,10 @@ int clean_sort_range(struct range *range, int az); void sort_range(struct range *range, int nr_range); +#define DEFINE_RANGE(_start, _end) \ +(struct range) { \ + .start = (_start), \ + .end = (_end), \ + } + #endif -- cgit v1.2.3 From 6d89ead19946181df1e41d38917fddc951dbd95b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 12 Jul 2024 11:35:23 +0200 Subject: UAPI/ioctl: Improve parameter name of ioctl request definition helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The third parameter to _IOR et al is a type name, not a size. So the parameter being named "size" is irritating. Rename it to "argtype" instead to reduce confusion. There is a very minor chance that this breaks stuff. It only hurts however if there is a variable (or macro) in userspace that is called "argtype" *and* it's used in the parameters of _IOR and friends. IMHO this is negligible because usually definitions making use of these macros are provided by kernel headers (i.e. us) or if they are replicated in userspace code, they are replicated and so supposed to match the kernel definitions (e.g. to make them usable by programs without the need to update the kernel headers used to compile the program). Signed-off-by: Uwe Kleine-König Signed-off-by: Arnd Bergmann --- include/uapi/asm-generic/ioctl.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/asm-generic/ioctl.h b/include/uapi/asm-generic/ioctl.h index a84f4db8a250..e3290a5824c9 100644 --- a/include/uapi/asm-generic/ioctl.h +++ b/include/uapi/asm-generic/ioctl.h @@ -82,13 +82,13 @@ * NOTE: _IOW means userland is writing and kernel is reading. _IOR * means userland is reading and kernel is writing. */ -#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) -#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOR_BAD(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) -#define _IOW_BAD(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) -#define _IOWR_BAD(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) +#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) +#define _IOR(type,nr,argtype) _IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(argtype))) +#define _IOW(type,nr,argtype) _IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(argtype))) +#define _IOWR(type,nr,argtype) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(argtype))) +#define _IOR_BAD(type,nr,argtype) _IOC(_IOC_READ,(type),(nr),sizeof(argtype)) +#define _IOW_BAD(type,nr,argtype) _IOC(_IOC_WRITE,(type),(nr),sizeof(argtype)) +#define _IOWR_BAD(type,nr,argtype) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(argtype)) /* used to decode ioctl numbers.. */ #define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) -- cgit v1.2.3 From 7c7e6c8924e7bf98db4e5b2edb202842003c00c2 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 24 Oct 2024 19:54:43 +0200 Subject: tty: serial: handle HAS_IOPORT dependencies In a future patch HAS_IOPORT=n will disable inb()/outb() and friends at compile time. We thus need to add HAS_IOPORT as dependency for those drivers using them unconditionally. Some 8250 serial drivers support MMIO only use, so fence only the parts requiring I/O ports and print an error message if a device can't be supported with the current configuration. Co-developed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Acked-by: Greg Kroah-Hartman Reviewed-by: Maciej W. Rozycki Signed-off-by: Niklas Schnelle Signed-off-by: Arnd Bergmann --- include/linux/serial_core.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 4ab65874a850..743b4afaad4c 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -505,7 +505,11 @@ struct uart_port { * The remaining bits are serial-core specific and not modifiable by * userspace. */ +#ifdef CONFIG_HAS_IOPORT #define UPF_FOURPORT ((__force upf_t) ASYNC_FOURPORT /* 1 */ ) +#else +#define UPF_FOURPORT 0 +#endif #define UPF_SAK ((__force upf_t) ASYNC_SAK /* 2 */ ) #define UPF_SPD_HI ((__force upf_t) ASYNC_SPD_HI /* 4 */ ) #define UPF_SPD_VHI ((__force upf_t) ASYNC_SPD_VHI /* 5 */ ) -- cgit v1.2.3 From 6f043e75744596968b6547c4bd43e4d30bbb6d6e Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 24 Oct 2024 19:54:44 +0200 Subject: asm-generic/io.h: Remove I/O port accessors for HAS_IOPORT=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With all subsystems and drivers either declaring their dependence on HAS_IOPORT or fencing I/O port specific code sections we can finally make inb()/outb() and friends compile-time dependent on HAS_IOPORT as suggested by Linus in the linked mail. The main benefit of this is that on platforms such as s390 which have no meaningful way of implementing inb()/outb() their use without the proper HAS_IOPORT dependency will result in easy to catch and fix compile-time errors instead of compiling code that can never work. Link: https://lore.kernel.org/lkml/CAHk-=wg80je=K7madF4e7WrRNp37e3qh6y10Svhdc7O8SZ_-8g@mail.gmail.com/ Co-developed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Signed-off-by: Niklas Schnelle Reviewed-by: Takashi Iwai Reviewed-by: Ville Syrjälä Reviewed-by: Jarkko Sakkinen Acked-by: Lucas De Marchi Acked-by: Maciej W. Rozycki Acked-by: Damien Le Moal Acked-by: Jaroslav Kysela Acked-by: Greg Kroah-Hartman Acked-by: Rafael J. Wysocki Acked-by: Johannes Berg # for ARCH=um Acked-by: Geert Uytterhoeven Acked-by: Kalle Valo Acked-by: Jakub Kicinski Acked-by: Corey Minyard Acked-by: Guenter Roeck Acked-by: Damien Le Moal Acked-by: Sebastian Reichel Signed-off-by: Arnd Bergmann --- include/asm-generic/io.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include') diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 80de699bf6af..1027be6a62bc 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -540,6 +540,7 @@ static inline void writesq(volatile void __iomem *addr, const void *buffer, #if !defined(inb) && !defined(_inb) #define _inb _inb +#ifdef CONFIG_HAS_IOPORT static inline u8 _inb(unsigned long addr) { u8 val; @@ -549,10 +550,15 @@ static inline u8 _inb(unsigned long addr) __io_par(val); return val; } +#else +u8 _inb(unsigned long addr) + __compiletime_error("inb()) requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(inw) && !defined(_inw) #define _inw _inw +#ifdef CONFIG_HAS_IOPORT static inline u16 _inw(unsigned long addr) { u16 val; @@ -562,10 +568,15 @@ static inline u16 _inw(unsigned long addr) __io_par(val); return val; } +#else +u16 _inw(unsigned long addr) + __compiletime_error("inw() requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(inl) && !defined(_inl) #define _inl _inl +#ifdef CONFIG_HAS_IOPORT static inline u32 _inl(unsigned long addr) { u32 val; @@ -575,36 +586,55 @@ static inline u32 _inl(unsigned long addr) __io_par(val); return val; } +#else +u32 _inl(unsigned long addr) + __compiletime_error("inl() requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(outb) && !defined(_outb) #define _outb _outb +#ifdef CONFIG_HAS_IOPORT static inline void _outb(u8 value, unsigned long addr) { __io_pbw(); __raw_writeb(value, PCI_IOBASE + addr); __io_paw(); } +#else +void _outb(u8 value, unsigned long addr) + __compiletime_error("outb() requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(outw) && !defined(_outw) #define _outw _outw +#ifdef CONFIG_HAS_IOPORT static inline void _outw(u16 value, unsigned long addr) { __io_pbw(); __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr); __io_paw(); } +#else +void _outw(u16 value, unsigned long addr) + __compiletime_error("outw() requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(outl) && !defined(_outl) #define _outl _outl +#ifdef CONFIG_HAS_IOPORT static inline void _outl(u32 value, unsigned long addr) { __io_pbw(); __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr); __io_paw(); } +#else +void _outl(u32 value, unsigned long addr) + __compiletime_error("outl() requires CONFIG_HAS_IOPORT"); +#endif #endif #include @@ -688,53 +718,83 @@ static inline void outl_p(u32 value, unsigned long addr) #ifndef insb #define insb insb +#ifdef CONFIG_HAS_IOPORT static inline void insb(unsigned long addr, void *buffer, unsigned int count) { readsb(PCI_IOBASE + addr, buffer, count); } +#else +void insb(unsigned long addr, void *buffer, unsigned int count) + __compiletime_error("insb() requires HAS_IOPORT"); +#endif #endif #ifndef insw #define insw insw +#ifdef CONFIG_HAS_IOPORT static inline void insw(unsigned long addr, void *buffer, unsigned int count) { readsw(PCI_IOBASE + addr, buffer, count); } +#else +void insw(unsigned long addr, void *buffer, unsigned int count) + __compiletime_error("insw() requires HAS_IOPORT"); +#endif #endif #ifndef insl #define insl insl +#ifdef CONFIG_HAS_IOPORT static inline void insl(unsigned long addr, void *buffer, unsigned int count) { readsl(PCI_IOBASE + addr, buffer, count); } +#else +void insl(unsigned long addr, void *buffer, unsigned int count) + __compiletime_error("insl() requires HAS_IOPORT"); +#endif #endif #ifndef outsb #define outsb outsb +#ifdef CONFIG_HAS_IOPORT static inline void outsb(unsigned long addr, const void *buffer, unsigned int count) { writesb(PCI_IOBASE + addr, buffer, count); } +#else +void outsb(unsigned long addr, const void *buffer, unsigned int count) + __compiletime_error("outsb() requires HAS_IOPORT"); +#endif #endif #ifndef outsw #define outsw outsw +#ifdef CONFIG_HAS_IOPORT static inline void outsw(unsigned long addr, const void *buffer, unsigned int count) { writesw(PCI_IOBASE + addr, buffer, count); } +#else +void outsw(unsigned long addr, const void *buffer, unsigned int count) + __compiletime_error("outsw() requires HAS_IOPORT"); +#endif #endif #ifndef outsl #define outsl outsl +#ifdef CONFIG_HAS_IOPORT static inline void outsl(unsigned long addr, const void *buffer, unsigned int count) { writesl(PCI_IOBASE + addr, buffer, count); } +#else +void outsl(unsigned long addr, const void *buffer, unsigned int count) + __compiletime_error("outsl() requires HAS_IOPORT"); +#endif #endif #ifndef insb_p -- cgit v1.2.3 From c5c3238d9b8cee58cd4b08bbbe9347a94a566390 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Oct 2024 07:36:36 +0200 Subject: asm-generic: provide generic page_to_phys and phys_to_page implementations page_to_phys is duplicated by all architectures, and from some strange reason placed in where it doesn't fit at all. phys_to_page is only provided by a few architectures despite having a lot of open coded users. Provide generic versions in to make these helpers more easily usable. Note with this patch powerpc loses the CONFIG_DEBUG_VIRTUAL pfn_valid check. It will be added back in a generic version later. Signed-off-by: Christoph Hellwig Signed-off-by: Arnd Bergmann --- include/asm-generic/memory_model.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index 6796abe1900e..a73a140cbecd 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -64,6 +64,9 @@ static inline int pfn_valid(unsigned long pfn) #define page_to_pfn __page_to_pfn #define pfn_to_page __pfn_to_page +#define page_to_phys(page) PFN_PHYS(page_to_pfn(page)) +#define phys_to_page(phys) pfn_to_page(PHYS_PFN(phys)) + #endif /* __ASSEMBLY__ */ #endif -- cgit v1.2.3 From 3e25d5a49f99b75be2c6cfb165e4f77dc6d739a2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Oct 2024 07:36:37 +0200 Subject: asm-generic: add an optional pfn_valid check to page_to_phys page_to_pfn is usually implemented by pointer arithmetics on the memory map, which means that bogus input can lead to even more bogus output. Powerpc had a pfn_valid check on the intermediate pfn in the page_to_phys implementation when CONFIG_DEBUG_VIRTUAL is defined, which seems generally useful, so add that to the generic version. Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Huth Signed-off-by: Arnd Bergmann --- include/asm-generic/memory_model.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index a73a140cbecd..6d1fb6162ac1 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -64,7 +64,17 @@ static inline int pfn_valid(unsigned long pfn) #define page_to_pfn __page_to_pfn #define pfn_to_page __pfn_to_page +#ifdef CONFIG_DEBUG_VIRTUAL +#define page_to_phys(page) \ +({ \ + unsigned long __pfn = page_to_pfn(page); \ + \ + WARN_ON_ONCE(!pfn_valid(__pfn)); \ + PFN_PHYS(__pfn); \ +}) +#else #define page_to_phys(page) PFN_PHYS(page_to_pfn(page)) +#endif /* CONFIG_DEBUG_VIRTUAL */ #define phys_to_page(phys) pfn_to_page(PHYS_PFN(phys)) #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 00a31dd3acea0f88f947fc71e268ebb34b59f218 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 3 Oct 2024 17:16:14 -0400 Subject: asm-generic/div64: optimize/simplify __div64_const32() Several years later I just realized that this code could be greatly simplified. First, let's formalize the need for overflow handling in __arch_xprod64(). Assuming n = UINT64_MAX, there are 2 cases where an overflow may occur: 1) If a bias must be added, we have m_lo * n_lo + m or m_lo * 0xffffffff + ((m_hi << 32) + m_lo) or ((m_lo << 32) - m_lo) + ((m_hi << 32) + m_lo) or (m_lo + m_hi) << 32 which must be < (1 << 64). So the criteria for no overflow is m_lo + m_hi < (1 << 32). 2) The cross product m_lo * n_hi + m_hi * n_lo or m_lo * 0xffffffff + m_hi * 0xffffffff or ((m_lo << 32) - m_lo) + ((m_hi << 32) - m_hi). Assuming the top result from the previous step (m_lo + m_hi) that must be added to this, we get (m_lo + m_hi) << 32 again. So let's have a straight and simpler version when this is true. Otherwise some reordering allows for taking care of possible overflows without any actual conditionals. And prevent from generating both code variants by making sure this is considered only if m is perceived as constant by the compiler. This, in turn, allows for greatly simplifying __div64_const32(). The "special case" may go as well as the regular case works just fine without needing a bias. Then reduction should be applied all the time as minimizing m is the key. Signed-off-by: Nicolas Pitre Signed-off-by: Arnd Bergmann --- include/asm-generic/div64.h | 114 ++++++++++++++------------------------------ 1 file changed, 35 insertions(+), 79 deletions(-) (limited to 'include') diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h index 13f5aa68a455..5d59cf7e73d9 100644 --- a/include/asm-generic/div64.h +++ b/include/asm-generic/div64.h @@ -74,7 +74,8 @@ * do the trick here). \ */ \ uint64_t ___res, ___x, ___t, ___m, ___n = (n); \ - uint32_t ___p, ___bias; \ + uint32_t ___p; \ + bool ___bias = false; \ \ /* determine MSB of b */ \ ___p = 1 << ilog2(___b); \ @@ -87,22 +88,14 @@ ___x = ~0ULL / ___b * ___b - 1; \ \ /* test our ___m with res = m * x / (p << 64) */ \ - ___res = ((___m & 0xffffffff) * (___x & 0xffffffff)) >> 32; \ - ___t = ___res += (___m & 0xffffffff) * (___x >> 32); \ - ___res += (___x & 0xffffffff) * (___m >> 32); \ - ___t = (___res < ___t) ? (1ULL << 32) : 0; \ - ___res = (___res >> 32) + ___t; \ - ___res += (___m >> 32) * (___x >> 32); \ - ___res /= ___p; \ + ___res = (___m & 0xffffffff) * (___x & 0xffffffff); \ + ___t = (___m & 0xffffffff) * (___x >> 32) + (___res >> 32); \ + ___res = (___m >> 32) * (___x >> 32) + (___t >> 32); \ + ___t = (___m >> 32) * (___x & 0xffffffff) + (___t & 0xffffffff);\ + ___res = (___res + (___t >> 32)) / ___p; \ \ - /* Now sanitize and optimize what we've got. */ \ - if (~0ULL % (___b / (___b & -___b)) == 0) { \ - /* special case, can be simplified to ... */ \ - ___n /= (___b & -___b); \ - ___m = ~0ULL / (___b / (___b & -___b)); \ - ___p = 1; \ - ___bias = 1; \ - } else if (___res != ___x / ___b) { \ + /* Now validate what we've got. */ \ + if (___res != ___x / ___b) { \ /* \ * We can't get away without a bias to compensate \ * for bit truncation errors. To avoid it we'd need an \ @@ -111,45 +104,18 @@ * \ * Instead we do m = p / b and n / b = (n * m + m) / p. \ */ \ - ___bias = 1; \ + ___bias = true; \ /* Compute m = (p << 64) / b */ \ ___m = (~0ULL / ___b) * ___p; \ ___m += ((~0ULL % ___b + 1) * ___p) / ___b; \ - } else { \ - /* \ - * Reduce m / p, and try to clear bit 31 of m when \ - * possible, otherwise that'll need extra overflow \ - * handling later. \ - */ \ - uint32_t ___bits = -(___m & -___m); \ - ___bits |= ___m >> 32; \ - ___bits = (~___bits) << 1; \ - /* \ - * If ___bits == 0 then setting bit 31 is unavoidable. \ - * Simply apply the maximum possible reduction in that \ - * case. Otherwise the MSB of ___bits indicates the \ - * best reduction we should apply. \ - */ \ - if (!___bits) { \ - ___p /= (___m & -___m); \ - ___m /= (___m & -___m); \ - } else { \ - ___p >>= ilog2(___bits); \ - ___m >>= ilog2(___bits); \ - } \ - /* No bias needed. */ \ - ___bias = 0; \ } \ \ + /* Reduce m / p to help avoid overflow handling later. */ \ + ___p /= (___m & -___m); \ + ___m /= (___m & -___m); \ + \ /* \ - * Now we have a combination of 2 conditions: \ - * \ - * 1) whether or not we need to apply a bias, and \ - * \ - * 2) whether or not there might be an overflow in the cross \ - * product determined by (___m & ((1 << 63) | (1 << 31))). \ - * \ - * Select the best way to do (m_bias + m * n) / (1 << 64). \ + * Perform (m_bias + m * n) / (1 << 64). \ * From now on there will be actual runtime code generated. \ */ \ ___res = __arch_xprod_64(___m, ___n, ___bias); \ @@ -165,7 +131,7 @@ * Semantic: retval = ((bias ? m : 0) + m * n) >> 64 * * The product is a 128-bit value, scaled down to 64 bits. - * Assuming constant propagation to optimize away unused conditional code. + * Hoping for compile-time optimization of conditional code. * Architectures may provide their own optimized assembly implementation. */ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias) @@ -174,38 +140,28 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias) uint32_t m_hi = m >> 32; uint32_t n_lo = n; uint32_t n_hi = n >> 32; - uint64_t res; - uint32_t res_lo, res_hi, tmp; - - if (!bias) { - res = ((uint64_t)m_lo * n_lo) >> 32; - } else if (!(m & ((1ULL << 63) | (1ULL << 31)))) { - /* there can't be any overflow here */ - res = (m + (uint64_t)m_lo * n_lo) >> 32; - } else { - res = m + (uint64_t)m_lo * n_lo; - res_lo = res >> 32; - res_hi = (res_lo < m_hi); - res = res_lo | ((uint64_t)res_hi << 32); - } - - if (!(m & ((1ULL << 63) | (1ULL << 31)))) { - /* there can't be any overflow here */ - res += (uint64_t)m_lo * n_hi; - res += (uint64_t)m_hi * n_lo; - res >>= 32; + uint64_t x, y; + + /* Determine if overflow handling can be dispensed with. */ + bool no_ovf = __builtin_constant_p(m) && + ((m >> 32) + (m & 0xffffffff) < 0x100000000); + + if (no_ovf) { + x = (uint64_t)m_lo * n_lo + (bias ? m : 0); + x >>= 32; + x += (uint64_t)m_lo * n_hi; + x += (uint64_t)m_hi * n_lo; + x >>= 32; + x += (uint64_t)m_hi * n_hi; } else { - res += (uint64_t)m_lo * n_hi; - tmp = res >> 32; - res += (uint64_t)m_hi * n_lo; - res_lo = res >> 32; - res_hi = (res_lo < tmp); - res = res_lo | ((uint64_t)res_hi << 32); + x = (uint64_t)m_lo * n_lo + (bias ? m_lo : 0); + y = (uint64_t)m_lo * n_hi + (uint32_t)(x >> 32) + (bias ? m_hi : 0); + x = (uint64_t)m_hi * n_hi + (uint32_t)(y >> 32); + y = (uint64_t)m_hi * n_lo + (uint32_t)y; + x += (uint32_t)(y >> 32); } - res += (uint64_t)m_hi * n_hi; - - return res; + return x; } #endif -- cgit v1.2.3 From d533cb2d2af400f4689d8c5ca41db6d83ff173be Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 3 Oct 2024 17:16:16 -0400 Subject: __arch_xprod64(): make __always_inline when optimizing for performance Recent gcc versions started not systematically inline __arch_xprod64() and that has performance implications. Give the compiler the freedom to decide only when optimizing for size. Here's some timing numbers from lib/math/test_div64.c Using __always_inline: ``` test_div64: Starting 64bit/32bit division and modulo test test_div64: Completed 64bit/32bit division and modulo test, 0.048285584s elapsed ``` Without __always_inline: ``` test_div64: Starting 64bit/32bit division and modulo test test_div64: Completed 64bit/32bit division and modulo test, 0.053023584s elapsed ``` Forcing constant base through the non-constant base code path: ``` test_div64: Starting 64bit/32bit division and modulo test test_div64: Completed 64bit/32bit division and modulo test, 0.103263776s elapsed ``` It is worth noting that test_div64 does half the test with non constant divisors already so the impact is greater than what those numbers show. And for what it is worth, those numbers were obtained using QEMU. The gcc version is 14.1.0. Signed-off-by: Nicolas Pitre Signed-off-by: Arnd Bergmann --- include/asm-generic/div64.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h index 5d59cf7e73d9..25e7b4b58dcf 100644 --- a/include/asm-generic/div64.h +++ b/include/asm-generic/div64.h @@ -134,7 +134,12 @@ * Hoping for compile-time optimization of conditional code. * Architectures may provide their own optimized assembly implementation. */ -static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias) +#ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE +static __always_inline +#else +static inline +#endif +uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias) { uint32_t m_lo = m; uint32_t m_hi = m >> 32; -- cgit v1.2.3 From b660d0a2acb9053d627befbb259a397d26aa9582 Mon Sep 17 00:00:00 2001 From: Julian Vetter Date: Mon, 28 Oct 2024 14:42:24 +0100 Subject: New implementation for IO memcpy and IO memset The IO memcpy and IO memset functions in asm-generic/io.h simply call memcpy and memset. This can lead to alignment problems or faults on architectures that do not define their own version and fall back to these defaults. This patch introduces new implementations for IO memcpy and IO memset, that use read{l,q} accessor functions, align accesses to machine word size, and resort to byte accesses when the target memory is not aligned. For new architectures and existing ones that were using the old fallbacks these functions are save to use, because IO memory constraints are taken into account. Moreover, architectures with similar implementations can now use these new versions, not needing to implement their own. Reviewed-by: Yann Sionneau Signed-off-by: Julian Vetter Signed-off-by: Arnd Bergmann --- include/asm-generic/io.h | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 1027be6a62bc..a5cbbf3e26ec 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -1211,7 +1211,6 @@ static inline void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) #endif #ifndef memset_io -#define memset_io memset_io /** * memset_io Set a range of I/O memory to a constant value * @addr: The beginning of the I/O-memory range to set @@ -1220,15 +1219,10 @@ static inline void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) * * Set a range of I/O memory to a given value. */ -static inline void memset_io(volatile void __iomem *addr, int value, - size_t size) -{ - memset(__io_virt(addr), value, size); -} +void memset_io(volatile void __iomem *addr, int val, size_t count); #endif #ifndef memcpy_fromio -#define memcpy_fromio memcpy_fromio /** * memcpy_fromio Copy a block of data from I/O memory * @dst: The (RAM) destination for the copy @@ -1237,16 +1231,10 @@ static inline void memset_io(volatile void __iomem *addr, int value, * * Copy a block of data from I/O memory. */ -static inline void memcpy_fromio(void *buffer, - const volatile void __iomem *addr, - size_t size) -{ - memcpy(buffer, __io_virt(addr), size); -} +void memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count); #endif #ifndef memcpy_toio -#define memcpy_toio memcpy_toio /** * memcpy_toio Copy a block of data into I/O memory * @dst: The (I/O memory) destination for the copy @@ -1255,11 +1243,7 @@ static inline void memcpy_fromio(void *buffer, * * Copy a block of data to I/O memory. */ -static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer, - size_t size) -{ - memcpy(__io_virt(addr), buffer, size); -} +void memcpy_toio(volatile void __iomem *dst, const void *src, size_t count); #endif extern int devmem_is_allowed(unsigned long pfn); -- cgit v1.2.3 From f390525d27bc03dc5925293cbd9f22329648b248 Mon Sep 17 00:00:00 2001 From: Dingyan Li <18500469033@163.com> Date: Sun, 20 Oct 2024 15:47:21 +0800 Subject: usb: storage: fix wrong comments for struct bulk_cb_wrap In the flags, direction is in bit 7 instead of bit 0 based on the specification. Signed-off-by: Dingyan Li <18500469033@163.com> Link: https://lore.kernel.org/r/20241020074721.26905-1-18500469033@163.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/storage.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/storage.h b/include/linux/usb/storage.h index 2827ce72e502..8539956bc2be 100644 --- a/include/linux/usb/storage.h +++ b/include/linux/usb/storage.h @@ -53,7 +53,7 @@ struct bulk_cb_wrap { __le32 Signature; /* contains 'USBC' */ __u32 Tag; /* unique per command id */ __le32 DataTransferLength; /* size of data */ - __u8 Flags; /* direction in bit 0 */ + __u8 Flags; /* direction in bit 7 */ __u8 Lun; /* LUN normally 0 */ __u8 Length; /* length of the CDB */ __u8 CDB[16]; /* max command */ -- cgit v1.2.3 From 4fd06a5358e0d888d1bf23d274971ea7d1f45aad Mon Sep 17 00:00:00 2001 From: David Dai Date: Wed, 18 Sep 2024 17:08:33 -0700 Subject: cpufreq: add virtual-cpufreq driver Introduce a virtualized cpufreq driver for guest kernels to improve performance and power of workloads within VMs. This driver does two main things: 1. Sends the frequency of vCPUs as a hint to the host. The host uses the hint to schedule the vCPU threads and decide physical CPU frequency. 2. If a VM does not support a virtualized FIE(like AMUs), it queries the host CPU frequency by reading a MMIO region of a virtual cpufreq device to update the guest's frequency scaling factor periodically. This enables accurate Per-Entity Load Tracking for tasks running in the guest. Co-developed-by: Saravana Kannan Signed-off-by: Saravana Kannan Signed-off-by: David Dai Signed-off-by: Viresh Kumar --- include/linux/arch_topology.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index b721f360d759..d5d848849408 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -49,6 +49,7 @@ enum scale_freq_source { SCALE_FREQ_SOURCE_CPUFREQ = 0, SCALE_FREQ_SOURCE_ARCH, SCALE_FREQ_SOURCE_CPPC, + SCALE_FREQ_SOURCE_VIRT, }; struct scale_freq_data { -- cgit v1.2.3 From 5935b8377a0f3b2401e4e487336ed90fe6b9254d Mon Sep 17 00:00:00 2001 From: Sui Jingfeng Date: Wed, 23 Oct 2024 11:25:44 +0800 Subject: dma-mapping: remove an outdated comment from dma-map-ops.h The "/* CONFIG_ARCH_HAS_DMA_COHERENCE_H */" was an description about the ARCH_HAS_DMA_COHERENCE_H configure option, but it has been removed since the dma_default_coherent variable was lifted from the mips architecture to the driver core. Therefore it doesn't match any compile guard now. Just remove it. Fixes: 6d4e9a8efe3d ("driver core: lift dma_default_coherent into common code") Signed-off-by: Sui Jingfeng Signed-off-by: Christoph Hellwig --- include/linux/dma-map-ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index b7773201414c..e172522cd936 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -242,7 +242,7 @@ static inline bool dev_is_dma_coherent(struct device *dev) { return true; } -#endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */ +#endif static inline void dma_reset_need_sync(struct device *dev) { -- cgit v1.2.3 From 5af5fc895fb9deec3d7b225f5bc2bd4949f8bbd5 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 18 Oct 2024 11:00:34 -0400 Subject: dma-mapping: use macros to define events in a class Use a macro to avoid repeating the parameters and arguments for each event in a class. Signed-off-by: Sean Anderson Reviewed-by: Steven Rostedt (Google) Signed-off-by: Christoph Hellwig --- include/trace/events/dma.h | 60 ++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index b0f41265191c..3d348cea4d7c 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -65,15 +65,14 @@ DECLARE_EVENT_CLASS(dma_map, decode_dma_attrs(__entry->attrs)) ); -DEFINE_EVENT(dma_map, dma_map_page, - TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, - size_t size, enum dma_data_direction dir, unsigned long attrs), - TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)); +#define DEFINE_MAP_EVENT(name) \ +DEFINE_EVENT(dma_map, name, \ + TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, \ + size_t size, enum dma_data_direction dir, unsigned long attrs), \ + TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)) -DEFINE_EVENT(dma_map, dma_map_resource, - TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, - size_t size, enum dma_data_direction dir, unsigned long attrs), - TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)); +DEFINE_MAP_EVENT(dma_map_page); +DEFINE_MAP_EVENT(dma_map_resource); DECLARE_EVENT_CLASS(dma_unmap, TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, @@ -104,15 +103,14 @@ DECLARE_EVENT_CLASS(dma_unmap, decode_dma_attrs(__entry->attrs)) ); -DEFINE_EVENT(dma_unmap, dma_unmap_page, - TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, - enum dma_data_direction dir, unsigned long attrs), - TP_ARGS(dev, addr, size, dir, attrs)); +#define DEFINE_UNMAP_EVENT(name) \ +DEFINE_EVENT(dma_unmap, name, \ + TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, \ + enum dma_data_direction dir, unsigned long attrs), \ + TP_ARGS(dev, addr, size, dir, attrs)) -DEFINE_EVENT(dma_unmap, dma_unmap_resource, - TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, - enum dma_data_direction dir, unsigned long attrs), - TP_ARGS(dev, addr, size, dir, attrs)); +DEFINE_UNMAP_EVENT(dma_unmap_page); +DEFINE_UNMAP_EVENT(dma_unmap_resource); TRACE_EVENT(dma_alloc, TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, @@ -279,15 +277,14 @@ DECLARE_EVENT_CLASS(dma_sync_single, __entry->size) ); -DEFINE_EVENT(dma_sync_single, dma_sync_single_for_cpu, - TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction dir), - TP_ARGS(dev, dma_addr, size, dir)); +#define DEFINE_SYNC_SINGLE_EVENT(name) \ +DEFINE_EVENT(dma_sync_single, name, \ + TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, \ + enum dma_data_direction dir), \ + TP_ARGS(dev, dma_addr, size, dir)) -DEFINE_EVENT(dma_sync_single, dma_sync_single_for_device, - TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction dir), - TP_ARGS(dev, dma_addr, size, dir)); +DEFINE_SYNC_SINGLE_EVENT(dma_sync_single_for_cpu); +DEFINE_SYNC_SINGLE_EVENT(dma_sync_single_for_device); DECLARE_EVENT_CLASS(dma_sync_sg, TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, @@ -326,15 +323,14 @@ DECLARE_EVENT_CLASS(dma_sync_sg, sizeof(unsigned int), sizeof(unsigned int))) ); -DEFINE_EVENT(dma_sync_sg, dma_sync_sg_for_cpu, - TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir), - TP_ARGS(dev, sg, nents, dir)); +#define DEFINE_SYNC_SG_EVENT(name) \ +DEFINE_EVENT(dma_sync_sg, name, \ + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, \ + enum dma_data_direction dir), \ + TP_ARGS(dev, sg, nents, dir)) -DEFINE_EVENT(dma_sync_sg, dma_sync_sg_for_device, - TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir), - TP_ARGS(dev, sg, nents, dir)); +DEFINE_SYNC_SG_EVENT(dma_sync_sg_for_cpu); +DEFINE_SYNC_SG_EVENT(dma_sync_sg_for_device); #endif /* _TRACE_DMA_H */ -- cgit v1.2.3 From 3afff779a725cba914e6caba360b696ae6f90249 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 18 Oct 2024 11:00:35 -0400 Subject: dma-mapping: trace dma_alloc/free direction In preparation for using these tracepoints in a few more places, trace the DMA direction as well. For coherent allocations this is always bidirectional. Signed-off-by: Sean Anderson Reviewed-by: Steven Rostedt (Google) Signed-off-by: Christoph Hellwig --- include/trace/events/dma.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index 3d348cea4d7c..267cfa49d9d5 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -114,8 +114,9 @@ DEFINE_UNMAP_EVENT(dma_unmap_resource); TRACE_EVENT(dma_alloc, TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, - size_t size, gfp_t flags, unsigned long attrs), - TP_ARGS(dev, virt_addr, dma_addr, size, flags, attrs), + size_t size, enum dma_data_direction dir, gfp_t flags, + unsigned long attrs), + TP_ARGS(dev, virt_addr, dma_addr, size, dir, flags, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) @@ -123,6 +124,7 @@ TRACE_EVENT(dma_alloc, __field(u64, dma_addr) __field(size_t, size) __field(gfp_t, flags) + __field(enum dma_data_direction, dir) __field(unsigned long, attrs) ), @@ -135,8 +137,9 @@ TRACE_EVENT(dma_alloc, __entry->attrs = attrs; ), - TP_printk("%s dma_addr=%llx size=%zu virt_addr=%p flags=%s attrs=%s", + TP_printk("%s dir=%s dma_addr=%llx size=%zu virt_addr=%p flags=%s attrs=%s", __get_str(device), + decode_dma_data_direction(__entry->dir), __entry->dma_addr, __entry->size, __entry->virt_addr, @@ -146,14 +149,15 @@ TRACE_EVENT(dma_alloc, TRACE_EVENT(dma_free, TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, - size_t size, unsigned long attrs), - TP_ARGS(dev, virt_addr, dma_addr, size, attrs), + size_t size, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, virt_addr, dma_addr, size, dir, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) __field(void *, virt_addr) __field(u64, dma_addr) __field(size_t, size) + __field(enum dma_data_direction, dir) __field(unsigned long, attrs) ), @@ -162,11 +166,13 @@ TRACE_EVENT(dma_free, __entry->virt_addr = virt_addr; __entry->dma_addr = dma_addr; __entry->size = size; + __entry->dir = dir; __entry->attrs = attrs; ), - TP_printk("%s dma_addr=%llx size=%zu virt_addr=%p attrs=%s", + TP_printk("%s dir=%s dma_addr=%llx size=%zu virt_addr=%p attrs=%s", __get_str(device), + decode_dma_data_direction(__entry->dir), __entry->dma_addr, __entry->size, __entry->virt_addr, -- cgit v1.2.3 From c4484ab86ee00f2d9236e2851621ea02c105f4cc Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 18 Oct 2024 11:00:36 -0400 Subject: dma-mapping: use trace_dma_alloc for dma_alloc* instead of using trace_dma_map In some cases, we use trace_dma_map to trace dma_alloc* functions. This generally follows dma_debug. However, this does not record all of the relevant information for allocations, such as GFP flags. Create new dma_alloc tracepoints for these functions. Note that while dma_alloc_noncontiguous may allocate discontiguous pages (from the CPU's point of view), the device will only see one contiguous mapping. Therefore, we just need to trace dma_addr and size. Signed-off-by: Sean Anderson Reviewed-by: Steven Rostedt (Google) Signed-off-by: Christoph Hellwig --- include/trace/events/dma.h | 99 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index 267cfa49d9d5..7a9606b8934e 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -112,7 +112,7 @@ DEFINE_EVENT(dma_unmap, name, \ DEFINE_UNMAP_EVENT(dma_unmap_page); DEFINE_UNMAP_EVENT(dma_unmap_resource); -TRACE_EVENT(dma_alloc, +DECLARE_EVENT_CLASS(dma_alloc_class, TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, gfp_t flags, unsigned long attrs), @@ -147,7 +147,58 @@ TRACE_EVENT(dma_alloc, decode_dma_attrs(__entry->attrs)) ); -TRACE_EVENT(dma_free, +#define DEFINE_ALLOC_EVENT(name) \ +DEFINE_EVENT(dma_alloc_class, name, \ + TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, \ + size_t size, enum dma_data_direction dir, gfp_t flags, \ + unsigned long attrs), \ + TP_ARGS(dev, virt_addr, dma_addr, size, dir, flags, attrs)) + +DEFINE_ALLOC_EVENT(dma_alloc); +DEFINE_ALLOC_EVENT(dma_alloc_pages); + +TRACE_EVENT(dma_alloc_sgt, + TP_PROTO(struct device *dev, struct sg_table *sgt, size_t size, + enum dma_data_direction dir, gfp_t flags, unsigned long attrs), + TP_ARGS(dev, sgt, size, dir, flags, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __dynamic_array(u64, phys_addrs, sgt->orig_nents) + __field(u64, dma_addr) + __field(size_t, size) + __field(enum dma_data_direction, dir) + __field(gfp_t, flags) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + struct scatterlist *sg; + int i; + + __assign_str(device); + for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) + ((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg); + __entry->dma_addr = sg_dma_address(sgt->sgl); + __entry->size = size; + __entry->dir = dir; + __entry->flags = flags; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s dma_addr=%llx size=%zu phys_addrs=%s flags=%s attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __entry->dma_addr, + __entry->size, + __print_array(__get_dynamic_array(phys_addrs), + __get_dynamic_array_len(phys_addrs) / + sizeof(u64), sizeof(u64)), + show_gfp_flags(__entry->flags), + decode_dma_attrs(__entry->attrs)) +); + +DECLARE_EVENT_CLASS(dma_free_class, TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, unsigned long attrs), TP_ARGS(dev, virt_addr, dma_addr, size, dir, attrs), @@ -179,6 +230,50 @@ TRACE_EVENT(dma_free, decode_dma_attrs(__entry->attrs)) ); +#define DEFINE_FREE_EVENT(name) \ +DEFINE_EVENT(dma_free_class, name, \ + TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, \ + size_t size, enum dma_data_direction dir, unsigned long attrs), \ + TP_ARGS(dev, virt_addr, dma_addr, size, dir, attrs)) + +DEFINE_FREE_EVENT(dma_free); +DEFINE_FREE_EVENT(dma_free_pages); + +TRACE_EVENT(dma_free_sgt, + TP_PROTO(struct device *dev, struct sg_table *sgt, size_t size, + enum dma_data_direction dir), + TP_ARGS(dev, sgt, size, dir), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __dynamic_array(u64, phys_addrs, sgt->orig_nents) + __field(u64, dma_addr) + __field(size_t, size) + __field(enum dma_data_direction, dir) + ), + + TP_fast_assign( + struct scatterlist *sg; + int i; + + __assign_str(device); + for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) + ((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg); + __entry->dma_addr = sg_dma_address(sgt->sgl); + __entry->size = size; + __entry->dir = dir; + ), + + TP_printk("%s dir=%s dma_addr=%llx size=%zu phys_addrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __entry->dma_addr, + __entry->size, + __print_array(__get_dynamic_array(phys_addrs), + __get_dynamic_array_len(phys_addrs) / + sizeof(u64), sizeof(u64))) +); + TRACE_EVENT(dma_map_sg, TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, int ents, enum dma_data_direction dir, unsigned long attrs), -- cgit v1.2.3 From 68b6dbf1f441c4eba3b8511728a41cf9b01dca35 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 18 Oct 2024 11:00:37 -0400 Subject: dma-mapping: trace more error paths It can be surprising to the user if DMA functions are only traced on success. On failure, it can be unclear what the source of the problem is. Fix this by tracing all functions even when they fail. Cases where we BUG/WARN are skipped, since those should be sufficiently noisy already. Signed-off-by: Sean Anderson Reviewed-by: Steven Rostedt (Google) Signed-off-by: Christoph Hellwig --- include/trace/events/dma.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index 7a9606b8934e..d8ddc27b6a7c 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -156,6 +156,7 @@ DEFINE_EVENT(dma_alloc_class, name, \ DEFINE_ALLOC_EVENT(dma_alloc); DEFINE_ALLOC_EVENT(dma_alloc_pages); +DEFINE_ALLOC_EVENT(dma_alloc_sgt_err); TRACE_EVENT(dma_alloc_sgt, TP_PROTO(struct device *dev, struct sg_table *sgt, size_t size, @@ -320,6 +321,41 @@ TRACE_EVENT(dma_map_sg, decode_dma_attrs(__entry->attrs)) ); +TRACE_EVENT(dma_map_sg_err, + TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, + int err, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, sgl, nents, err, dir, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __dynamic_array(u64, phys_addrs, nents) + __field(int, err) + __field(enum dma_data_direction, dir) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + struct scatterlist *sg; + int i; + + __assign_str(device); + for_each_sg(sgl, sg, nents, i) + ((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg); + __entry->err = err; + __entry->dir = dir; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s dma_addrs=%s err=%d attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __print_array(__get_dynamic_array(phys_addrs), + __get_dynamic_array_len(phys_addrs) / + sizeof(u64), sizeof(u64)), + __entry->err, + decode_dma_attrs(__entry->attrs)) +); + TRACE_EVENT(dma_unmap_sg, TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs), -- cgit v1.2.3 From be164349e173a8e71cd76f17c7ed720813b8d69b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 13 Oct 2024 07:19:48 +0200 Subject: dma-mapping: drop unneeded includes from dma-mapping.h Back in the day a lot of logic was implemented inline in dma-mapping.h and needed various includes. Move of this has long been moved out of line, so we can drop various includes to improve kernel rebuild times. Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 1524da363734..b79925b1c433 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -2,15 +2,11 @@ #ifndef _LINUX_DMA_MAPPING_H #define _LINUX_DMA_MAPPING_H -#include -#include -#include #include #include #include #include #include -#include /** * List of possible attributes associated with a DMA mapping. The semantics -- cgit v1.2.3 From 69e5a17511f654db0fe058d22719cef008c9faf1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 18 Oct 2024 14:01:19 -0300 Subject: iommu: Remove useless flush from iommu_create_device_direct_mappings() These days iommu_map() does not require external flushing, it always internally handles any required flushes. Since iommu_create_device_direct_mappings() only calls iommu_map(), remove the extra call. Since this is the last call site for iommu_flush_iotlb_all() remove it too. Signed-off-by: Jason Gunthorpe Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/0-v1-bb6c694e1b07+a29e1-iommu_no_flush_all_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 62d1b85c80d3..03f9fa833db9 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -852,12 +852,6 @@ void iommu_set_dma_strict(void); extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags); -static inline void iommu_flush_iotlb_all(struct iommu_domain *domain) -{ - if (domain->ops->flush_iotlb_all) - domain->ops->flush_iotlb_all(domain); -} - static inline void iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather) { @@ -1141,10 +1135,6 @@ static inline ssize_t iommu_map_sg(struct iommu_domain *domain, return -ENODEV; } -static inline void iommu_flush_iotlb_all(struct iommu_domain *domain) -{ -} - static inline void iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather) { -- cgit v1.2.3 From f6440fcc9c7b7aaad2e52830ab83fa71990f76ff Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 9 Oct 2024 12:11:47 +0800 Subject: iommu: Remove iommu_domain_alloc() The iommu_domain_alloc() interface is no longer used in the tree anymore. Remove it to avoid dead code. There is increasing demand for supporting multiple IOMMU drivers, and this is the last bus-based thing standing in the way of that. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20241009041147.28391-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 03f9fa833db9..d93c87f0c003 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -787,7 +787,6 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) extern int bus_iommu_probe(const struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); extern bool iommu_group_has_isolated_msi(struct iommu_group *group); -extern struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus); struct iommu_domain *iommu_paging_domain_alloc(struct device *dev); extern void iommu_domain_free(struct iommu_domain *domain); extern int iommu_attach_device(struct iommu_domain *domain, @@ -1079,11 +1078,6 @@ static inline bool device_iommu_capable(struct device *dev, enum iommu_cap cap) return false; } -static inline struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) -{ - return NULL; -} - static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev) { return ERR_PTR(-ENODEV); -- cgit v1.2.3 From 20858d4ebb423826b7a978d54cde195f51d87e20 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 28 Oct 2024 09:38:00 +0000 Subject: iommu: Introduce iommu_paging_domain_alloc_flags() Currently drivers calls iommu_paging_domain_alloc(dev) to get an UNMANAGED domain. This is not sufficient to support PASID with UNMANAGED domain as some HW like AMD requires certain page table type to support PASIDs. Also the domain_alloc_paging op only passes device as param for domain allocation. This is not sufficient for AMD driver to decide the right page table. Instead of extending ops->domain_alloc_paging() it was decided to enhance ops->domain_alloc_user() so that caller can pass various additional flags. Hence add iommu_paging_domain_alloc_flags() API which takes flags as parameter. Caller can pass additional parameter to indicate type of domain required, etc. iommu_paging_domain_alloc_flags() internally calls appropriate callback function to allocate a domain. Signed-off-by: Jason Gunthorpe [Added description - Vasant] Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Reviewed-by: Yi Liu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20241028093810.5901-3-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d93c87f0c003..aa78d911fdda 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -511,8 +511,6 @@ static inline int __iommu_copy_struct_from_user_array( * the caller iommu_domain_alloc() returns. * @domain_alloc_user: Allocate an iommu domain corresponding to the input * parameters as defined in include/uapi/linux/iommufd.h. - * Unlike @domain_alloc, it is called only by IOMMUFD and - * must fully initialize the new domain before return. * Upon success, if the @user_data is valid and the @parent * points to a kernel-managed domain, the new domain must be * IOMMU_DOMAIN_NESTED type; otherwise, the @parent must be @@ -787,7 +785,11 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) extern int bus_iommu_probe(const struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); extern bool iommu_group_has_isolated_msi(struct iommu_group *group); -struct iommu_domain *iommu_paging_domain_alloc(struct device *dev); +struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev, unsigned int flags); +static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev) +{ + return iommu_paging_domain_alloc_flags(dev, 0); +} extern void iommu_domain_free(struct iommu_domain *domain); extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); @@ -1078,6 +1080,12 @@ static inline bool device_iommu_capable(struct device *dev, enum iommu_cap cap) return false; } +struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev, + unsigned int flags) +{ + return ERR_PTR(-ENODEV); +} + static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev) { return ERR_PTR(-ENODEV); -- cgit v1.2.3 From b7a0855eb95f6db8ac8e17596e76f7b94a790fe6 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 28 Oct 2024 09:38:01 +0000 Subject: iommu: Add new flag to explictly request PASID capable domain Introduce new flag (IOMMU_HWPT_ALLOC_PASID) to domain_alloc_users() ops. If IOMMU supports PASID it will allocate domain. Otherwise return error. In error path check for -EOPNOTSUPP and try to allocate non-PASID domain so that DMA-API mode work fine for drivers which does not support PASID as well. Also modify __iommu_group_alloc_default_domain() to call iommu_paging_domain_alloc_flags() with appropriate flag when allocating paging domain. Signed-off-by: Jason Gunthorpe Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20241028093810.5901-4-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- include/uapi/linux/iommufd.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 72010f71c5e4..0c0ed28ee113 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -359,11 +359,19 @@ struct iommu_vfio_ioas { * enforced on device attachment * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is * valid. + * @IOMMU_HWPT_ALLOC_PASID: Requests a domain that can be used with PASID. The + * domain can be attached to any PASID on the device. + * Any domain attached to the non-PASID part of the + * device must also be flaged, otherwise attaching a + * PASID will blocked. + * If IOMMU does not support PASID it will return + * error (-EOPNOTSUPP). */ enum iommufd_hwpt_alloc_flags { IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0, IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1, IOMMU_HWPT_FAULT_ID_VALID = 1 << 2, + IOMMU_HWPT_ALLOC_PASID = 1 << 3, }; /** -- cgit v1.2.3 From b6da940130579769a42605b2c7f529b6f14ef1f8 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 9 Oct 2024 16:29:37 +0200 Subject: mm, slab: add kerneldocs for common SLAB_ flags We have many SLAB_ flags but many are used only internally, by kunit tests or debugging subsystems cooperating with slab, or are set according to slab_debug boot parameter. Create kerneldocs for the commonly used flags that may be passed to kmem_cache_create(). SLAB_TYPESAFE_BY_RCU already had a detailed description, so turn it to a kerneldoc. Add some details for SLAB_ACCOUNT, SLAB_RECLAIM_ACCOUNT and SLAB_HWCACHE_ALIGN. Reference them from the __kmem_cache_create_args() kerneldoc. Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 60 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index b35e2db7eb0e..49e9fb93e864 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -77,7 +77,17 @@ enum _slab_flag_bits { #define SLAB_POISON __SLAB_FLAG_BIT(_SLAB_POISON) /* Indicate a kmalloc slab */ #define SLAB_KMALLOC __SLAB_FLAG_BIT(_SLAB_KMALLOC) -/* Align objs on cache lines */ +/** + * define SLAB_HWCACHE_ALIGN - Align objects on cache line boundaries. + * + * Sufficiently large objects are aligned on cache line boundary. For object + * size smaller than a half of cache line size, the alignment is on the half of + * cache line size. In general, if object size is smaller than 1/2^n of cache + * line size, the alignment is adjusted to 1/2^n. + * + * If explicit alignment is also requested by the respective + * &struct kmem_cache_args field, the greater of both is alignments is applied. + */ #define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN) /* Use GFP_DMA memory */ #define SLAB_CACHE_DMA __SLAB_FLAG_BIT(_SLAB_CACHE_DMA) @@ -87,8 +97,8 @@ enum _slab_flag_bits { #define SLAB_STORE_USER __SLAB_FLAG_BIT(_SLAB_STORE_USER) /* Panic if kmem_cache_create() fails */ #define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC) -/* - * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! +/** + * define SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! * * This delays freeing the SLAB page by a grace period, it does _NOT_ * delay object freeing. This means that if you do kmem_cache_free() @@ -99,20 +109,22 @@ enum _slab_flag_bits { * stays valid, the trick to using this is relying on an independent * object validation pass. Something like: * - * begin: - * rcu_read_lock(); - * obj = lockless_lookup(key); - * if (obj) { - * if (!try_get_ref(obj)) // might fail for free objects - * rcu_read_unlock(); - * goto begin; + * :: + * + * begin: + * rcu_read_lock(); + * obj = lockless_lookup(key); + * if (obj) { + * if (!try_get_ref(obj)) // might fail for free objects + * rcu_read_unlock(); + * goto begin; * - * if (obj->key != key) { // not the object we expected - * put_ref(obj); - * rcu_read_unlock(); - * goto begin; - * } - * } + * if (obj->key != key) { // not the object we expected + * put_ref(obj); + * rcu_read_unlock(); + * goto begin; + * } + * } * rcu_read_unlock(); * * This is useful if we need to approach a kernel structure obliquely, @@ -137,7 +149,6 @@ enum _slab_flag_bits { * * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. */ -/* Defer freeing slabs to RCU */ #define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU) /* Trace allocations and frees */ #define SLAB_TRACE __SLAB_FLAG_BIT(_SLAB_TRACE) @@ -170,7 +181,12 @@ enum _slab_flag_bits { #else # define SLAB_FAILSLAB __SLAB_FLAG_UNUSED #endif -/* Account to memcg */ +/** + * define SLAB_ACCOUNT - Account allocations to memcg. + * + * All object allocations from this cache will be memcg accounted, regardless of + * __GFP_ACCOUNT being or not being passed to individual allocations. + */ #ifdef CONFIG_MEMCG # define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT) #else @@ -197,7 +213,13 @@ enum _slab_flag_bits { #endif /* The following flags affect the page allocator grouping pages by mobility */ -/* Objects are reclaimable */ +/** + * define SLAB_RECLAIM_ACCOUNT - Objects are reclaimable. + * + * Use this flag for caches that have an associated shrinker. As a result, slab + * pages are allocated with __GFP_RECLAIMABLE, which affects grouping pages by + * mobility, and are accounted in SReclaimable counter in /proc/meminfo + */ #ifndef CONFIG_SLUB_TINY #define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT) #else -- cgit v1.2.3 From 68f99be287a59d50a9ad231d523f7e578f8bd28a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Oct 2024 10:42:00 +0200 Subject: signal: Confine POSIX_TIMERS properly Move the itimer rearming out of the signal code and consolidate all posix timer related functions in the signal code under one ifdef. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20241001083835.314100569@linutronix.de --- include/linux/posix-timers.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 453691710839..670bf03a56ef 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -100,6 +100,8 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, { pct->bases[CPUCLOCK_SCHED].nextevt = runtime; } +void posixtimer_rearm_itimer(struct task_struct *p); +void posixtimer_rearm(struct kernel_siginfo *info); /* Init task static initializer */ #define INIT_CPU_TIMERBASE(b) { \ @@ -122,6 +124,8 @@ struct cpu_timer { }; static inline void posix_cputimers_init(struct posix_cputimers *pct) { } static inline void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit) { } +static inline void posixtimer_rearm_itimer(struct task_struct *p) { } +static inline void posixtimer_rearm(struct kernel_siginfo *info) { } #endif #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK @@ -196,5 +200,4 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); -void posixtimer_rearm(struct kernel_siginfo *info); #endif -- cgit v1.2.3 From 4febce44cfebcb490b196d5d10ae9f403ca4c956 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Oct 2024 10:42:03 +0200 Subject: posix-timers: Cure si_sys_private race The si_sys_private member of the siginfo which is embedded in the preallocated sigqueue is used by the posix timer code to decide whether a timer must be reprogrammed on signal delivery. The handling of this is racy as a long standing comment in that code documents. It is modified with the timer lock held, but without sighand lock being held. The actual signal delivery code checks for it under sighand lock without holding the timer lock. Hand the new value to send_sigqueue() as argument and store it with sighand lock held. This is an intermediate change to address this issue. The arguments to this function will be cleanup in subsequent changes. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20241001083835.434338954@linutronix.de --- include/linux/sched/signal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index c8ed09ac29ac..bd9f569231d9 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -340,7 +340,7 @@ extern int send_sig(int, struct task_struct *, int); extern int zap_other_threads(struct task_struct *p); extern struct sigqueue *sigqueue_alloc(void); extern void sigqueue_free(struct sigqueue *); -extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type); +extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type, int si_private); extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); static inline void clear_notify_signal(void) -- cgit v1.2.3 From c775ea28d4e23f5e58b6953645ef90c1b27a8e83 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Oct 2024 10:42:04 +0200 Subject: signal: Allow POSIX timer signals to be dropped In case that a timer was reprogrammed or deleted an already pending signal is obsolete. Right now such signals are kept around and eventually delivered. While POSIX is blury about this: - "The effect of disarming or resetting a timer with pending expiration notifications is unspecified." - "The disposition of pending signals for the deleted timer is unspecified." it is reasonable in both cases to expect that pending signals are discarded as they have no meaning anymore. Prepare the signal code to allow dropping posix timer signals. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20241001083835.494416923@linutronix.de --- include/linux/posix-timers.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 670bf03a56ef..4ab49e5c42af 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -100,8 +100,9 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, { pct->bases[CPUCLOCK_SCHED].nextevt = runtime; } + void posixtimer_rearm_itimer(struct task_struct *p); -void posixtimer_rearm(struct kernel_siginfo *info); +bool posixtimer_deliver_signal(struct kernel_siginfo *info); /* Init task static initializer */ #define INIT_CPU_TIMERBASE(b) { \ @@ -125,7 +126,7 @@ static inline void posix_cputimers_init(struct posix_cputimers *pct) { } static inline void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit) { } static inline void posixtimer_rearm_itimer(struct task_struct *p) { } -static inline void posixtimer_rearm(struct kernel_siginfo *info) { } +static inline bool posixtimer_deliver_signal(struct kernel_siginfo *info) { return false; } #endif #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK -- cgit v1.2.3 From cd1e93aedab7f749760a33e9e094381973b1120e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Oct 2024 10:42:07 +0200 Subject: posix-timers: Rename k_itimer:: It_requeue_pending Prepare for using this struct member to do a proper reprogramming and deletion accounting so that stale signals can be dropped. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20241001083835.611997737@linutronix.de --- include/linux/posix-timers.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 4ab49e5c42af..253d106fac2c 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -150,8 +150,7 @@ static inline void posix_cputimers_init_work(void) { } * @it_active: Marker that timer is active * @it_overrun: The overrun counter for pending signals * @it_overrun_last: The overrun at the time of the last delivered signal - * @it_requeue_pending: Indicator that timer waits for being requeued on - * signal delivery + * @it_signal_seq: Sequence count to control signal delivery * @it_sigev_notify: The notify word of sigevent struct for signal delivery * @it_interval: The interval for periodic timers * @it_signal: Pointer to the creators signal struct @@ -172,7 +171,7 @@ struct k_itimer { int it_active; s64 it_overrun; s64 it_overrun_last; - int it_requeue_pending; + unsigned int it_signal_seq; int it_sigev_notify; ktime_t it_interval; struct signal_struct *it_signal; -- cgit v1.2.3 From 1550dde8a537b35dbf066c7f9cfe5f9b360bce0d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Oct 2024 10:42:09 +0200 Subject: posix-timers: Add proper state tracking Right now the state tracking is done by two struct members: - it_active: A boolean which tracks armed/disarmed state - it_signal_seq: A sequence counter which is used to invalidate settings and prevent rearming Replace it_active with it_status and keep properly track about the states in one place. This allows to reuse it_signal_seq to track reprogramming, disarm and delete operations in order to drop signals which are related to the state previous of those operations. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20241001083835.670337048@linutronix.de --- include/linux/posix-timers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 253d106fac2c..02afbb4da7f7 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -147,7 +147,7 @@ static inline void posix_cputimers_init_work(void) { } * @kclock: Pointer to the k_clock struct handling this timer * @it_clock: The posix timer clock id * @it_id: The posix timer id for identifying the timer - * @it_active: Marker that timer is active + * @it_status: The status of the timer * @it_overrun: The overrun counter for pending signals * @it_overrun_last: The overrun at the time of the last delivered signal * @it_signal_seq: Sequence count to control signal delivery @@ -168,7 +168,7 @@ struct k_itimer { const struct k_clock *kclock; clockid_t it_clock; timer_t it_id; - int it_active; + int it_status; s64 it_overrun; s64 it_overrun_last; unsigned int it_signal_seq; -- cgit v1.2.3 From 9cb7e40d388d6c0e4677809c6b2950bc67fd8830 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:28 -0700 Subject: rtnetlink: Make per-netns RTNL dereference helpers to macro. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_DEBUG_NET_SMALL_RTNL is off, rtnl_net_dereference() is the static inline wrapper of rtnl_dereference() returning a plain (void *) pointer to make sure net is always evaluated as requested in [0]. But, it makes sparse complain [1] when the pointer has __rcu annotation: net/ipv4/devinet.c:674:47: sparse: warning: incorrect type in argument 2 (different address spaces) net/ipv4/devinet.c:674:47: sparse: expected void *p net/ipv4/devinet.c:674:47: sparse: got struct in_ifaddr [noderef] __rcu * Also, if we evaluate net as (void *) in a macro, then the compiler in turn fails to build due to -Werror=unused-value. #define rtnl_net_dereference(net, p) \ ({ \ (void *)net; \ rtnl_dereference(p); \ }) net/ipv4/devinet.c: In function ‘inet_rtm_deladdr’: ./include/linux/rtnetlink.h:154:17: error: statement with no effect [-Werror=unused-value] 154 | (void *)net; \ net/ipv4/devinet.c:674:21: note: in expansion of macro ‘rtnl_net_dereference’ 674 | (ifa = rtnl_net_dereference(net, *ifap)) != NULL; | ^~~~~~~~~~~~~~~~~~~~ Let's go back to the original simplest macro. Note that checkpatch complains about this approach, but it's one-shot and less noisy than the other two. WARNING: Argument 'net' is not used in function-like macro #76: FILE: include/linux/rtnetlink.h:142: +#define rtnl_net_dereference(net, p) \ + rtnl_dereference(p) Fixes: 844e5e7e656d ("rtnetlink: Add assertion helpers for per-netns RTNL.") Link: https://lore.kernel.org/netdev/20241004132145.7fd208e9@kernel.org/ [0] Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202410200325.SaEJmyZS-lkp@intel.com/ [1] Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- include/linux/rtnetlink.h | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 8468a4ce8510..0e62918de63b 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -137,21 +137,12 @@ static inline void ASSERT_RTNL_NET(struct net *net) ASSERT_RTNL(); } -static inline void *rcu_dereference_rtnl_net(struct net *net, void *p) -{ - return rcu_dereference_rtnl(p); -} - -static inline void *rtnl_net_dereference(struct net *net, void *p) -{ - return rtnl_dereference(p); -} - -static inline void *rcu_replace_pointer_rtnl_net(struct net *net, - void *rp, void *p) -{ - return rcu_replace_pointer_rtnl(rp, p); -} +#define rcu_dereference_rtnl_net(net, p) \ + rcu_dereference_rtnl(p) +#define rtnl_net_dereference(net, p) \ + rtnl_dereference(p) +#define rcu_replace_pointer_rtnl_net(net, rp, p) \ + rcu_replace_pointer_rtnl(rp, p) #endif static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) -- cgit v1.2.3 From 26d8db55eeacb7dc78672523f57825916d203de4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:29 -0700 Subject: rtnetlink: Define RTNL_FLAG_DOIT_PERNET for per-netns RTNL doit(). We will push RTNL down to each doit() as rtnl_net_lock(). We can use RTNL_FLAG_DOIT_UNLOCKED to call doit() without RTNL, but doit() will still hold RTNL. Let's define RTNL_FLAG_DOIT_PERNET as an alias of RTNL_FLAG_DOIT_UNLOCKED. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index e0d9a8eae6b6..b260c0cc9671 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -12,6 +12,7 @@ typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); enum rtnl_link_flags { RTNL_FLAG_DOIT_UNLOCKED = BIT(0), +#define RTNL_FLAG_DOIT_PERNET RTNL_FLAG_DOIT_UNLOCKED RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1), RTNL_FLAG_DUMP_UNLOCKED = BIT(2), RTNL_FLAG_DUMP_SPLIT_NLM_DONE = BIT(3), /* legacy behavior */ -- cgit v1.2.3 From d4b483208b2606add41a22bdd3c8cd6d36009319 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:33 -0700 Subject: ipv4: Use per-netns RTNL helpers in inet_rtm_newaddr(). inet_rtm_to_ifa() and find_matching_ifa() are called under rtnl_net_lock(). __in_dev_get_rtnl() and in_dev_for_each_ifa_rtnl() there can use per-netns RTNL helpers. Let's define and use __in_dev_get_rtnl_net() and in_dev_for_each_ifa_rtnl_net(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/linux/inetdevice.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index d9c690c8c80b..5730ba6b1cfa 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -226,6 +226,10 @@ static __inline__ bool bad_mask(__be32 mask, __be32 addr) for (ifa = rtnl_dereference((in_dev)->ifa_list); ifa; \ ifa = rtnl_dereference(ifa->ifa_next)) +#define in_dev_for_each_ifa_rtnl_net(net, ifa, in_dev) \ + for (ifa = rtnl_net_dereference(net, (in_dev)->ifa_list); ifa; \ + ifa = rtnl_net_dereference(net, ifa->ifa_next)) + #define in_dev_for_each_ifa_rcu(ifa, in_dev) \ for (ifa = rcu_dereference((in_dev)->ifa_list); ifa; \ ifa = rcu_dereference(ifa->ifa_next)) @@ -252,6 +256,11 @@ static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev) return rtnl_dereference(dev->ip_ptr); } +static inline struct in_device *__in_dev_get_rtnl_net(const struct net_device *dev) +{ + return rtnl_net_dereference(dev_net(dev), dev->ip_ptr); +} + /* called with rcu_read_lock or rtnl held */ static inline bool ip_ignore_linkdown(const struct net_device *dev) { -- cgit v1.2.3 From d1c81818aa227b37d65b40f9883109c5256b9bfb Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:36 -0700 Subject: rtnetlink: Define rtnl_net_trylock(). We will need the per-netns version of rtnl_trylock(). rtnl_net_trylock() calls __rtnl_net_lock() only when rtnl_trylock() successfully holds RTNL. When RTNL is removed, we will use mutex_trylock() for per-netns RTNL. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/linux/rtnetlink.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 0e62918de63b..14b88f551920 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -101,6 +101,7 @@ void __rtnl_net_lock(struct net *net); void __rtnl_net_unlock(struct net *net); void rtnl_net_lock(struct net *net); void rtnl_net_unlock(struct net *net); +int rtnl_net_trylock(struct net *net); int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b); bool rtnl_net_is_locked(struct net *net); @@ -132,6 +133,11 @@ static inline void rtnl_net_unlock(struct net *net) rtnl_unlock(); } +static inline int rtnl_net_trylock(struct net *net) +{ + return rtnl_trylock(); +} + static inline void ASSERT_RTNL_NET(struct net *net) { ASSERT_RTNL(); -- cgit v1.2.3 From 1ddf9916ac09313128e40d6581cef889c0b4ce84 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 23 Oct 2024 12:53:42 +0200 Subject: xfrm: Add support for per cpu xfrm state handling. Currently all flows for a certain SA must be processed by the same cpu to avoid packet reordering and lock contention of the xfrm state lock. To get rid of this limitation, the IETF standardized per cpu SAs in RFC 9611. This patch implements the xfrm part of it. We add the cpu as a lookup key for xfrm states and a config option to generate acquire messages for each cpu. With that, we can have on each cpu a SA with identical traffic selector so that flows can be processed in parallel on all cpus. Signed-off-by: Steffen Klassert Tested-by: Antony Antony Tested-by: Tobias Brunner --- include/net/xfrm.h | 5 +++-- include/uapi/linux/xfrm.h | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a0bdd58f401c..f5275618e744 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -188,6 +188,7 @@ struct xfrm_state { refcount_t refcnt; spinlock_t lock; + u32 pcpu_num; struct xfrm_id id; struct xfrm_selector sel; struct xfrm_mark mark; @@ -1684,7 +1685,7 @@ struct xfrmk_spdinfo { u32 spdhmcnt; }; -struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq); +struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num); int xfrm_state_delete(struct xfrm_state *x); int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync); int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid); @@ -1796,7 +1797,7 @@ int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack); int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi, struct netlink_ext_ack *extack); struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, - u8 mode, u32 reqid, u32 if_id, u8 proto, + u8 mode, u32 reqid, u32 if_id, u32 pcpu_num, u8 proto, const xfrm_address_t *daddr, const xfrm_address_t *saddr, int create, unsigned short family); diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index f28701500714..d73a97e3030a 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -322,6 +322,7 @@ enum xfrm_attr_type_t { XFRMA_MTIMER_THRESH, /* __u32 in seconds for input SA */ XFRMA_SA_DIR, /* __u8 */ XFRMA_NAT_KEEPALIVE_INTERVAL, /* __u32 in seconds for NAT keepalive */ + XFRMA_SA_PCPU, /* __u32 */ __XFRMA_MAX #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */ @@ -437,6 +438,7 @@ struct xfrm_userpolicy_info { #define XFRM_POLICY_LOCALOK 1 /* Allow user to override global policy */ /* Automatically expand selector to include matching ICMP payloads. */ #define XFRM_POLICY_ICMP 2 +#define XFRM_POLICY_CPU_ACQUIRE 4 __u8 share; }; -- cgit v1.2.3 From 0045e3d80613cc7174dc15f189ee6fc4e73b9365 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 23 Oct 2024 12:53:43 +0200 Subject: xfrm: Cache used outbound xfrm states at the policy. Now that we can have percpu xfrm states, the number of active states might increase. To get a better lookup performance, we cache the used xfrm states at the policy for outbound IPsec traffic. Signed-off-by: Steffen Klassert Tested-by: Antony Antony Tested-by: Tobias Brunner --- include/net/xfrm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f5275618e744..0b394c5fb5f3 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -184,6 +184,7 @@ struct xfrm_state { }; struct hlist_node byspi; struct hlist_node byseq; + struct hlist_node state_cache; refcount_t refcnt; spinlock_t lock; @@ -537,6 +538,7 @@ struct xfrm_policy_queue { * @xp_net: network namespace the policy lives in * @bydst: hlist node for SPD hash table or rbtree list * @byidx: hlist node for index hash table + * @state_cache_list: hlist head for policy cached xfrm states * @lock: serialize changes to policy structure members * @refcnt: reference count, freed once it reaches 0 * @pos: kernel internal tie-breaker to determine age of policy @@ -567,6 +569,8 @@ struct xfrm_policy { struct hlist_node bydst; struct hlist_node byidx; + struct hlist_head state_cache_list; + /* This lock only affects elements except for entry. */ rwlock_t lock; refcount_t refcnt; -- cgit v1.2.3 From 81a331a0e72ddc2f75092603d9577bd1a0ca23ad Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 23 Oct 2024 12:53:44 +0200 Subject: xfrm: Add an inbound percpu state cache. Now that we can have percpu xfrm states, the number of active states might increase. To get a better lookup performance, we add a percpu cache to cache the used inbound xfrm states. Signed-off-by: Steffen Klassert Tested-by: Antony Antony Tested-by: Tobias Brunner --- include/net/netns/xfrm.h | 1 + include/net/xfrm.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index ae60d6664095..23dd647fe024 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -43,6 +43,7 @@ struct netns_xfrm { struct hlist_head __rcu *state_bysrc; struct hlist_head __rcu *state_byspi; struct hlist_head __rcu *state_byseq; + struct hlist_head __percpu *state_cache_input; unsigned int state_hmask; unsigned int state_num; struct work_struct state_hash_work; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0b394c5fb5f3..2b87999bd5aa 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -185,6 +185,7 @@ struct xfrm_state { struct hlist_node byspi; struct hlist_node byseq; struct hlist_node state_cache; + struct hlist_node state_cache_input; refcount_t refcnt; spinlock_t lock; @@ -1650,6 +1651,10 @@ int xfrm_state_update(struct xfrm_state *x); struct xfrm_state *xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family); +struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark, + const xfrm_address_t *daddr, + __be32 spi, u8 proto, + unsigned short family); struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark, const xfrm_address_t *daddr, const xfrm_address_t *saddr, -- cgit v1.2.3 From d14772c0d88c387f881a577aa136e1e9b1291d07 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 29 Oct 2024 11:54:25 +0100 Subject: iommu: Fix prototype of iommu_paging_domain_alloc_flags() The iommu_paging_domain_alloc_flags() prototype for non-iommu kernel configurations lacks the 'static inline' prefixes. Cc: Jason Gunthorpe Cc: Vasant Hegde Fixes: 20858d4ebb42 ("iommu: Introduce iommu_paging_domain_alloc_flags()") Reviewed-by: Jason Gunthorpe Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index aa78d911fdda..522efdc7d815 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1080,7 +1080,7 @@ static inline bool device_iommu_capable(struct device *dev, enum iommu_cap cap) return false; } -struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev, +static inline struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev, unsigned int flags) { return ERR_PTR(-ENODEV); -- cgit v1.2.3 From a33bf8d8ce7e06bf0f033865b0cea5887cd2ac8c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 29 Oct 2024 12:11:49 +0100 Subject: iommu: Restore iommu_flush_iotlb_all() This patch restores the iommu_flush_iotlb_all() function. Commit 69e5a17511f6 ("iommu: Remove useless flush from iommu_create_device_direct_mappings()") claims it removed the last call-site, except it did not. There is still at least one caller in drivers/gpu/drm/msm/msm_iommu.c so keep the function around until all call-sites are updated. Cc: Jason Gunthorpe Fixes: 69e5a17511f6 ("iommu: Remove useless flush from iommu_create_device_direct_mappings()") Acked-by: Will Deacon Reviewed-by: Jason Gunthorpe Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 522efdc7d815..8cce372a33f1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -853,6 +853,12 @@ void iommu_set_dma_strict(void); extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags); +static inline void iommu_flush_iotlb_all(struct iommu_domain *domain) +{ + if (domain->ops->flush_iotlb_all) + domain->ops->flush_iotlb_all(domain); +} + static inline void iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather) { @@ -1137,6 +1143,10 @@ static inline ssize_t iommu_map_sg(struct iommu_domain *domain, return -ENODEV; } +static inline void iommu_flush_iotlb_all(struct iommu_domain *domain) +{ +} + static inline void iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather) { -- cgit v1.2.3 From c382429b587ac49bd179d768f13e7fa5e7ed1787 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sat, 26 Oct 2024 21:38:02 +0200 Subject: platform/x86: wmi: Replace dev_to_wdev() with to_wmi_device() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace dev_to_wdev() with to_wmi_device() to stop duplicating functionality. Also switch to_wmi_device() to use container_of_const() so const values are handled correctly. Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20241026193803.8802-2-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/wmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/wmi.h b/include/linux/wmi.h index 120019677fc6..fca5fd43c707 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -34,7 +34,7 @@ struct wmi_device { * * Cast a struct device to a struct wmi_device. */ -#define to_wmi_device(device) container_of(device, struct wmi_device, dev) +#define to_wmi_device(device) container_of_const(device, struct wmi_device, dev) extern acpi_status wmidev_evaluate_method(struct wmi_device *wdev, u8 instance, u32 method_id, -- cgit v1.2.3 From e001341a984e709e377b275123aecb5a763eaef9 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sat, 26 Oct 2024 21:38:03 +0200 Subject: platform/x86: wmi: Introduce to_wmi_driver() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce to_wmi_driver() as a replacement for dev_to_wdrv() so WMI drivers can use this support macro instead of having to duplicate its functionality. Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20241026193803.8802-3-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/wmi.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/wmi.h b/include/linux/wmi.h index fca5fd43c707..10751c8e5e6a 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -73,6 +73,14 @@ struct wmi_driver { void (*notify)(struct wmi_device *device, union acpi_object *data); }; +/** + * to_wmi_driver() - Helper macro to cast a driver to a wmi_driver + * @drv: driver struct + * + * Cast a struct device_driver to a struct wmi_driver. + */ +#define to_wmi_driver(drv) container_of_const(drv, struct wmi_driver, driver) + extern int __must_check __wmi_driver_register(struct wmi_driver *driver, struct module *owner); extern void wmi_driver_unregister(struct wmi_driver *driver); -- cgit v1.2.3 From 1db363f6979d1fc8a94ea561a50f79bac40d39e4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 22 Oct 2024 12:41:49 +0300 Subject: drm/i915/pciids: add PVC PCI ID macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xe PCI ID macros are a subset of the i915 PCI IDs macros, apart from the PVC PCI IDs (naturally, because i915 does not and will not support PVC). In preparation of using a shared file, add PVC PCI IDs to i915_pciids.h. Cc: Joonas Lahtinen Cc: Lucas De Marchi Cc: Rodrigo Vivi Cc: Thomas Hellström Cc: Tvrtko Ursulin Reviewed-by: Andi Shyti Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/bc62e37cbfa3ed4dbfc75a7ca69b87afae6a727b.1729590029.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/intel/i915_pciids.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h index 6b92f8c3731b..81d4dc5d9242 100644 --- a/include/drm/intel/i915_pciids.h +++ b/include/drm/intel/i915_pciids.h @@ -780,6 +780,22 @@ MACRO__(0x7D60, ## __VA_ARGS__), \ MACRO__(0x7DD5, ## __VA_ARGS__) +/* PVC */ +#define INTEL_PVC_IDS(MACRO__, ...) \ + MACRO__(0x0B69, ## __VA_ARGS__), \ + MACRO__(0x0B6E, ## __VA_ARGS__), \ + MACRO__(0x0BD4, ## __VA_ARGS__), \ + MACRO__(0x0BD5, ## __VA_ARGS__), \ + MACRO__(0x0BD6, ## __VA_ARGS__), \ + MACRO__(0x0BD7, ## __VA_ARGS__), \ + MACRO__(0x0BD8, ## __VA_ARGS__), \ + MACRO__(0x0BD9, ## __VA_ARGS__), \ + MACRO__(0x0BDA, ## __VA_ARGS__), \ + MACRO__(0x0BDB, ## __VA_ARGS__), \ + MACRO__(0x0BE0, ## __VA_ARGS__), \ + MACRO__(0x0BE1, ## __VA_ARGS__), \ + MACRO__(0x0BE5, ## __VA_ARGS__) + /* LNL */ #define INTEL_LNL_IDS(MACRO__, ...) \ MACRO__(0x6420, ## __VA_ARGS__), \ -- cgit v1.2.3 From f719c2a2d1e7fb891d45998f241ff4273d7ae7e6 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 22 Oct 2024 12:41:50 +0300 Subject: drm/intel/pciids: rename i915_pciids.h to just pciids.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation of sharing the PCI ID macros between i915 and xe, rename i915_pciids.h to pciids.h. Cc: Joonas Lahtinen Cc: Lucas De Marchi Cc: Rodrigo Vivi Cc: Thomas Hellström Cc: Tvrtko Ursulin Reviewed-by: Andi Shyti Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/835143845faa5310e4bb58405a8a0848392bbf06.1729590029.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/intel/i915_pciids.h | 825 ---------------------------------------- include/drm/intel/pciids.h | 825 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 825 insertions(+), 825 deletions(-) delete mode 100644 include/drm/intel/i915_pciids.h create mode 100644 include/drm/intel/pciids.h (limited to 'include') diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h deleted file mode 100644 index 81d4dc5d9242..000000000000 --- a/include/drm/intel/i915_pciids.h +++ /dev/null @@ -1,825 +0,0 @@ -/* - * Copyright 2013 Intel Corporation - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ -#ifndef _I915_PCIIDS_H -#define _I915_PCIIDS_H - -#ifdef __KERNEL__ -#define INTEL_VGA_DEVICE(_id, _info) { \ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, (_id)), \ - .class = PCI_BASE_CLASS_DISPLAY << 16, .class_mask = 0xff << 16, \ - .driver_data = (kernel_ulong_t)(_info), \ -} - -#define INTEL_QUANTA_VGA_DEVICE(_info) { \ - .vendor = PCI_VENDOR_ID_INTEL, .device = 0x16a, \ - .subvendor = 0x152d, .subdevice = 0x8990, \ - .class = PCI_BASE_CLASS_DISPLAY << 16, .class_mask = 0xff << 16, \ - .driver_data = (kernel_ulong_t)(_info), \ -} -#endif - -#define INTEL_I810_IDS(MACRO__, ...) \ - MACRO__(0x7121, ## __VA_ARGS__), /* I810 */ \ - MACRO__(0x7123, ## __VA_ARGS__), /* I810_DC100 */ \ - MACRO__(0x7125, ## __VA_ARGS__) /* I810_E */ - -#define INTEL_I815_IDS(MACRO__, ...) \ - MACRO__(0x1132, ## __VA_ARGS__) /* I815*/ - -#define INTEL_I830_IDS(MACRO__, ...) \ - MACRO__(0x3577, ## __VA_ARGS__) - -#define INTEL_I845G_IDS(MACRO__, ...) \ - MACRO__(0x2562, ## __VA_ARGS__) - -#define INTEL_I85X_IDS(MACRO__, ...) \ - MACRO__(0x3582, ## __VA_ARGS__), /* I855_GM */ \ - MACRO__(0x358e, ## __VA_ARGS__) - -#define INTEL_I865G_IDS(MACRO__, ...) \ - MACRO__(0x2572, ## __VA_ARGS__) /* I865_G */ - -#define INTEL_I915G_IDS(MACRO__, ...) \ - MACRO__(0x2582, ## __VA_ARGS__), /* I915_G */ \ - MACRO__(0x258a, ## __VA_ARGS__) /* E7221_G */ - -#define INTEL_I915GM_IDS(MACRO__, ...) \ - MACRO__(0x2592, ## __VA_ARGS__) /* I915_GM */ - -#define INTEL_I945G_IDS(MACRO__, ...) \ - MACRO__(0x2772, ## __VA_ARGS__) /* I945_G */ - -#define INTEL_I945GM_IDS(MACRO__, ...) \ - MACRO__(0x27a2, ## __VA_ARGS__), /* I945_GM */ \ - MACRO__(0x27ae, ## __VA_ARGS__) /* I945_GME */ - -#define INTEL_I965G_IDS(MACRO__, ...) \ - MACRO__(0x2972, ## __VA_ARGS__), /* I946_GZ */ \ - MACRO__(0x2982, ## __VA_ARGS__), /* G35_G */ \ - MACRO__(0x2992, ## __VA_ARGS__), /* I965_Q */ \ - MACRO__(0x29a2, ## __VA_ARGS__) /* I965_G */ - -#define INTEL_G33_IDS(MACRO__, ...) \ - MACRO__(0x29b2, ## __VA_ARGS__), /* Q35_G */ \ - MACRO__(0x29c2, ## __VA_ARGS__), /* G33_G */ \ - MACRO__(0x29d2, ## __VA_ARGS__) /* Q33_G */ - -#define INTEL_I965GM_IDS(MACRO__, ...) \ - MACRO__(0x2a02, ## __VA_ARGS__), /* I965_GM */ \ - MACRO__(0x2a12, ## __VA_ARGS__) /* I965_GME */ - -#define INTEL_GM45_IDS(MACRO__, ...) \ - MACRO__(0x2a42, ## __VA_ARGS__) /* GM45_G */ - -#define INTEL_G45_IDS(MACRO__, ...) \ - MACRO__(0x2e02, ## __VA_ARGS__), /* IGD_E_G */ \ - MACRO__(0x2e12, ## __VA_ARGS__), /* Q45_G */ \ - MACRO__(0x2e22, ## __VA_ARGS__), /* G45_G */ \ - MACRO__(0x2e32, ## __VA_ARGS__), /* G41_G */ \ - MACRO__(0x2e42, ## __VA_ARGS__), /* B43_G */ \ - MACRO__(0x2e92, ## __VA_ARGS__) /* B43_G.1 */ - -#define INTEL_PNV_G_IDS(MACRO__, ...) \ - MACRO__(0xa001, ## __VA_ARGS__) - -#define INTEL_PNV_M_IDS(MACRO__, ...) \ - MACRO__(0xa011, ## __VA_ARGS__) - -#define INTEL_PNV_IDS(MACRO__, ...) \ - INTEL_PNV_G_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_PNV_M_IDS(MACRO__, ## __VA_ARGS__) - -#define INTEL_ILK_D_IDS(MACRO__, ...) \ - MACRO__(0x0042, ## __VA_ARGS__) - -#define INTEL_ILK_M_IDS(MACRO__, ...) \ - MACRO__(0x0046, ## __VA_ARGS__) - -#define INTEL_ILK_IDS(MACRO__, ...) \ - INTEL_ILK_D_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_ILK_M_IDS(MACRO__, ## __VA_ARGS__) - -#define INTEL_SNB_D_GT1_IDS(MACRO__, ...) \ - MACRO__(0x0102, ## __VA_ARGS__), \ - MACRO__(0x010A, ## __VA_ARGS__) - -#define INTEL_SNB_D_GT2_IDS(MACRO__, ...) \ - MACRO__(0x0112, ## __VA_ARGS__), \ - MACRO__(0x0122, ## __VA_ARGS__) - -#define INTEL_SNB_D_IDS(MACRO__, ...) \ - INTEL_SNB_D_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_SNB_D_GT2_IDS(MACRO__, ## __VA_ARGS__) - -#define INTEL_SNB_M_GT1_IDS(MACRO__, ...) \ - MACRO__(0x0106, ## __VA_ARGS__) - -#define INTEL_SNB_M_GT2_IDS(MACRO__, ...) \ - MACRO__(0x0116, ## __VA_ARGS__), \ - MACRO__(0x0126, ## __VA_ARGS__) - -#define INTEL_SNB_M_IDS(MACRO__, ...) \ - INTEL_SNB_M_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_SNB_M_GT2_IDS(MACRO__, ## __VA_ARGS__) - -#define INTEL_SNB_IDS(MACRO__, ...) \ - INTEL_SNB_D_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_SNB_M_IDS(MACRO__, ## __VA_ARGS__) - -#define INTEL_IVB_M_GT1_IDS(MACRO__, ...) \ - MACRO__(0x0156, ## __VA_ARGS__) /* GT1 mobile */ - -#define INTEL_IVB_M_GT2_IDS(MACRO__, ...) \ - MACRO__(0x0166, ## __VA_ARGS__) /* GT2 mobile */ - -#define INTEL_IVB_M_IDS(MACRO__, ...) \ - INTEL_IVB_M_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_IVB_M_GT2_IDS(MACRO__, ## __VA_ARGS__) - -#define INTEL_IVB_D_GT1_IDS(MACRO__, ...) \ - MACRO__(0x0152, ## __VA_ARGS__), /* GT1 desktop */ \ - MACRO__(0x015a, ## __VA_ARGS__) /* GT1 server */ - -#define INTEL_IVB_D_GT2_IDS(MACRO__, ...) \ - MACRO__(0x0162, ## __VA_ARGS__), /* GT2 desktop */ \ - MACRO__(0x016a, ## __VA_ARGS__) /* GT2 server */ - -#define INTEL_IVB_D_IDS(MACRO__, ...) \ - INTEL_IVB_D_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_IVB_D_GT2_IDS(MACRO__, ## __VA_ARGS__) - -#define INTEL_IVB_IDS(MACRO__, ...) \ - INTEL_IVB_M_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_IVB_D_IDS(MACRO__, ## __VA_ARGS__) - -#define INTEL_IVB_Q_IDS(MACRO__, ...) \ - INTEL_QUANTA_VGA_DEVICE(__VA_ARGS__) /* Quanta transcode */ - -#define INTEL_HSW_ULT_GT1_IDS(MACRO__, ...) \ - MACRO__(0x0A02, ## __VA_ARGS__), /* ULT GT1 desktop */ \ - MACRO__(0x0A06, ## __VA_ARGS__), /* ULT GT1 mobile */ \ - MACRO__(0x0A0A, ## __VA_ARGS__), /* ULT GT1 server */ \ - MACRO__(0x0A0B, ## __VA_ARGS__) /* ULT GT1 reserved */ - -#define INTEL_HSW_ULX_GT1_IDS(MACRO__, ...) \ - MACRO__(0x0A0E, ## __VA_ARGS__) /* ULX GT1 mobile */ - -#define INTEL_HSW_GT1_IDS(MACRO__, ...) \ - INTEL_HSW_ULT_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_HSW_ULX_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - MACRO__(0x0402, ## __VA_ARGS__), /* GT1 desktop */ \ - MACRO__(0x0406, ## __VA_ARGS__), /* GT1 mobile */ \ - MACRO__(0x040A, ## __VA_ARGS__), /* GT1 server */ \ - MACRO__(0x040B, ## __VA_ARGS__), /* GT1 reserved */ \ - MACRO__(0x040E, ## __VA_ARGS__), /* GT1 reserved */ \ - MACRO__(0x0C02, ## __VA_ARGS__), /* SDV GT1 desktop */ \ - MACRO__(0x0C06, ## __VA_ARGS__), /* SDV GT1 mobile */ \ - MACRO__(0x0C0A, ## __VA_ARGS__), /* SDV GT1 server */ \ - MACRO__(0x0C0B, ## __VA_ARGS__), /* SDV GT1 reserved */ \ - MACRO__(0x0C0E, ## __VA_ARGS__), /* SDV GT1 reserved */ \ - MACRO__(0x0D02, ## __VA_ARGS__), /* CRW GT1 desktop */ \ - MACRO__(0x0D06, ## __VA_ARGS__), /* CRW GT1 mobile */ \ - MACRO__(0x0D0A, ## __VA_ARGS__), /* CRW GT1 server */ \ - MACRO__(0x0D0B, ## __VA_ARGS__), /* CRW GT1 reserved */ \ - MACRO__(0x0D0E, ## __VA_ARGS__) /* CRW GT1 reserved */ - -#define INTEL_HSW_ULT_GT2_IDS(MACRO__, ...) \ - MACRO__(0x0A12, ## __VA_ARGS__), /* ULT GT2 desktop */ \ - MACRO__(0x0A16, ## __VA_ARGS__), /* ULT GT2 mobile */ \ - MACRO__(0x0A1A, ## __VA_ARGS__), /* ULT GT2 server */ \ - MACRO__(0x0A1B, ## __VA_ARGS__) /* ULT GT2 reserved */ \ - -#define INTEL_HSW_ULX_GT2_IDS(MACRO__, ...) \ - MACRO__(0x0A1E, ## __VA_ARGS__) /* ULX GT2 mobile */ \ - -#define INTEL_HSW_GT2_IDS(MACRO__, ...) \ - INTEL_HSW_ULT_GT2_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_HSW_ULX_GT2_IDS(MACRO__, ## __VA_ARGS__), \ - MACRO__(0x0412, ## __VA_ARGS__), /* GT2 desktop */ \ - MACRO__(0x0416, ## __VA_ARGS__), /* GT2 mobile */ \ - MACRO__(0x041A, ## __VA_ARGS__), /* GT2 server */ \ - MACRO__(0x041B, ## __VA_ARGS__), /* GT2 reserved */ \ - MACRO__(0x041E, ## __VA_ARGS__), /* GT2 reserved */ \ - MACRO__(0x0C12, ## __VA_ARGS__), /* SDV GT2 desktop */ \ - MACRO__(0x0C16, ## __VA_ARGS__), /* SDV GT2 mobile */ \ - MACRO__(0x0C1A, ## __VA_ARGS__), /* SDV GT2 server */ \ - MACRO__(0x0C1B, ## __VA_ARGS__), /* SDV GT2 reserved */ \ - MACRO__(0x0C1E, ## __VA_ARGS__), /* SDV GT2 reserved */ \ - MACRO__(0x0D12, ## __VA_ARGS__), /* CRW GT2 desktop */ \ - MACRO__(0x0D16, ## __VA_ARGS__), /* CRW GT2 mobile */ \ - MACRO__(0x0D1A, ## __VA_ARGS__), /* CRW GT2 server */ \ - MACRO__(0x0D1B, ## __VA_ARGS__), /* CRW GT2 reserved */ \ - MACRO__(0x0D1E, ## __VA_ARGS__) /* CRW GT2 reserved */ - -#define INTEL_HSW_ULT_GT3_IDS(MACRO__, ...) \ - MACRO__(0x0A22, ## __VA_ARGS__), /* ULT GT3 desktop */ \ - MACRO__(0x0A26, ## __VA_ARGS__), /* ULT GT3 mobile */ \ - MACRO__(0x0A2A, ## __VA_ARGS__), /* ULT GT3 server */ \ - MACRO__(0x0A2B, ## __VA_ARGS__), /* ULT GT3 reserved */ \ - MACRO__(0x0A2E, ## __VA_ARGS__) /* ULT GT3 reserved */ - -#define INTEL_HSW_GT3_IDS(MACRO__, ...) \ - INTEL_HSW_ULT_GT3_IDS(MACRO__, ## __VA_ARGS__), \ - MACRO__(0x0422, ## __VA_ARGS__), /* GT3 desktop */ \ - MACRO__(0x0426, ## __VA_ARGS__), /* GT3 mobile */ \ - MACRO__(0x042A, ## __VA_ARGS__), /* GT3 server */ \ - MACRO__(0x042B, ## __VA_ARGS__), /* GT3 reserved */ \ - MACRO__(0x042E, ## __VA_ARGS__), /* GT3 reserved */ \ - MACRO__(0x0C22, ## __VA_ARGS__), /* SDV GT3 desktop */ \ - MACRO__(0x0C26, ## __VA_ARGS__), /* SDV GT3 mobile */ \ - MACRO__(0x0C2A, ## __VA_ARGS__), /* SDV GT3 server */ \ - MACRO__(0x0C2B, ## __VA_ARGS__), /* SDV GT3 reserved */ \ - MACRO__(0x0C2E, ## __VA_ARGS__), /* SDV GT3 reserved */ \ - MACRO__(0x0D22, ## __VA_ARGS__), /* CRW GT3 desktop */ \ - MACRO__(0x0D26, ## __VA_ARGS__), /* CRW GT3 mobile */ \ - MACRO__(0x0D2A, ## __VA_ARGS__), /* CRW GT3 server */ \ - MACRO__(0x0D2B, ## __VA_ARGS__), /* CRW GT3 reserved */ \ - MACRO__(0x0D2E, ## __VA_ARGS__) /* CRW GT3 reserved */ - -#define INTEL_HSW_IDS(MACRO__, ...) \ - INTEL_HSW_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_HSW_GT2_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_HSW_GT3_IDS(MACRO__, ## __VA_ARGS__) - -#define INTEL_VLV_IDS(MACRO__, ...) \ - MACRO__(0x0f30, ## __VA_ARGS__), \ - MACRO__(0x0f31, ## __VA_ARGS__), \ - MACRO__(0x0f32, ## __VA_ARGS__), \ - MACRO__(0x0f33, ## __VA_ARGS__) - -#define INTEL_BDW_ULT_GT1_IDS(MACRO__, ...) \ - MACRO__(0x1606, ## __VA_ARGS__), /* GT1 ULT */ \ - MACRO__(0x160B, ## __VA_ARGS__) /* GT1 Iris */ - -#define INTEL_BDW_ULX_GT1_IDS(MACRO__, ...) \ - MACRO__(0x160E, ## __VA_ARGS__) /* GT1 ULX */ - -#define INTEL_BDW_GT1_IDS(MACRO__, ...) \ - INTEL_BDW_ULT_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_BDW_ULX_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - MACRO__(0x1602, ## __VA_ARGS__), /* GT1 ULT */ \ - MACRO__(0x160A, ## __VA_ARGS__), /* GT1 Server */ \ - MACRO__(0x160D, ## __VA_ARGS__) /* GT1 Workstation */ - -#define INTEL_BDW_ULT_GT2_IDS(MACRO__, ...) \ - MACRO__(0x1616, ## __VA_ARGS__), /* GT2 ULT */ \ - MACRO__(0x161B, ## __VA_ARGS__) /* GT2 ULT */ - -#define INTEL_BDW_ULX_GT2_IDS(MACRO__, ...) \ - MACRO__(0x161E, ## __VA_ARGS__) /* GT2 ULX */ - -#define INTEL_BDW_GT2_IDS(MACRO__, ...) \ - INTEL_BDW_ULT_GT2_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_BDW_ULX_GT2_IDS(MACRO__, ## __VA_ARGS__), \ - MACRO__(0x1612, ## __VA_ARGS__), /* GT2 Halo */ \ - MACRO__(0x161A, ## __VA_ARGS__), /* GT2 Server */ \ - MACRO__(0x161D, ## __VA_ARGS__) /* GT2 Workstation */ - -#define INTEL_BDW_ULT_GT3_IDS(MACRO__, ...) \ - MACRO__(0x1626, ## __VA_ARGS__), /* ULT */ \ - MACRO__(0x162B, ## __VA_ARGS__) /* Iris */ \ - -#define INTEL_BDW_ULX_GT3_IDS(MACRO__, ...) \ - MACRO__(0x162E, ## __VA_ARGS__) /* ULX */ - -#define INTEL_BDW_GT3_IDS(MACRO__, ...) \ - INTEL_BDW_ULT_GT3_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_BDW_ULX_GT3_IDS(MACRO__, ## __VA_ARGS__), \ - MACRO__(0x1622, ## __VA_ARGS__), /* ULT */ \ - MACRO__(0x162A, ## __VA_ARGS__), /* Server */ \ - MACRO__(0x162D, ## __VA_ARGS__) /* Workstation */ - -#define INTEL_BDW_ULT_RSVD_IDS(MACRO__, ...) \ - MACRO__(0x1636, ## __VA_ARGS__), /* ULT */ \ - MACRO__(0x163B, ## __VA_ARGS__) /* Iris */ - -#define INTEL_BDW_ULX_RSVD_IDS(MACRO__, ...) \ - MACRO__(0x163E, ## __VA_ARGS__) /* ULX */ - -#define INTEL_BDW_RSVD_IDS(MACRO__, ...) \ - INTEL_BDW_ULT_RSVD_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_BDW_ULX_RSVD_IDS(MACRO__, ## __VA_ARGS__), \ - MACRO__(0x1632, ## __VA_ARGS__), /* ULT */ \ - MACRO__(0x163A, ## __VA_ARGS__), /* Server */ \ - MACRO__(0x163D, ## __VA_ARGS__) /* Workstation */ - -#define INTEL_BDW_IDS(MACRO__, ...) \ - INTEL_BDW_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_BDW_GT2_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_BDW_GT3_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_BDW_RSVD_IDS(MACRO__, ## __VA_ARGS__) - -#define INTEL_CHV_IDS(MACRO__, ...) \ - MACRO__(0x22b0, ## __VA_ARGS__), \ - MACRO__(0x22b1, ## __VA_ARGS__), \ - MACRO__(0x22b2, ## __VA_ARGS__), \ - MACRO__(0x22b3, ## __VA_ARGS__) - -#define INTEL_SKL_ULT_GT1_IDS(MACRO__, ...) \ - MACRO__(0x1906, ## __VA_ARGS__), /* ULT GT1 */ \ - MACRO__(0x1913, ## __VA_ARGS__) /* ULT GT1.5 */ - -#define INTEL_SKL_ULX_GT1_IDS(MACRO__, ...) \ - MACRO__(0x190E, ## __VA_ARGS__), /* ULX GT1 */ \ - MACRO__(0x1915, ## __VA_ARGS__) /* ULX GT1.5 */ - -#define INTEL_SKL_GT1_IDS(MACRO__, ...) \ - INTEL_SKL_ULT_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_SKL_ULX_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - MACRO__(0x1902, ## __VA_ARGS__), /* DT GT1 */ \ - MACRO__(0x190A, ## __VA_ARGS__), /* SRV GT1 */ \ - MACRO__(0x190B, ## __VA_ARGS__), /* Halo GT1 */ \ - MACRO__(0x1917, ## __VA_ARGS__) /* DT GT1.5 */ - -#define INTEL_SKL_ULT_GT2_IDS(MACRO__, ...) \ - MACRO__(0x1916, ## __VA_ARGS__), /* ULT GT2 */ \ - MACRO__(0x1921, ## __VA_ARGS__) /* ULT GT2F */ - -#define INTEL_SKL_ULX_GT2_IDS(MACRO__, ...) \ - MACRO__(0x191E, ## __VA_ARGS__) /* ULX GT2 */ - -#define INTEL_SKL_GT2_IDS(MACRO__, ...) \ - INTEL_SKL_ULT_GT2_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_SKL_ULX_GT2_IDS(MACRO__, ## __VA_ARGS__), \ - MACRO__(0x1912, ## __VA_ARGS__), /* DT GT2 */ \ - MACRO__(0x191A, ## __VA_ARGS__), /* SRV GT2 */ \ - MACRO__(0x191B, ## __VA_ARGS__), /* Halo GT2 */ \ - MACRO__(0x191D, ## __VA_ARGS__) /* WKS GT2 */ - -#define INTEL_SKL_ULT_GT3_IDS(MACRO__, ...) \ - MACRO__(0x1923, ## __VA_ARGS__), /* ULT GT3 */ \ - MACRO__(0x1926, ## __VA_ARGS__), /* ULT GT3e */ \ - MACRO__(0x1927, ## __VA_ARGS__) /* ULT GT3e */ - -#define INTEL_SKL_GT3_IDS(MACRO__, ...) \ - INTEL_SKL_ULT_GT3_IDS(MACRO__, ## __VA_ARGS__), \ - MACRO__(0x192A, ## __VA_ARGS__), /* SRV GT3 */ \ - MACRO__(0x192B, ## __VA_ARGS__), /* Halo GT3e */ \ - MACRO__(0x192D, ## __VA_ARGS__) /* SRV GT3e */ - -#define INTEL_SKL_GT4_IDS(MACRO__, ...) \ - MACRO__(0x1932, ## __VA_ARGS__), /* DT GT4 */ \ - MACRO__(0x193A, ## __VA_ARGS__), /* SRV GT4e */ \ - MACRO__(0x193B, ## __VA_ARGS__), /* Halo GT4e */ \ - MACRO__(0x193D, ## __VA_ARGS__) /* WKS GT4e */ - -#define INTEL_SKL_IDS(MACRO__, ...) \ - INTEL_SKL_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_SKL_GT2_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_SKL_GT3_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_SKL_GT4_IDS(MACRO__, ## __VA_ARGS__) - -#define INTEL_BXT_IDS(MACRO__, ...) \ - MACRO__(0x0A84, ## __VA_ARGS__), \ - MACRO__(0x1A84, ## __VA_ARGS__), \ - MACRO__(0x1A85, ## __VA_ARGS__), \ - MACRO__(0x5A84, ## __VA_ARGS__), /* APL HD Graphics 505 */ \ - MACRO__(0x5A85, ## __VA_ARGS__) /* APL HD Graphics 500 */ - -#define INTEL_GLK_IDS(MACRO__, ...) \ - MACRO__(0x3184, ## __VA_ARGS__), \ - MACRO__(0x3185, ## __VA_ARGS__) - -#define INTEL_KBL_ULT_GT1_IDS(MACRO__, ...) \ - MACRO__(0x5906, ## __VA_ARGS__), /* ULT GT1 */ \ - MACRO__(0x5913, ## __VA_ARGS__) /* ULT GT1.5 */ - -#define INTEL_KBL_ULX_GT1_IDS(MACRO__, ...) \ - MACRO__(0x590E, ## __VA_ARGS__), /* ULX GT1 */ \ - MACRO__(0x5915, ## __VA_ARGS__) /* ULX GT1.5 */ - -#define INTEL_KBL_GT1_IDS(MACRO__, ...) \ - INTEL_KBL_ULT_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_KBL_ULX_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - MACRO__(0x5902, ## __VA_ARGS__), /* DT GT1 */ \ - MACRO__(0x5908, ## __VA_ARGS__), /* Halo GT1 */ \ - MACRO__(0x590A, ## __VA_ARGS__), /* SRV GT1 */ \ - MACRO__(0x590B, ## __VA_ARGS__) /* Halo GT1 */ - -#define INTEL_KBL_ULT_GT2_IDS(MACRO__, ...) \ - MACRO__(0x5916, ## __VA_ARGS__), /* ULT GT2 */ \ - MACRO__(0x5921, ## __VA_ARGS__) /* ULT GT2F */ - -#define INTEL_KBL_ULX_GT2_IDS(MACRO__, ...) \ - MACRO__(0x591E, ## __VA_ARGS__) /* ULX GT2 */ - -#define INTEL_KBL_GT2_IDS(MACRO__, ...) \ - INTEL_KBL_ULT_GT2_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_KBL_ULX_GT2_IDS(MACRO__, ## __VA_ARGS__), \ - MACRO__(0x5912, ## __VA_ARGS__), /* DT GT2 */ \ - MACRO__(0x5917, ## __VA_ARGS__), /* Mobile GT2 */ \ - MACRO__(0x591A, ## __VA_ARGS__), /* SRV GT2 */ \ - MACRO__(0x591B, ## __VA_ARGS__), /* Halo GT2 */ \ - MACRO__(0x591D, ## __VA_ARGS__) /* WKS GT2 */ - -#define INTEL_KBL_ULT_GT3_IDS(MACRO__, ...) \ - MACRO__(0x5926, ## __VA_ARGS__) /* ULT GT3 */ - -#define INTEL_KBL_GT3_IDS(MACRO__, ...) \ - INTEL_KBL_ULT_GT3_IDS(MACRO__, ## __VA_ARGS__), \ - MACRO__(0x5923, ## __VA_ARGS__), /* ULT GT3 */ \ - MACRO__(0x5927, ## __VA_ARGS__) /* ULT GT3 */ - -#define INTEL_KBL_GT4_IDS(MACRO__, ...) \ - MACRO__(0x593B, ## __VA_ARGS__) /* Halo GT4 */ - -/* AML/KBL Y GT2 */ -#define INTEL_AML_KBL_GT2_IDS(MACRO__, ...) \ - MACRO__(0x591C, ## __VA_ARGS__), /* ULX GT2 */ \ - MACRO__(0x87C0, ## __VA_ARGS__) /* ULX GT2 */ - -/* AML/CFL Y GT2 */ -#define INTEL_AML_CFL_GT2_IDS(MACRO__, ...) \ - MACRO__(0x87CA, ## __VA_ARGS__) - -/* CML GT1 */ -#define INTEL_CML_GT1_IDS(MACRO__, ...) \ - MACRO__(0x9BA2, ## __VA_ARGS__), \ - MACRO__(0x9BA4, ## __VA_ARGS__), \ - MACRO__(0x9BA5, ## __VA_ARGS__), \ - MACRO__(0x9BA8, ## __VA_ARGS__) - -#define INTEL_CML_U_GT1_IDS(MACRO__, ...) \ - MACRO__(0x9B21, ## __VA_ARGS__), \ - MACRO__(0x9BAA, ## __VA_ARGS__), \ - MACRO__(0x9BAC, ## __VA_ARGS__) - -/* CML GT2 */ -#define INTEL_CML_GT2_IDS(MACRO__, ...) \ - MACRO__(0x9BC2, ## __VA_ARGS__), \ - MACRO__(0x9BC4, ## __VA_ARGS__), \ - MACRO__(0x9BC5, ## __VA_ARGS__), \ - MACRO__(0x9BC6, ## __VA_ARGS__), \ - MACRO__(0x9BC8, ## __VA_ARGS__), \ - MACRO__(0x9BE6, ## __VA_ARGS__), \ - MACRO__(0x9BF6, ## __VA_ARGS__) - -#define INTEL_CML_U_GT2_IDS(MACRO__, ...) \ - MACRO__(0x9B41, ## __VA_ARGS__), \ - MACRO__(0x9BCA, ## __VA_ARGS__), \ - MACRO__(0x9BCC, ## __VA_ARGS__) - -#define INTEL_CML_IDS(MACRO__, ...) \ - INTEL_CML_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_CML_GT2_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_CML_U_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_CML_U_GT2_IDS(MACRO__, ## __VA_ARGS__) - -#define INTEL_KBL_IDS(MACRO__, ...) \ - INTEL_KBL_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_KBL_GT2_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_KBL_GT3_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_KBL_GT4_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_AML_KBL_GT2_IDS(MACRO__, ## __VA_ARGS__) - -/* CFL S */ -#define INTEL_CFL_S_GT1_IDS(MACRO__, ...) \ - MACRO__(0x3E90, ## __VA_ARGS__), /* SRV GT1 */ \ - MACRO__(0x3E93, ## __VA_ARGS__), /* SRV GT1 */ \ - MACRO__(0x3E99, ## __VA_ARGS__) /* SRV GT1 */ - -#define INTEL_CFL_S_GT2_IDS(MACRO__, ...) \ - MACRO__(0x3E91, ## __VA_ARGS__), /* SRV GT2 */ \ - MACRO__(0x3E92, ## __VA_ARGS__), /* SRV GT2 */ \ - MACRO__(0x3E96, ## __VA_ARGS__), /* SRV GT2 */ \ - MACRO__(0x3E98, ## __VA_ARGS__), /* SRV GT2 */ \ - MACRO__(0x3E9A, ## __VA_ARGS__) /* SRV GT2 */ - -/* CFL H */ -#define INTEL_CFL_H_GT1_IDS(MACRO__, ...) \ - MACRO__(0x3E9C, ## __VA_ARGS__) - -#define INTEL_CFL_H_GT2_IDS(MACRO__, ...) \ - MACRO__(0x3E94, ## __VA_ARGS__), /* Halo GT2 */ \ - MACRO__(0x3E9B, ## __VA_ARGS__) /* Halo GT2 */ - -/* CFL U GT2 */ -#define INTEL_CFL_U_GT2_IDS(MACRO__, ...) \ - MACRO__(0x3EA9, ## __VA_ARGS__) - -/* CFL U GT3 */ -#define INTEL_CFL_U_GT3_IDS(MACRO__, ...) \ - MACRO__(0x3EA5, ## __VA_ARGS__), /* ULT GT3 */ \ - MACRO__(0x3EA6, ## __VA_ARGS__), /* ULT GT3 */ \ - MACRO__(0x3EA7, ## __VA_ARGS__), /* ULT GT3 */ \ - MACRO__(0x3EA8, ## __VA_ARGS__) /* ULT GT3 */ - -#define INTEL_CFL_IDS(MACRO__, ...) \ - INTEL_CFL_S_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_CFL_S_GT2_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_CFL_H_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_CFL_H_GT2_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_CFL_U_GT2_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_CFL_U_GT3_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_AML_CFL_GT2_IDS(MACRO__, ## __VA_ARGS__) - -/* WHL/CFL U GT1 */ -#define INTEL_WHL_U_GT1_IDS(MACRO__, ...) \ - MACRO__(0x3EA1, ## __VA_ARGS__), \ - MACRO__(0x3EA4, ## __VA_ARGS__) - -/* WHL/CFL U GT2 */ -#define INTEL_WHL_U_GT2_IDS(MACRO__, ...) \ - MACRO__(0x3EA0, ## __VA_ARGS__), \ - MACRO__(0x3EA3, ## __VA_ARGS__) - -/* WHL/CFL U GT3 */ -#define INTEL_WHL_U_GT3_IDS(MACRO__, ...) \ - MACRO__(0x3EA2, ## __VA_ARGS__) - -#define INTEL_WHL_IDS(MACRO__, ...) \ - INTEL_WHL_U_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_WHL_U_GT2_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_WHL_U_GT3_IDS(MACRO__, ## __VA_ARGS__) - -/* CNL */ -#define INTEL_CNL_PORT_F_IDS(MACRO__, ...) \ - MACRO__(0x5A44, ## __VA_ARGS__), \ - MACRO__(0x5A4C, ## __VA_ARGS__), \ - MACRO__(0x5A54, ## __VA_ARGS__), \ - MACRO__(0x5A5C, ## __VA_ARGS__) - -#define INTEL_CNL_IDS(MACRO__, ...) \ - INTEL_CNL_PORT_F_IDS(MACRO__, ## __VA_ARGS__), \ - MACRO__(0x5A40, ## __VA_ARGS__), \ - MACRO__(0x5A41, ## __VA_ARGS__), \ - MACRO__(0x5A42, ## __VA_ARGS__), \ - MACRO__(0x5A49, ## __VA_ARGS__), \ - MACRO__(0x5A4A, ## __VA_ARGS__), \ - MACRO__(0x5A50, ## __VA_ARGS__), \ - MACRO__(0x5A51, ## __VA_ARGS__), \ - MACRO__(0x5A52, ## __VA_ARGS__), \ - MACRO__(0x5A59, ## __VA_ARGS__), \ - MACRO__(0x5A5A, ## __VA_ARGS__) - -/* ICL */ -#define INTEL_ICL_PORT_F_IDS(MACRO__, ...) \ - MACRO__(0x8A50, ## __VA_ARGS__), \ - MACRO__(0x8A52, ## __VA_ARGS__), \ - MACRO__(0x8A53, ## __VA_ARGS__), \ - MACRO__(0x8A54, ## __VA_ARGS__), \ - MACRO__(0x8A56, ## __VA_ARGS__), \ - MACRO__(0x8A57, ## __VA_ARGS__), \ - MACRO__(0x8A58, ## __VA_ARGS__), \ - MACRO__(0x8A59, ## __VA_ARGS__), \ - MACRO__(0x8A5A, ## __VA_ARGS__), \ - MACRO__(0x8A5B, ## __VA_ARGS__), \ - MACRO__(0x8A5C, ## __VA_ARGS__), \ - MACRO__(0x8A70, ## __VA_ARGS__), \ - MACRO__(0x8A71, ## __VA_ARGS__) - -#define INTEL_ICL_IDS(MACRO__, ...) \ - INTEL_ICL_PORT_F_IDS(MACRO__, ## __VA_ARGS__), \ - MACRO__(0x8A51, ## __VA_ARGS__), \ - MACRO__(0x8A5D, ## __VA_ARGS__) - -/* EHL */ -#define INTEL_EHL_IDS(MACRO__, ...) \ - MACRO__(0x4541, ## __VA_ARGS__), \ - MACRO__(0x4551, ## __VA_ARGS__), \ - MACRO__(0x4555, ## __VA_ARGS__), \ - MACRO__(0x4557, ## __VA_ARGS__), \ - MACRO__(0x4570, ## __VA_ARGS__), \ - MACRO__(0x4571, ## __VA_ARGS__) - -/* JSL */ -#define INTEL_JSL_IDS(MACRO__, ...) \ - MACRO__(0x4E51, ## __VA_ARGS__), \ - MACRO__(0x4E55, ## __VA_ARGS__), \ - MACRO__(0x4E57, ## __VA_ARGS__), \ - MACRO__(0x4E61, ## __VA_ARGS__), \ - MACRO__(0x4E71, ## __VA_ARGS__) - -/* TGL */ -#define INTEL_TGL_GT1_IDS(MACRO__, ...) \ - MACRO__(0x9A60, ## __VA_ARGS__), \ - MACRO__(0x9A68, ## __VA_ARGS__), \ - MACRO__(0x9A70, ## __VA_ARGS__) - -#define INTEL_TGL_GT2_IDS(MACRO__, ...) \ - MACRO__(0x9A40, ## __VA_ARGS__), \ - MACRO__(0x9A49, ## __VA_ARGS__), \ - MACRO__(0x9A59, ## __VA_ARGS__), \ - MACRO__(0x9A78, ## __VA_ARGS__), \ - MACRO__(0x9AC0, ## __VA_ARGS__), \ - MACRO__(0x9AC9, ## __VA_ARGS__), \ - MACRO__(0x9AD9, ## __VA_ARGS__), \ - MACRO__(0x9AF8, ## __VA_ARGS__) - -#define INTEL_TGL_IDS(MACRO__, ...) \ - INTEL_TGL_GT1_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_TGL_GT2_IDS(MACRO__, ## __VA_ARGS__) - -/* RKL */ -#define INTEL_RKL_IDS(MACRO__, ...) \ - MACRO__(0x4C80, ## __VA_ARGS__), \ - MACRO__(0x4C8A, ## __VA_ARGS__), \ - MACRO__(0x4C8B, ## __VA_ARGS__), \ - MACRO__(0x4C8C, ## __VA_ARGS__), \ - MACRO__(0x4C90, ## __VA_ARGS__), \ - MACRO__(0x4C9A, ## __VA_ARGS__) - -/* DG1 */ -#define INTEL_DG1_IDS(MACRO__, ...) \ - MACRO__(0x4905, ## __VA_ARGS__), \ - MACRO__(0x4906, ## __VA_ARGS__), \ - MACRO__(0x4907, ## __VA_ARGS__), \ - MACRO__(0x4908, ## __VA_ARGS__), \ - MACRO__(0x4909, ## __VA_ARGS__) - -/* ADL-S */ -#define INTEL_ADLS_IDS(MACRO__, ...) \ - MACRO__(0x4680, ## __VA_ARGS__), \ - MACRO__(0x4682, ## __VA_ARGS__), \ - MACRO__(0x4688, ## __VA_ARGS__), \ - MACRO__(0x468A, ## __VA_ARGS__), \ - MACRO__(0x468B, ## __VA_ARGS__), \ - MACRO__(0x4690, ## __VA_ARGS__), \ - MACRO__(0x4692, ## __VA_ARGS__), \ - MACRO__(0x4693, ## __VA_ARGS__) - -/* ADL-P */ -#define INTEL_ADLP_IDS(MACRO__, ...) \ - MACRO__(0x46A0, ## __VA_ARGS__), \ - MACRO__(0x46A1, ## __VA_ARGS__), \ - MACRO__(0x46A2, ## __VA_ARGS__), \ - MACRO__(0x46A3, ## __VA_ARGS__), \ - MACRO__(0x46A6, ## __VA_ARGS__), \ - MACRO__(0x46A8, ## __VA_ARGS__), \ - MACRO__(0x46AA, ## __VA_ARGS__), \ - MACRO__(0x462A, ## __VA_ARGS__), \ - MACRO__(0x4626, ## __VA_ARGS__), \ - MACRO__(0x4628, ## __VA_ARGS__), \ - MACRO__(0x46B0, ## __VA_ARGS__), \ - MACRO__(0x46B1, ## __VA_ARGS__), \ - MACRO__(0x46B2, ## __VA_ARGS__), \ - MACRO__(0x46B3, ## __VA_ARGS__), \ - MACRO__(0x46C0, ## __VA_ARGS__), \ - MACRO__(0x46C1, ## __VA_ARGS__), \ - MACRO__(0x46C2, ## __VA_ARGS__), \ - MACRO__(0x46C3, ## __VA_ARGS__) - -/* ADL-N */ -#define INTEL_ADLN_IDS(MACRO__, ...) \ - MACRO__(0x46D0, ## __VA_ARGS__), \ - MACRO__(0x46D1, ## __VA_ARGS__), \ - MACRO__(0x46D2, ## __VA_ARGS__), \ - MACRO__(0x46D3, ## __VA_ARGS__), \ - MACRO__(0x46D4, ## __VA_ARGS__) - -/* RPL-S */ -#define INTEL_RPLS_IDS(MACRO__, ...) \ - MACRO__(0xA780, ## __VA_ARGS__), \ - MACRO__(0xA781, ## __VA_ARGS__), \ - MACRO__(0xA782, ## __VA_ARGS__), \ - MACRO__(0xA783, ## __VA_ARGS__), \ - MACRO__(0xA788, ## __VA_ARGS__), \ - MACRO__(0xA789, ## __VA_ARGS__), \ - MACRO__(0xA78A, ## __VA_ARGS__), \ - MACRO__(0xA78B, ## __VA_ARGS__) - -/* RPL-U */ -#define INTEL_RPLU_IDS(MACRO__, ...) \ - MACRO__(0xA721, ## __VA_ARGS__), \ - MACRO__(0xA7A1, ## __VA_ARGS__), \ - MACRO__(0xA7A9, ## __VA_ARGS__), \ - MACRO__(0xA7AC, ## __VA_ARGS__), \ - MACRO__(0xA7AD, ## __VA_ARGS__) - -/* RPL-P */ -#define INTEL_RPLP_IDS(MACRO__, ...) \ - MACRO__(0xA720, ## __VA_ARGS__), \ - MACRO__(0xA7A0, ## __VA_ARGS__), \ - MACRO__(0xA7A8, ## __VA_ARGS__), \ - MACRO__(0xA7AA, ## __VA_ARGS__), \ - MACRO__(0xA7AB, ## __VA_ARGS__) - -/* DG2 */ -#define INTEL_DG2_G10_IDS(MACRO__, ...) \ - MACRO__(0x5690, ## __VA_ARGS__), \ - MACRO__(0x5691, ## __VA_ARGS__), \ - MACRO__(0x5692, ## __VA_ARGS__), \ - MACRO__(0x56A0, ## __VA_ARGS__), \ - MACRO__(0x56A1, ## __VA_ARGS__), \ - MACRO__(0x56A2, ## __VA_ARGS__), \ - MACRO__(0x56BE, ## __VA_ARGS__), \ - MACRO__(0x56BF, ## __VA_ARGS__) - -#define INTEL_DG2_G11_IDS(MACRO__, ...) \ - MACRO__(0x5693, ## __VA_ARGS__), \ - MACRO__(0x5694, ## __VA_ARGS__), \ - MACRO__(0x5695, ## __VA_ARGS__), \ - MACRO__(0x56A5, ## __VA_ARGS__), \ - MACRO__(0x56A6, ## __VA_ARGS__), \ - MACRO__(0x56B0, ## __VA_ARGS__), \ - MACRO__(0x56B1, ## __VA_ARGS__), \ - MACRO__(0x56BA, ## __VA_ARGS__), \ - MACRO__(0x56BB, ## __VA_ARGS__), \ - MACRO__(0x56BC, ## __VA_ARGS__), \ - MACRO__(0x56BD, ## __VA_ARGS__) - -#define INTEL_DG2_G12_IDS(MACRO__, ...) \ - MACRO__(0x5696, ## __VA_ARGS__), \ - MACRO__(0x5697, ## __VA_ARGS__), \ - MACRO__(0x56A3, ## __VA_ARGS__), \ - MACRO__(0x56A4, ## __VA_ARGS__), \ - MACRO__(0x56B2, ## __VA_ARGS__), \ - MACRO__(0x56B3, ## __VA_ARGS__) - -#define INTEL_DG2_IDS(MACRO__, ...) \ - INTEL_DG2_G10_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_DG2_G11_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_DG2_G12_IDS(MACRO__, ## __VA_ARGS__) - -#define INTEL_ATS_M150_IDS(MACRO__, ...) \ - MACRO__(0x56C0, ## __VA_ARGS__), \ - MACRO__(0x56C2, ## __VA_ARGS__) - -#define INTEL_ATS_M75_IDS(MACRO__, ...) \ - MACRO__(0x56C1, ## __VA_ARGS__) - -#define INTEL_ATS_M_IDS(MACRO__, ...) \ - INTEL_ATS_M150_IDS(MACRO__, ## __VA_ARGS__), \ - INTEL_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) - -/* ARL */ -#define INTEL_ARL_IDS(MACRO__, ...) \ - MACRO__(0x7D41, ## __VA_ARGS__), \ - MACRO__(0x7D51, ## __VA_ARGS__), \ - MACRO__(0x7D67, ## __VA_ARGS__), \ - MACRO__(0x7DD1, ## __VA_ARGS__), \ - MACRO__(0xB640, ## __VA_ARGS__) - -/* MTL */ -#define INTEL_MTL_IDS(MACRO__, ...) \ - MACRO__(0x7D40, ## __VA_ARGS__), \ - MACRO__(0x7D45, ## __VA_ARGS__), \ - MACRO__(0x7D55, ## __VA_ARGS__), \ - MACRO__(0x7D60, ## __VA_ARGS__), \ - MACRO__(0x7DD5, ## __VA_ARGS__) - -/* PVC */ -#define INTEL_PVC_IDS(MACRO__, ...) \ - MACRO__(0x0B69, ## __VA_ARGS__), \ - MACRO__(0x0B6E, ## __VA_ARGS__), \ - MACRO__(0x0BD4, ## __VA_ARGS__), \ - MACRO__(0x0BD5, ## __VA_ARGS__), \ - MACRO__(0x0BD6, ## __VA_ARGS__), \ - MACRO__(0x0BD7, ## __VA_ARGS__), \ - MACRO__(0x0BD8, ## __VA_ARGS__), \ - MACRO__(0x0BD9, ## __VA_ARGS__), \ - MACRO__(0x0BDA, ## __VA_ARGS__), \ - MACRO__(0x0BDB, ## __VA_ARGS__), \ - MACRO__(0x0BE0, ## __VA_ARGS__), \ - MACRO__(0x0BE1, ## __VA_ARGS__), \ - MACRO__(0x0BE5, ## __VA_ARGS__) - -/* LNL */ -#define INTEL_LNL_IDS(MACRO__, ...) \ - MACRO__(0x6420, ## __VA_ARGS__), \ - MACRO__(0x64A0, ## __VA_ARGS__), \ - MACRO__(0x64B0, ## __VA_ARGS__) - -/* BMG */ -#define INTEL_BMG_IDS(MACRO__, ...) \ - MACRO__(0xE202, ## __VA_ARGS__), \ - MACRO__(0xE20B, ## __VA_ARGS__), \ - MACRO__(0xE20C, ## __VA_ARGS__), \ - MACRO__(0xE20D, ## __VA_ARGS__), \ - MACRO__(0xE212, ## __VA_ARGS__) - -/* PTL */ -#define INTEL_PTL_IDS(MACRO__, ...) \ - MACRO__(0xB080, ## __VA_ARGS__), \ - MACRO__(0xB081, ## __VA_ARGS__), \ - MACRO__(0xB082, ## __VA_ARGS__), \ - MACRO__(0xB090, ## __VA_ARGS__), \ - MACRO__(0xB091, ## __VA_ARGS__), \ - MACRO__(0xB092, ## __VA_ARGS__), \ - MACRO__(0xB0A0, ## __VA_ARGS__), \ - MACRO__(0xB0A1, ## __VA_ARGS__), \ - MACRO__(0xB0A2, ## __VA_ARGS__) - -#endif /* _I915_PCIIDS_H */ diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h new file mode 100644 index 000000000000..7632507af166 --- /dev/null +++ b/include/drm/intel/pciids.h @@ -0,0 +1,825 @@ +/* + * Copyright 2013 Intel Corporation + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef __PCIIDS_H__ +#define __PCIIDS_H__ + +#ifdef __KERNEL__ +#define INTEL_VGA_DEVICE(_id, _info) { \ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, (_id)), \ + .class = PCI_BASE_CLASS_DISPLAY << 16, .class_mask = 0xff << 16, \ + .driver_data = (kernel_ulong_t)(_info), \ +} + +#define INTEL_QUANTA_VGA_DEVICE(_info) { \ + .vendor = PCI_VENDOR_ID_INTEL, .device = 0x16a, \ + .subvendor = 0x152d, .subdevice = 0x8990, \ + .class = PCI_BASE_CLASS_DISPLAY << 16, .class_mask = 0xff << 16, \ + .driver_data = (kernel_ulong_t)(_info), \ +} +#endif + +#define INTEL_I810_IDS(MACRO__, ...) \ + MACRO__(0x7121, ## __VA_ARGS__), /* I810 */ \ + MACRO__(0x7123, ## __VA_ARGS__), /* I810_DC100 */ \ + MACRO__(0x7125, ## __VA_ARGS__) /* I810_E */ + +#define INTEL_I815_IDS(MACRO__, ...) \ + MACRO__(0x1132, ## __VA_ARGS__) /* I815*/ + +#define INTEL_I830_IDS(MACRO__, ...) \ + MACRO__(0x3577, ## __VA_ARGS__) + +#define INTEL_I845G_IDS(MACRO__, ...) \ + MACRO__(0x2562, ## __VA_ARGS__) + +#define INTEL_I85X_IDS(MACRO__, ...) \ + MACRO__(0x3582, ## __VA_ARGS__), /* I855_GM */ \ + MACRO__(0x358e, ## __VA_ARGS__) + +#define INTEL_I865G_IDS(MACRO__, ...) \ + MACRO__(0x2572, ## __VA_ARGS__) /* I865_G */ + +#define INTEL_I915G_IDS(MACRO__, ...) \ + MACRO__(0x2582, ## __VA_ARGS__), /* I915_G */ \ + MACRO__(0x258a, ## __VA_ARGS__) /* E7221_G */ + +#define INTEL_I915GM_IDS(MACRO__, ...) \ + MACRO__(0x2592, ## __VA_ARGS__) /* I915_GM */ + +#define INTEL_I945G_IDS(MACRO__, ...) \ + MACRO__(0x2772, ## __VA_ARGS__) /* I945_G */ + +#define INTEL_I945GM_IDS(MACRO__, ...) \ + MACRO__(0x27a2, ## __VA_ARGS__), /* I945_GM */ \ + MACRO__(0x27ae, ## __VA_ARGS__) /* I945_GME */ + +#define INTEL_I965G_IDS(MACRO__, ...) \ + MACRO__(0x2972, ## __VA_ARGS__), /* I946_GZ */ \ + MACRO__(0x2982, ## __VA_ARGS__), /* G35_G */ \ + MACRO__(0x2992, ## __VA_ARGS__), /* I965_Q */ \ + MACRO__(0x29a2, ## __VA_ARGS__) /* I965_G */ + +#define INTEL_G33_IDS(MACRO__, ...) \ + MACRO__(0x29b2, ## __VA_ARGS__), /* Q35_G */ \ + MACRO__(0x29c2, ## __VA_ARGS__), /* G33_G */ \ + MACRO__(0x29d2, ## __VA_ARGS__) /* Q33_G */ + +#define INTEL_I965GM_IDS(MACRO__, ...) \ + MACRO__(0x2a02, ## __VA_ARGS__), /* I965_GM */ \ + MACRO__(0x2a12, ## __VA_ARGS__) /* I965_GME */ + +#define INTEL_GM45_IDS(MACRO__, ...) \ + MACRO__(0x2a42, ## __VA_ARGS__) /* GM45_G */ + +#define INTEL_G45_IDS(MACRO__, ...) \ + MACRO__(0x2e02, ## __VA_ARGS__), /* IGD_E_G */ \ + MACRO__(0x2e12, ## __VA_ARGS__), /* Q45_G */ \ + MACRO__(0x2e22, ## __VA_ARGS__), /* G45_G */ \ + MACRO__(0x2e32, ## __VA_ARGS__), /* G41_G */ \ + MACRO__(0x2e42, ## __VA_ARGS__), /* B43_G */ \ + MACRO__(0x2e92, ## __VA_ARGS__) /* B43_G.1 */ + +#define INTEL_PNV_G_IDS(MACRO__, ...) \ + MACRO__(0xa001, ## __VA_ARGS__) + +#define INTEL_PNV_M_IDS(MACRO__, ...) \ + MACRO__(0xa011, ## __VA_ARGS__) + +#define INTEL_PNV_IDS(MACRO__, ...) \ + INTEL_PNV_G_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_PNV_M_IDS(MACRO__, ## __VA_ARGS__) + +#define INTEL_ILK_D_IDS(MACRO__, ...) \ + MACRO__(0x0042, ## __VA_ARGS__) + +#define INTEL_ILK_M_IDS(MACRO__, ...) \ + MACRO__(0x0046, ## __VA_ARGS__) + +#define INTEL_ILK_IDS(MACRO__, ...) \ + INTEL_ILK_D_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_ILK_M_IDS(MACRO__, ## __VA_ARGS__) + +#define INTEL_SNB_D_GT1_IDS(MACRO__, ...) \ + MACRO__(0x0102, ## __VA_ARGS__), \ + MACRO__(0x010A, ## __VA_ARGS__) + +#define INTEL_SNB_D_GT2_IDS(MACRO__, ...) \ + MACRO__(0x0112, ## __VA_ARGS__), \ + MACRO__(0x0122, ## __VA_ARGS__) + +#define INTEL_SNB_D_IDS(MACRO__, ...) \ + INTEL_SNB_D_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_SNB_D_GT2_IDS(MACRO__, ## __VA_ARGS__) + +#define INTEL_SNB_M_GT1_IDS(MACRO__, ...) \ + MACRO__(0x0106, ## __VA_ARGS__) + +#define INTEL_SNB_M_GT2_IDS(MACRO__, ...) \ + MACRO__(0x0116, ## __VA_ARGS__), \ + MACRO__(0x0126, ## __VA_ARGS__) + +#define INTEL_SNB_M_IDS(MACRO__, ...) \ + INTEL_SNB_M_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_SNB_M_GT2_IDS(MACRO__, ## __VA_ARGS__) + +#define INTEL_SNB_IDS(MACRO__, ...) \ + INTEL_SNB_D_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_SNB_M_IDS(MACRO__, ## __VA_ARGS__) + +#define INTEL_IVB_M_GT1_IDS(MACRO__, ...) \ + MACRO__(0x0156, ## __VA_ARGS__) /* GT1 mobile */ + +#define INTEL_IVB_M_GT2_IDS(MACRO__, ...) \ + MACRO__(0x0166, ## __VA_ARGS__) /* GT2 mobile */ + +#define INTEL_IVB_M_IDS(MACRO__, ...) \ + INTEL_IVB_M_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_IVB_M_GT2_IDS(MACRO__, ## __VA_ARGS__) + +#define INTEL_IVB_D_GT1_IDS(MACRO__, ...) \ + MACRO__(0x0152, ## __VA_ARGS__), /* GT1 desktop */ \ + MACRO__(0x015a, ## __VA_ARGS__) /* GT1 server */ + +#define INTEL_IVB_D_GT2_IDS(MACRO__, ...) \ + MACRO__(0x0162, ## __VA_ARGS__), /* GT2 desktop */ \ + MACRO__(0x016a, ## __VA_ARGS__) /* GT2 server */ + +#define INTEL_IVB_D_IDS(MACRO__, ...) \ + INTEL_IVB_D_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_IVB_D_GT2_IDS(MACRO__, ## __VA_ARGS__) + +#define INTEL_IVB_IDS(MACRO__, ...) \ + INTEL_IVB_M_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_IVB_D_IDS(MACRO__, ## __VA_ARGS__) + +#define INTEL_IVB_Q_IDS(MACRO__, ...) \ + INTEL_QUANTA_VGA_DEVICE(__VA_ARGS__) /* Quanta transcode */ + +#define INTEL_HSW_ULT_GT1_IDS(MACRO__, ...) \ + MACRO__(0x0A02, ## __VA_ARGS__), /* ULT GT1 desktop */ \ + MACRO__(0x0A06, ## __VA_ARGS__), /* ULT GT1 mobile */ \ + MACRO__(0x0A0A, ## __VA_ARGS__), /* ULT GT1 server */ \ + MACRO__(0x0A0B, ## __VA_ARGS__) /* ULT GT1 reserved */ + +#define INTEL_HSW_ULX_GT1_IDS(MACRO__, ...) \ + MACRO__(0x0A0E, ## __VA_ARGS__) /* ULX GT1 mobile */ + +#define INTEL_HSW_GT1_IDS(MACRO__, ...) \ + INTEL_HSW_ULT_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_HSW_ULX_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + MACRO__(0x0402, ## __VA_ARGS__), /* GT1 desktop */ \ + MACRO__(0x0406, ## __VA_ARGS__), /* GT1 mobile */ \ + MACRO__(0x040A, ## __VA_ARGS__), /* GT1 server */ \ + MACRO__(0x040B, ## __VA_ARGS__), /* GT1 reserved */ \ + MACRO__(0x040E, ## __VA_ARGS__), /* GT1 reserved */ \ + MACRO__(0x0C02, ## __VA_ARGS__), /* SDV GT1 desktop */ \ + MACRO__(0x0C06, ## __VA_ARGS__), /* SDV GT1 mobile */ \ + MACRO__(0x0C0A, ## __VA_ARGS__), /* SDV GT1 server */ \ + MACRO__(0x0C0B, ## __VA_ARGS__), /* SDV GT1 reserved */ \ + MACRO__(0x0C0E, ## __VA_ARGS__), /* SDV GT1 reserved */ \ + MACRO__(0x0D02, ## __VA_ARGS__), /* CRW GT1 desktop */ \ + MACRO__(0x0D06, ## __VA_ARGS__), /* CRW GT1 mobile */ \ + MACRO__(0x0D0A, ## __VA_ARGS__), /* CRW GT1 server */ \ + MACRO__(0x0D0B, ## __VA_ARGS__), /* CRW GT1 reserved */ \ + MACRO__(0x0D0E, ## __VA_ARGS__) /* CRW GT1 reserved */ + +#define INTEL_HSW_ULT_GT2_IDS(MACRO__, ...) \ + MACRO__(0x0A12, ## __VA_ARGS__), /* ULT GT2 desktop */ \ + MACRO__(0x0A16, ## __VA_ARGS__), /* ULT GT2 mobile */ \ + MACRO__(0x0A1A, ## __VA_ARGS__), /* ULT GT2 server */ \ + MACRO__(0x0A1B, ## __VA_ARGS__) /* ULT GT2 reserved */ \ + +#define INTEL_HSW_ULX_GT2_IDS(MACRO__, ...) \ + MACRO__(0x0A1E, ## __VA_ARGS__) /* ULX GT2 mobile */ \ + +#define INTEL_HSW_GT2_IDS(MACRO__, ...) \ + INTEL_HSW_ULT_GT2_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_HSW_ULX_GT2_IDS(MACRO__, ## __VA_ARGS__), \ + MACRO__(0x0412, ## __VA_ARGS__), /* GT2 desktop */ \ + MACRO__(0x0416, ## __VA_ARGS__), /* GT2 mobile */ \ + MACRO__(0x041A, ## __VA_ARGS__), /* GT2 server */ \ + MACRO__(0x041B, ## __VA_ARGS__), /* GT2 reserved */ \ + MACRO__(0x041E, ## __VA_ARGS__), /* GT2 reserved */ \ + MACRO__(0x0C12, ## __VA_ARGS__), /* SDV GT2 desktop */ \ + MACRO__(0x0C16, ## __VA_ARGS__), /* SDV GT2 mobile */ \ + MACRO__(0x0C1A, ## __VA_ARGS__), /* SDV GT2 server */ \ + MACRO__(0x0C1B, ## __VA_ARGS__), /* SDV GT2 reserved */ \ + MACRO__(0x0C1E, ## __VA_ARGS__), /* SDV GT2 reserved */ \ + MACRO__(0x0D12, ## __VA_ARGS__), /* CRW GT2 desktop */ \ + MACRO__(0x0D16, ## __VA_ARGS__), /* CRW GT2 mobile */ \ + MACRO__(0x0D1A, ## __VA_ARGS__), /* CRW GT2 server */ \ + MACRO__(0x0D1B, ## __VA_ARGS__), /* CRW GT2 reserved */ \ + MACRO__(0x0D1E, ## __VA_ARGS__) /* CRW GT2 reserved */ + +#define INTEL_HSW_ULT_GT3_IDS(MACRO__, ...) \ + MACRO__(0x0A22, ## __VA_ARGS__), /* ULT GT3 desktop */ \ + MACRO__(0x0A26, ## __VA_ARGS__), /* ULT GT3 mobile */ \ + MACRO__(0x0A2A, ## __VA_ARGS__), /* ULT GT3 server */ \ + MACRO__(0x0A2B, ## __VA_ARGS__), /* ULT GT3 reserved */ \ + MACRO__(0x0A2E, ## __VA_ARGS__) /* ULT GT3 reserved */ + +#define INTEL_HSW_GT3_IDS(MACRO__, ...) \ + INTEL_HSW_ULT_GT3_IDS(MACRO__, ## __VA_ARGS__), \ + MACRO__(0x0422, ## __VA_ARGS__), /* GT3 desktop */ \ + MACRO__(0x0426, ## __VA_ARGS__), /* GT3 mobile */ \ + MACRO__(0x042A, ## __VA_ARGS__), /* GT3 server */ \ + MACRO__(0x042B, ## __VA_ARGS__), /* GT3 reserved */ \ + MACRO__(0x042E, ## __VA_ARGS__), /* GT3 reserved */ \ + MACRO__(0x0C22, ## __VA_ARGS__), /* SDV GT3 desktop */ \ + MACRO__(0x0C26, ## __VA_ARGS__), /* SDV GT3 mobile */ \ + MACRO__(0x0C2A, ## __VA_ARGS__), /* SDV GT3 server */ \ + MACRO__(0x0C2B, ## __VA_ARGS__), /* SDV GT3 reserved */ \ + MACRO__(0x0C2E, ## __VA_ARGS__), /* SDV GT3 reserved */ \ + MACRO__(0x0D22, ## __VA_ARGS__), /* CRW GT3 desktop */ \ + MACRO__(0x0D26, ## __VA_ARGS__), /* CRW GT3 mobile */ \ + MACRO__(0x0D2A, ## __VA_ARGS__), /* CRW GT3 server */ \ + MACRO__(0x0D2B, ## __VA_ARGS__), /* CRW GT3 reserved */ \ + MACRO__(0x0D2E, ## __VA_ARGS__) /* CRW GT3 reserved */ + +#define INTEL_HSW_IDS(MACRO__, ...) \ + INTEL_HSW_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_HSW_GT2_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_HSW_GT3_IDS(MACRO__, ## __VA_ARGS__) + +#define INTEL_VLV_IDS(MACRO__, ...) \ + MACRO__(0x0f30, ## __VA_ARGS__), \ + MACRO__(0x0f31, ## __VA_ARGS__), \ + MACRO__(0x0f32, ## __VA_ARGS__), \ + MACRO__(0x0f33, ## __VA_ARGS__) + +#define INTEL_BDW_ULT_GT1_IDS(MACRO__, ...) \ + MACRO__(0x1606, ## __VA_ARGS__), /* GT1 ULT */ \ + MACRO__(0x160B, ## __VA_ARGS__) /* GT1 Iris */ + +#define INTEL_BDW_ULX_GT1_IDS(MACRO__, ...) \ + MACRO__(0x160E, ## __VA_ARGS__) /* GT1 ULX */ + +#define INTEL_BDW_GT1_IDS(MACRO__, ...) \ + INTEL_BDW_ULT_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_BDW_ULX_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + MACRO__(0x1602, ## __VA_ARGS__), /* GT1 ULT */ \ + MACRO__(0x160A, ## __VA_ARGS__), /* GT1 Server */ \ + MACRO__(0x160D, ## __VA_ARGS__) /* GT1 Workstation */ + +#define INTEL_BDW_ULT_GT2_IDS(MACRO__, ...) \ + MACRO__(0x1616, ## __VA_ARGS__), /* GT2 ULT */ \ + MACRO__(0x161B, ## __VA_ARGS__) /* GT2 ULT */ + +#define INTEL_BDW_ULX_GT2_IDS(MACRO__, ...) \ + MACRO__(0x161E, ## __VA_ARGS__) /* GT2 ULX */ + +#define INTEL_BDW_GT2_IDS(MACRO__, ...) \ + INTEL_BDW_ULT_GT2_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_BDW_ULX_GT2_IDS(MACRO__, ## __VA_ARGS__), \ + MACRO__(0x1612, ## __VA_ARGS__), /* GT2 Halo */ \ + MACRO__(0x161A, ## __VA_ARGS__), /* GT2 Server */ \ + MACRO__(0x161D, ## __VA_ARGS__) /* GT2 Workstation */ + +#define INTEL_BDW_ULT_GT3_IDS(MACRO__, ...) \ + MACRO__(0x1626, ## __VA_ARGS__), /* ULT */ \ + MACRO__(0x162B, ## __VA_ARGS__) /* Iris */ \ + +#define INTEL_BDW_ULX_GT3_IDS(MACRO__, ...) \ + MACRO__(0x162E, ## __VA_ARGS__) /* ULX */ + +#define INTEL_BDW_GT3_IDS(MACRO__, ...) \ + INTEL_BDW_ULT_GT3_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_BDW_ULX_GT3_IDS(MACRO__, ## __VA_ARGS__), \ + MACRO__(0x1622, ## __VA_ARGS__), /* ULT */ \ + MACRO__(0x162A, ## __VA_ARGS__), /* Server */ \ + MACRO__(0x162D, ## __VA_ARGS__) /* Workstation */ + +#define INTEL_BDW_ULT_RSVD_IDS(MACRO__, ...) \ + MACRO__(0x1636, ## __VA_ARGS__), /* ULT */ \ + MACRO__(0x163B, ## __VA_ARGS__) /* Iris */ + +#define INTEL_BDW_ULX_RSVD_IDS(MACRO__, ...) \ + MACRO__(0x163E, ## __VA_ARGS__) /* ULX */ + +#define INTEL_BDW_RSVD_IDS(MACRO__, ...) \ + INTEL_BDW_ULT_RSVD_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_BDW_ULX_RSVD_IDS(MACRO__, ## __VA_ARGS__), \ + MACRO__(0x1632, ## __VA_ARGS__), /* ULT */ \ + MACRO__(0x163A, ## __VA_ARGS__), /* Server */ \ + MACRO__(0x163D, ## __VA_ARGS__) /* Workstation */ + +#define INTEL_BDW_IDS(MACRO__, ...) \ + INTEL_BDW_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_BDW_GT2_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_BDW_GT3_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_BDW_RSVD_IDS(MACRO__, ## __VA_ARGS__) + +#define INTEL_CHV_IDS(MACRO__, ...) \ + MACRO__(0x22b0, ## __VA_ARGS__), \ + MACRO__(0x22b1, ## __VA_ARGS__), \ + MACRO__(0x22b2, ## __VA_ARGS__), \ + MACRO__(0x22b3, ## __VA_ARGS__) + +#define INTEL_SKL_ULT_GT1_IDS(MACRO__, ...) \ + MACRO__(0x1906, ## __VA_ARGS__), /* ULT GT1 */ \ + MACRO__(0x1913, ## __VA_ARGS__) /* ULT GT1.5 */ + +#define INTEL_SKL_ULX_GT1_IDS(MACRO__, ...) \ + MACRO__(0x190E, ## __VA_ARGS__), /* ULX GT1 */ \ + MACRO__(0x1915, ## __VA_ARGS__) /* ULX GT1.5 */ + +#define INTEL_SKL_GT1_IDS(MACRO__, ...) \ + INTEL_SKL_ULT_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_SKL_ULX_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + MACRO__(0x1902, ## __VA_ARGS__), /* DT GT1 */ \ + MACRO__(0x190A, ## __VA_ARGS__), /* SRV GT1 */ \ + MACRO__(0x190B, ## __VA_ARGS__), /* Halo GT1 */ \ + MACRO__(0x1917, ## __VA_ARGS__) /* DT GT1.5 */ + +#define INTEL_SKL_ULT_GT2_IDS(MACRO__, ...) \ + MACRO__(0x1916, ## __VA_ARGS__), /* ULT GT2 */ \ + MACRO__(0x1921, ## __VA_ARGS__) /* ULT GT2F */ + +#define INTEL_SKL_ULX_GT2_IDS(MACRO__, ...) \ + MACRO__(0x191E, ## __VA_ARGS__) /* ULX GT2 */ + +#define INTEL_SKL_GT2_IDS(MACRO__, ...) \ + INTEL_SKL_ULT_GT2_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_SKL_ULX_GT2_IDS(MACRO__, ## __VA_ARGS__), \ + MACRO__(0x1912, ## __VA_ARGS__), /* DT GT2 */ \ + MACRO__(0x191A, ## __VA_ARGS__), /* SRV GT2 */ \ + MACRO__(0x191B, ## __VA_ARGS__), /* Halo GT2 */ \ + MACRO__(0x191D, ## __VA_ARGS__) /* WKS GT2 */ + +#define INTEL_SKL_ULT_GT3_IDS(MACRO__, ...) \ + MACRO__(0x1923, ## __VA_ARGS__), /* ULT GT3 */ \ + MACRO__(0x1926, ## __VA_ARGS__), /* ULT GT3e */ \ + MACRO__(0x1927, ## __VA_ARGS__) /* ULT GT3e */ + +#define INTEL_SKL_GT3_IDS(MACRO__, ...) \ + INTEL_SKL_ULT_GT3_IDS(MACRO__, ## __VA_ARGS__), \ + MACRO__(0x192A, ## __VA_ARGS__), /* SRV GT3 */ \ + MACRO__(0x192B, ## __VA_ARGS__), /* Halo GT3e */ \ + MACRO__(0x192D, ## __VA_ARGS__) /* SRV GT3e */ + +#define INTEL_SKL_GT4_IDS(MACRO__, ...) \ + MACRO__(0x1932, ## __VA_ARGS__), /* DT GT4 */ \ + MACRO__(0x193A, ## __VA_ARGS__), /* SRV GT4e */ \ + MACRO__(0x193B, ## __VA_ARGS__), /* Halo GT4e */ \ + MACRO__(0x193D, ## __VA_ARGS__) /* WKS GT4e */ + +#define INTEL_SKL_IDS(MACRO__, ...) \ + INTEL_SKL_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_SKL_GT2_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_SKL_GT3_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_SKL_GT4_IDS(MACRO__, ## __VA_ARGS__) + +#define INTEL_BXT_IDS(MACRO__, ...) \ + MACRO__(0x0A84, ## __VA_ARGS__), \ + MACRO__(0x1A84, ## __VA_ARGS__), \ + MACRO__(0x1A85, ## __VA_ARGS__), \ + MACRO__(0x5A84, ## __VA_ARGS__), /* APL HD Graphics 505 */ \ + MACRO__(0x5A85, ## __VA_ARGS__) /* APL HD Graphics 500 */ + +#define INTEL_GLK_IDS(MACRO__, ...) \ + MACRO__(0x3184, ## __VA_ARGS__), \ + MACRO__(0x3185, ## __VA_ARGS__) + +#define INTEL_KBL_ULT_GT1_IDS(MACRO__, ...) \ + MACRO__(0x5906, ## __VA_ARGS__), /* ULT GT1 */ \ + MACRO__(0x5913, ## __VA_ARGS__) /* ULT GT1.5 */ + +#define INTEL_KBL_ULX_GT1_IDS(MACRO__, ...) \ + MACRO__(0x590E, ## __VA_ARGS__), /* ULX GT1 */ \ + MACRO__(0x5915, ## __VA_ARGS__) /* ULX GT1.5 */ + +#define INTEL_KBL_GT1_IDS(MACRO__, ...) \ + INTEL_KBL_ULT_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_KBL_ULX_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + MACRO__(0x5902, ## __VA_ARGS__), /* DT GT1 */ \ + MACRO__(0x5908, ## __VA_ARGS__), /* Halo GT1 */ \ + MACRO__(0x590A, ## __VA_ARGS__), /* SRV GT1 */ \ + MACRO__(0x590B, ## __VA_ARGS__) /* Halo GT1 */ + +#define INTEL_KBL_ULT_GT2_IDS(MACRO__, ...) \ + MACRO__(0x5916, ## __VA_ARGS__), /* ULT GT2 */ \ + MACRO__(0x5921, ## __VA_ARGS__) /* ULT GT2F */ + +#define INTEL_KBL_ULX_GT2_IDS(MACRO__, ...) \ + MACRO__(0x591E, ## __VA_ARGS__) /* ULX GT2 */ + +#define INTEL_KBL_GT2_IDS(MACRO__, ...) \ + INTEL_KBL_ULT_GT2_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_KBL_ULX_GT2_IDS(MACRO__, ## __VA_ARGS__), \ + MACRO__(0x5912, ## __VA_ARGS__), /* DT GT2 */ \ + MACRO__(0x5917, ## __VA_ARGS__), /* Mobile GT2 */ \ + MACRO__(0x591A, ## __VA_ARGS__), /* SRV GT2 */ \ + MACRO__(0x591B, ## __VA_ARGS__), /* Halo GT2 */ \ + MACRO__(0x591D, ## __VA_ARGS__) /* WKS GT2 */ + +#define INTEL_KBL_ULT_GT3_IDS(MACRO__, ...) \ + MACRO__(0x5926, ## __VA_ARGS__) /* ULT GT3 */ + +#define INTEL_KBL_GT3_IDS(MACRO__, ...) \ + INTEL_KBL_ULT_GT3_IDS(MACRO__, ## __VA_ARGS__), \ + MACRO__(0x5923, ## __VA_ARGS__), /* ULT GT3 */ \ + MACRO__(0x5927, ## __VA_ARGS__) /* ULT GT3 */ + +#define INTEL_KBL_GT4_IDS(MACRO__, ...) \ + MACRO__(0x593B, ## __VA_ARGS__) /* Halo GT4 */ + +/* AML/KBL Y GT2 */ +#define INTEL_AML_KBL_GT2_IDS(MACRO__, ...) \ + MACRO__(0x591C, ## __VA_ARGS__), /* ULX GT2 */ \ + MACRO__(0x87C0, ## __VA_ARGS__) /* ULX GT2 */ + +/* AML/CFL Y GT2 */ +#define INTEL_AML_CFL_GT2_IDS(MACRO__, ...) \ + MACRO__(0x87CA, ## __VA_ARGS__) + +/* CML GT1 */ +#define INTEL_CML_GT1_IDS(MACRO__, ...) \ + MACRO__(0x9BA2, ## __VA_ARGS__), \ + MACRO__(0x9BA4, ## __VA_ARGS__), \ + MACRO__(0x9BA5, ## __VA_ARGS__), \ + MACRO__(0x9BA8, ## __VA_ARGS__) + +#define INTEL_CML_U_GT1_IDS(MACRO__, ...) \ + MACRO__(0x9B21, ## __VA_ARGS__), \ + MACRO__(0x9BAA, ## __VA_ARGS__), \ + MACRO__(0x9BAC, ## __VA_ARGS__) + +/* CML GT2 */ +#define INTEL_CML_GT2_IDS(MACRO__, ...) \ + MACRO__(0x9BC2, ## __VA_ARGS__), \ + MACRO__(0x9BC4, ## __VA_ARGS__), \ + MACRO__(0x9BC5, ## __VA_ARGS__), \ + MACRO__(0x9BC6, ## __VA_ARGS__), \ + MACRO__(0x9BC8, ## __VA_ARGS__), \ + MACRO__(0x9BE6, ## __VA_ARGS__), \ + MACRO__(0x9BF6, ## __VA_ARGS__) + +#define INTEL_CML_U_GT2_IDS(MACRO__, ...) \ + MACRO__(0x9B41, ## __VA_ARGS__), \ + MACRO__(0x9BCA, ## __VA_ARGS__), \ + MACRO__(0x9BCC, ## __VA_ARGS__) + +#define INTEL_CML_IDS(MACRO__, ...) \ + INTEL_CML_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_CML_GT2_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_CML_U_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_CML_U_GT2_IDS(MACRO__, ## __VA_ARGS__) + +#define INTEL_KBL_IDS(MACRO__, ...) \ + INTEL_KBL_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_KBL_GT2_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_KBL_GT3_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_KBL_GT4_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_AML_KBL_GT2_IDS(MACRO__, ## __VA_ARGS__) + +/* CFL S */ +#define INTEL_CFL_S_GT1_IDS(MACRO__, ...) \ + MACRO__(0x3E90, ## __VA_ARGS__), /* SRV GT1 */ \ + MACRO__(0x3E93, ## __VA_ARGS__), /* SRV GT1 */ \ + MACRO__(0x3E99, ## __VA_ARGS__) /* SRV GT1 */ + +#define INTEL_CFL_S_GT2_IDS(MACRO__, ...) \ + MACRO__(0x3E91, ## __VA_ARGS__), /* SRV GT2 */ \ + MACRO__(0x3E92, ## __VA_ARGS__), /* SRV GT2 */ \ + MACRO__(0x3E96, ## __VA_ARGS__), /* SRV GT2 */ \ + MACRO__(0x3E98, ## __VA_ARGS__), /* SRV GT2 */ \ + MACRO__(0x3E9A, ## __VA_ARGS__) /* SRV GT2 */ + +/* CFL H */ +#define INTEL_CFL_H_GT1_IDS(MACRO__, ...) \ + MACRO__(0x3E9C, ## __VA_ARGS__) + +#define INTEL_CFL_H_GT2_IDS(MACRO__, ...) \ + MACRO__(0x3E94, ## __VA_ARGS__), /* Halo GT2 */ \ + MACRO__(0x3E9B, ## __VA_ARGS__) /* Halo GT2 */ + +/* CFL U GT2 */ +#define INTEL_CFL_U_GT2_IDS(MACRO__, ...) \ + MACRO__(0x3EA9, ## __VA_ARGS__) + +/* CFL U GT3 */ +#define INTEL_CFL_U_GT3_IDS(MACRO__, ...) \ + MACRO__(0x3EA5, ## __VA_ARGS__), /* ULT GT3 */ \ + MACRO__(0x3EA6, ## __VA_ARGS__), /* ULT GT3 */ \ + MACRO__(0x3EA7, ## __VA_ARGS__), /* ULT GT3 */ \ + MACRO__(0x3EA8, ## __VA_ARGS__) /* ULT GT3 */ + +#define INTEL_CFL_IDS(MACRO__, ...) \ + INTEL_CFL_S_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_CFL_S_GT2_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_CFL_H_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_CFL_H_GT2_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_CFL_U_GT2_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_CFL_U_GT3_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_AML_CFL_GT2_IDS(MACRO__, ## __VA_ARGS__) + +/* WHL/CFL U GT1 */ +#define INTEL_WHL_U_GT1_IDS(MACRO__, ...) \ + MACRO__(0x3EA1, ## __VA_ARGS__), \ + MACRO__(0x3EA4, ## __VA_ARGS__) + +/* WHL/CFL U GT2 */ +#define INTEL_WHL_U_GT2_IDS(MACRO__, ...) \ + MACRO__(0x3EA0, ## __VA_ARGS__), \ + MACRO__(0x3EA3, ## __VA_ARGS__) + +/* WHL/CFL U GT3 */ +#define INTEL_WHL_U_GT3_IDS(MACRO__, ...) \ + MACRO__(0x3EA2, ## __VA_ARGS__) + +#define INTEL_WHL_IDS(MACRO__, ...) \ + INTEL_WHL_U_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_WHL_U_GT2_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_WHL_U_GT3_IDS(MACRO__, ## __VA_ARGS__) + +/* CNL */ +#define INTEL_CNL_PORT_F_IDS(MACRO__, ...) \ + MACRO__(0x5A44, ## __VA_ARGS__), \ + MACRO__(0x5A4C, ## __VA_ARGS__), \ + MACRO__(0x5A54, ## __VA_ARGS__), \ + MACRO__(0x5A5C, ## __VA_ARGS__) + +#define INTEL_CNL_IDS(MACRO__, ...) \ + INTEL_CNL_PORT_F_IDS(MACRO__, ## __VA_ARGS__), \ + MACRO__(0x5A40, ## __VA_ARGS__), \ + MACRO__(0x5A41, ## __VA_ARGS__), \ + MACRO__(0x5A42, ## __VA_ARGS__), \ + MACRO__(0x5A49, ## __VA_ARGS__), \ + MACRO__(0x5A4A, ## __VA_ARGS__), \ + MACRO__(0x5A50, ## __VA_ARGS__), \ + MACRO__(0x5A51, ## __VA_ARGS__), \ + MACRO__(0x5A52, ## __VA_ARGS__), \ + MACRO__(0x5A59, ## __VA_ARGS__), \ + MACRO__(0x5A5A, ## __VA_ARGS__) + +/* ICL */ +#define INTEL_ICL_PORT_F_IDS(MACRO__, ...) \ + MACRO__(0x8A50, ## __VA_ARGS__), \ + MACRO__(0x8A52, ## __VA_ARGS__), \ + MACRO__(0x8A53, ## __VA_ARGS__), \ + MACRO__(0x8A54, ## __VA_ARGS__), \ + MACRO__(0x8A56, ## __VA_ARGS__), \ + MACRO__(0x8A57, ## __VA_ARGS__), \ + MACRO__(0x8A58, ## __VA_ARGS__), \ + MACRO__(0x8A59, ## __VA_ARGS__), \ + MACRO__(0x8A5A, ## __VA_ARGS__), \ + MACRO__(0x8A5B, ## __VA_ARGS__), \ + MACRO__(0x8A5C, ## __VA_ARGS__), \ + MACRO__(0x8A70, ## __VA_ARGS__), \ + MACRO__(0x8A71, ## __VA_ARGS__) + +#define INTEL_ICL_IDS(MACRO__, ...) \ + INTEL_ICL_PORT_F_IDS(MACRO__, ## __VA_ARGS__), \ + MACRO__(0x8A51, ## __VA_ARGS__), \ + MACRO__(0x8A5D, ## __VA_ARGS__) + +/* EHL */ +#define INTEL_EHL_IDS(MACRO__, ...) \ + MACRO__(0x4541, ## __VA_ARGS__), \ + MACRO__(0x4551, ## __VA_ARGS__), \ + MACRO__(0x4555, ## __VA_ARGS__), \ + MACRO__(0x4557, ## __VA_ARGS__), \ + MACRO__(0x4570, ## __VA_ARGS__), \ + MACRO__(0x4571, ## __VA_ARGS__) + +/* JSL */ +#define INTEL_JSL_IDS(MACRO__, ...) \ + MACRO__(0x4E51, ## __VA_ARGS__), \ + MACRO__(0x4E55, ## __VA_ARGS__), \ + MACRO__(0x4E57, ## __VA_ARGS__), \ + MACRO__(0x4E61, ## __VA_ARGS__), \ + MACRO__(0x4E71, ## __VA_ARGS__) + +/* TGL */ +#define INTEL_TGL_GT1_IDS(MACRO__, ...) \ + MACRO__(0x9A60, ## __VA_ARGS__), \ + MACRO__(0x9A68, ## __VA_ARGS__), \ + MACRO__(0x9A70, ## __VA_ARGS__) + +#define INTEL_TGL_GT2_IDS(MACRO__, ...) \ + MACRO__(0x9A40, ## __VA_ARGS__), \ + MACRO__(0x9A49, ## __VA_ARGS__), \ + MACRO__(0x9A59, ## __VA_ARGS__), \ + MACRO__(0x9A78, ## __VA_ARGS__), \ + MACRO__(0x9AC0, ## __VA_ARGS__), \ + MACRO__(0x9AC9, ## __VA_ARGS__), \ + MACRO__(0x9AD9, ## __VA_ARGS__), \ + MACRO__(0x9AF8, ## __VA_ARGS__) + +#define INTEL_TGL_IDS(MACRO__, ...) \ + INTEL_TGL_GT1_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_TGL_GT2_IDS(MACRO__, ## __VA_ARGS__) + +/* RKL */ +#define INTEL_RKL_IDS(MACRO__, ...) \ + MACRO__(0x4C80, ## __VA_ARGS__), \ + MACRO__(0x4C8A, ## __VA_ARGS__), \ + MACRO__(0x4C8B, ## __VA_ARGS__), \ + MACRO__(0x4C8C, ## __VA_ARGS__), \ + MACRO__(0x4C90, ## __VA_ARGS__), \ + MACRO__(0x4C9A, ## __VA_ARGS__) + +/* DG1 */ +#define INTEL_DG1_IDS(MACRO__, ...) \ + MACRO__(0x4905, ## __VA_ARGS__), \ + MACRO__(0x4906, ## __VA_ARGS__), \ + MACRO__(0x4907, ## __VA_ARGS__), \ + MACRO__(0x4908, ## __VA_ARGS__), \ + MACRO__(0x4909, ## __VA_ARGS__) + +/* ADL-S */ +#define INTEL_ADLS_IDS(MACRO__, ...) \ + MACRO__(0x4680, ## __VA_ARGS__), \ + MACRO__(0x4682, ## __VA_ARGS__), \ + MACRO__(0x4688, ## __VA_ARGS__), \ + MACRO__(0x468A, ## __VA_ARGS__), \ + MACRO__(0x468B, ## __VA_ARGS__), \ + MACRO__(0x4690, ## __VA_ARGS__), \ + MACRO__(0x4692, ## __VA_ARGS__), \ + MACRO__(0x4693, ## __VA_ARGS__) + +/* ADL-P */ +#define INTEL_ADLP_IDS(MACRO__, ...) \ + MACRO__(0x46A0, ## __VA_ARGS__), \ + MACRO__(0x46A1, ## __VA_ARGS__), \ + MACRO__(0x46A2, ## __VA_ARGS__), \ + MACRO__(0x46A3, ## __VA_ARGS__), \ + MACRO__(0x46A6, ## __VA_ARGS__), \ + MACRO__(0x46A8, ## __VA_ARGS__), \ + MACRO__(0x46AA, ## __VA_ARGS__), \ + MACRO__(0x462A, ## __VA_ARGS__), \ + MACRO__(0x4626, ## __VA_ARGS__), \ + MACRO__(0x4628, ## __VA_ARGS__), \ + MACRO__(0x46B0, ## __VA_ARGS__), \ + MACRO__(0x46B1, ## __VA_ARGS__), \ + MACRO__(0x46B2, ## __VA_ARGS__), \ + MACRO__(0x46B3, ## __VA_ARGS__), \ + MACRO__(0x46C0, ## __VA_ARGS__), \ + MACRO__(0x46C1, ## __VA_ARGS__), \ + MACRO__(0x46C2, ## __VA_ARGS__), \ + MACRO__(0x46C3, ## __VA_ARGS__) + +/* ADL-N */ +#define INTEL_ADLN_IDS(MACRO__, ...) \ + MACRO__(0x46D0, ## __VA_ARGS__), \ + MACRO__(0x46D1, ## __VA_ARGS__), \ + MACRO__(0x46D2, ## __VA_ARGS__), \ + MACRO__(0x46D3, ## __VA_ARGS__), \ + MACRO__(0x46D4, ## __VA_ARGS__) + +/* RPL-S */ +#define INTEL_RPLS_IDS(MACRO__, ...) \ + MACRO__(0xA780, ## __VA_ARGS__), \ + MACRO__(0xA781, ## __VA_ARGS__), \ + MACRO__(0xA782, ## __VA_ARGS__), \ + MACRO__(0xA783, ## __VA_ARGS__), \ + MACRO__(0xA788, ## __VA_ARGS__), \ + MACRO__(0xA789, ## __VA_ARGS__), \ + MACRO__(0xA78A, ## __VA_ARGS__), \ + MACRO__(0xA78B, ## __VA_ARGS__) + +/* RPL-U */ +#define INTEL_RPLU_IDS(MACRO__, ...) \ + MACRO__(0xA721, ## __VA_ARGS__), \ + MACRO__(0xA7A1, ## __VA_ARGS__), \ + MACRO__(0xA7A9, ## __VA_ARGS__), \ + MACRO__(0xA7AC, ## __VA_ARGS__), \ + MACRO__(0xA7AD, ## __VA_ARGS__) + +/* RPL-P */ +#define INTEL_RPLP_IDS(MACRO__, ...) \ + MACRO__(0xA720, ## __VA_ARGS__), \ + MACRO__(0xA7A0, ## __VA_ARGS__), \ + MACRO__(0xA7A8, ## __VA_ARGS__), \ + MACRO__(0xA7AA, ## __VA_ARGS__), \ + MACRO__(0xA7AB, ## __VA_ARGS__) + +/* DG2 */ +#define INTEL_DG2_G10_IDS(MACRO__, ...) \ + MACRO__(0x5690, ## __VA_ARGS__), \ + MACRO__(0x5691, ## __VA_ARGS__), \ + MACRO__(0x5692, ## __VA_ARGS__), \ + MACRO__(0x56A0, ## __VA_ARGS__), \ + MACRO__(0x56A1, ## __VA_ARGS__), \ + MACRO__(0x56A2, ## __VA_ARGS__), \ + MACRO__(0x56BE, ## __VA_ARGS__), \ + MACRO__(0x56BF, ## __VA_ARGS__) + +#define INTEL_DG2_G11_IDS(MACRO__, ...) \ + MACRO__(0x5693, ## __VA_ARGS__), \ + MACRO__(0x5694, ## __VA_ARGS__), \ + MACRO__(0x5695, ## __VA_ARGS__), \ + MACRO__(0x56A5, ## __VA_ARGS__), \ + MACRO__(0x56A6, ## __VA_ARGS__), \ + MACRO__(0x56B0, ## __VA_ARGS__), \ + MACRO__(0x56B1, ## __VA_ARGS__), \ + MACRO__(0x56BA, ## __VA_ARGS__), \ + MACRO__(0x56BB, ## __VA_ARGS__), \ + MACRO__(0x56BC, ## __VA_ARGS__), \ + MACRO__(0x56BD, ## __VA_ARGS__) + +#define INTEL_DG2_G12_IDS(MACRO__, ...) \ + MACRO__(0x5696, ## __VA_ARGS__), \ + MACRO__(0x5697, ## __VA_ARGS__), \ + MACRO__(0x56A3, ## __VA_ARGS__), \ + MACRO__(0x56A4, ## __VA_ARGS__), \ + MACRO__(0x56B2, ## __VA_ARGS__), \ + MACRO__(0x56B3, ## __VA_ARGS__) + +#define INTEL_DG2_IDS(MACRO__, ...) \ + INTEL_DG2_G10_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_DG2_G11_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_DG2_G12_IDS(MACRO__, ## __VA_ARGS__) + +#define INTEL_ATS_M150_IDS(MACRO__, ...) \ + MACRO__(0x56C0, ## __VA_ARGS__), \ + MACRO__(0x56C2, ## __VA_ARGS__) + +#define INTEL_ATS_M75_IDS(MACRO__, ...) \ + MACRO__(0x56C1, ## __VA_ARGS__) + +#define INTEL_ATS_M_IDS(MACRO__, ...) \ + INTEL_ATS_M150_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) + +/* ARL */ +#define INTEL_ARL_IDS(MACRO__, ...) \ + MACRO__(0x7D41, ## __VA_ARGS__), \ + MACRO__(0x7D51, ## __VA_ARGS__), \ + MACRO__(0x7D67, ## __VA_ARGS__), \ + MACRO__(0x7DD1, ## __VA_ARGS__), \ + MACRO__(0xB640, ## __VA_ARGS__) + +/* MTL */ +#define INTEL_MTL_IDS(MACRO__, ...) \ + MACRO__(0x7D40, ## __VA_ARGS__), \ + MACRO__(0x7D45, ## __VA_ARGS__), \ + MACRO__(0x7D55, ## __VA_ARGS__), \ + MACRO__(0x7D60, ## __VA_ARGS__), \ + MACRO__(0x7DD5, ## __VA_ARGS__) + +/* PVC */ +#define INTEL_PVC_IDS(MACRO__, ...) \ + MACRO__(0x0B69, ## __VA_ARGS__), \ + MACRO__(0x0B6E, ## __VA_ARGS__), \ + MACRO__(0x0BD4, ## __VA_ARGS__), \ + MACRO__(0x0BD5, ## __VA_ARGS__), \ + MACRO__(0x0BD6, ## __VA_ARGS__), \ + MACRO__(0x0BD7, ## __VA_ARGS__), \ + MACRO__(0x0BD8, ## __VA_ARGS__), \ + MACRO__(0x0BD9, ## __VA_ARGS__), \ + MACRO__(0x0BDA, ## __VA_ARGS__), \ + MACRO__(0x0BDB, ## __VA_ARGS__), \ + MACRO__(0x0BE0, ## __VA_ARGS__), \ + MACRO__(0x0BE1, ## __VA_ARGS__), \ + MACRO__(0x0BE5, ## __VA_ARGS__) + +/* LNL */ +#define INTEL_LNL_IDS(MACRO__, ...) \ + MACRO__(0x6420, ## __VA_ARGS__), \ + MACRO__(0x64A0, ## __VA_ARGS__), \ + MACRO__(0x64B0, ## __VA_ARGS__) + +/* BMG */ +#define INTEL_BMG_IDS(MACRO__, ...) \ + MACRO__(0xE202, ## __VA_ARGS__), \ + MACRO__(0xE20B, ## __VA_ARGS__), \ + MACRO__(0xE20C, ## __VA_ARGS__), \ + MACRO__(0xE20D, ## __VA_ARGS__), \ + MACRO__(0xE212, ## __VA_ARGS__) + +/* PTL */ +#define INTEL_PTL_IDS(MACRO__, ...) \ + MACRO__(0xB080, ## __VA_ARGS__), \ + MACRO__(0xB081, ## __VA_ARGS__), \ + MACRO__(0xB082, ## __VA_ARGS__), \ + MACRO__(0xB090, ## __VA_ARGS__), \ + MACRO__(0xB091, ## __VA_ARGS__), \ + MACRO__(0xB092, ## __VA_ARGS__), \ + MACRO__(0xB0A0, ## __VA_ARGS__), \ + MACRO__(0xB0A1, ## __VA_ARGS__), \ + MACRO__(0xB0A2, ## __VA_ARGS__) + +#endif /* __PCIIDS_H__ */ -- cgit v1.2.3 From 493454445c9531051bd27a0305a61953780bd453 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 22 Oct 2024 12:41:51 +0300 Subject: drm/xe: switch to common PCI ID macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switch to the shared PCI ID macros in drm/intel/pciids.h. Remove xe_pciids.h. Cc: Joonas Lahtinen Cc: Lucas De Marchi Cc: Rodrigo Vivi Cc: Thomas Hellström Cc: Tvrtko Ursulin Reviewed-by: Andi Shyti Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/84e08172184bdc6409cf6dd13f6c52971c647dbb.1729590029.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/intel/xe_pciids.h | 234 ------------------------------------------ 1 file changed, 234 deletions(-) delete mode 100644 include/drm/intel/xe_pciids.h (limited to 'include') diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h deleted file mode 100644 index 6d8d013f74e0..000000000000 --- a/include/drm/intel/xe_pciids.h +++ /dev/null @@ -1,234 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2022 Intel Corporation - */ - -#ifndef _XE_PCIIDS_H_ -#define _XE_PCIIDS_H_ - -/* - * Lists below can be turned into initializers for a struct pci_device_id - * by defining INTEL_VGA_DEVICE: - * - * #define INTEL_VGA_DEVICE(id, info) { \ - * 0x8086, id, \ - * ~0, ~0, \ - * 0x030000, 0xff0000, \ - * (unsigned long) info } - * - * And then calling like: - * - * XE_TGL_12_GT1_IDS(INTEL_VGA_DEVICE, ## __VA_ARGS__) - * - * To turn them into something else, just provide a different macro passed as - * first argument. - */ - -/* TGL */ -#define XE_TGL_GT1_IDS(MACRO__, ...) \ - MACRO__(0x9A60, ## __VA_ARGS__), \ - MACRO__(0x9A68, ## __VA_ARGS__), \ - MACRO__(0x9A70, ## __VA_ARGS__) - -#define XE_TGL_GT2_IDS(MACRO__, ...) \ - MACRO__(0x9A40, ## __VA_ARGS__), \ - MACRO__(0x9A49, ## __VA_ARGS__), \ - MACRO__(0x9A59, ## __VA_ARGS__), \ - MACRO__(0x9A78, ## __VA_ARGS__), \ - MACRO__(0x9AC0, ## __VA_ARGS__), \ - MACRO__(0x9AC9, ## __VA_ARGS__), \ - MACRO__(0x9AD9, ## __VA_ARGS__), \ - MACRO__(0x9AF8, ## __VA_ARGS__) - -#define XE_TGL_IDS(MACRO__, ...) \ - XE_TGL_GT1_IDS(MACRO__, ## __VA_ARGS__),\ - XE_TGL_GT2_IDS(MACRO__, ## __VA_ARGS__) - -/* RKL */ -#define XE_RKL_IDS(MACRO__, ...) \ - MACRO__(0x4C80, ## __VA_ARGS__), \ - MACRO__(0x4C8A, ## __VA_ARGS__), \ - MACRO__(0x4C8B, ## __VA_ARGS__), \ - MACRO__(0x4C8C, ## __VA_ARGS__), \ - MACRO__(0x4C90, ## __VA_ARGS__), \ - MACRO__(0x4C9A, ## __VA_ARGS__) - -/* DG1 */ -#define XE_DG1_IDS(MACRO__, ...) \ - MACRO__(0x4905, ## __VA_ARGS__), \ - MACRO__(0x4906, ## __VA_ARGS__), \ - MACRO__(0x4907, ## __VA_ARGS__), \ - MACRO__(0x4908, ## __VA_ARGS__), \ - MACRO__(0x4909, ## __VA_ARGS__) - -/* ADL-S */ -#define XE_ADLS_IDS(MACRO__, ...) \ - MACRO__(0x4680, ## __VA_ARGS__), \ - MACRO__(0x4682, ## __VA_ARGS__), \ - MACRO__(0x4688, ## __VA_ARGS__), \ - MACRO__(0x468A, ## __VA_ARGS__), \ - MACRO__(0x468B, ## __VA_ARGS__), \ - MACRO__(0x4690, ## __VA_ARGS__), \ - MACRO__(0x4692, ## __VA_ARGS__), \ - MACRO__(0x4693, ## __VA_ARGS__) - -/* ADL-P */ -#define XE_ADLP_IDS(MACRO__, ...) \ - MACRO__(0x46A0, ## __VA_ARGS__), \ - MACRO__(0x46A1, ## __VA_ARGS__), \ - MACRO__(0x46A2, ## __VA_ARGS__), \ - MACRO__(0x46A3, ## __VA_ARGS__), \ - MACRO__(0x46A6, ## __VA_ARGS__), \ - MACRO__(0x46A8, ## __VA_ARGS__), \ - MACRO__(0x46AA, ## __VA_ARGS__), \ - MACRO__(0x462A, ## __VA_ARGS__), \ - MACRO__(0x4626, ## __VA_ARGS__), \ - MACRO__(0x4628, ## __VA_ARGS__), \ - MACRO__(0x46B0, ## __VA_ARGS__), \ - MACRO__(0x46B1, ## __VA_ARGS__), \ - MACRO__(0x46B2, ## __VA_ARGS__), \ - MACRO__(0x46B3, ## __VA_ARGS__), \ - MACRO__(0x46C0, ## __VA_ARGS__), \ - MACRO__(0x46C1, ## __VA_ARGS__), \ - MACRO__(0x46C2, ## __VA_ARGS__), \ - MACRO__(0x46C3, ## __VA_ARGS__) - -/* ADL-N */ -#define XE_ADLN_IDS(MACRO__, ...) \ - MACRO__(0x46D0, ## __VA_ARGS__), \ - MACRO__(0x46D1, ## __VA_ARGS__), \ - MACRO__(0x46D2, ## __VA_ARGS__), \ - MACRO__(0x46D3, ## __VA_ARGS__), \ - MACRO__(0x46D4, ## __VA_ARGS__) - -/* RPL-S */ -#define XE_RPLS_IDS(MACRO__, ...) \ - MACRO__(0xA780, ## __VA_ARGS__), \ - MACRO__(0xA781, ## __VA_ARGS__), \ - MACRO__(0xA782, ## __VA_ARGS__), \ - MACRO__(0xA783, ## __VA_ARGS__), \ - MACRO__(0xA788, ## __VA_ARGS__), \ - MACRO__(0xA789, ## __VA_ARGS__), \ - MACRO__(0xA78A, ## __VA_ARGS__), \ - MACRO__(0xA78B, ## __VA_ARGS__) - -/* RPL-U */ -#define XE_RPLU_IDS(MACRO__, ...) \ - MACRO__(0xA721, ## __VA_ARGS__), \ - MACRO__(0xA7A1, ## __VA_ARGS__), \ - MACRO__(0xA7A9, ## __VA_ARGS__), \ - MACRO__(0xA7AC, ## __VA_ARGS__), \ - MACRO__(0xA7AD, ## __VA_ARGS__) - -/* RPL-P */ -#define XE_RPLP_IDS(MACRO__, ...) \ - MACRO__(0xA720, ## __VA_ARGS__), \ - MACRO__(0xA7A0, ## __VA_ARGS__), \ - MACRO__(0xA7A8, ## __VA_ARGS__), \ - MACRO__(0xA7AA, ## __VA_ARGS__), \ - MACRO__(0xA7AB, ## __VA_ARGS__) - -/* DG2 */ -#define XE_DG2_G10_IDS(MACRO__, ...) \ - MACRO__(0x5690, ## __VA_ARGS__), \ - MACRO__(0x5691, ## __VA_ARGS__), \ - MACRO__(0x5692, ## __VA_ARGS__), \ - MACRO__(0x56A0, ## __VA_ARGS__), \ - MACRO__(0x56A1, ## __VA_ARGS__), \ - MACRO__(0x56A2, ## __VA_ARGS__), \ - MACRO__(0x56BE, ## __VA_ARGS__), \ - MACRO__(0x56BF, ## __VA_ARGS__) - -#define XE_DG2_G11_IDS(MACRO__, ...) \ - MACRO__(0x5693, ## __VA_ARGS__), \ - MACRO__(0x5694, ## __VA_ARGS__), \ - MACRO__(0x5695, ## __VA_ARGS__), \ - MACRO__(0x56A5, ## __VA_ARGS__), \ - MACRO__(0x56A6, ## __VA_ARGS__), \ - MACRO__(0x56B0, ## __VA_ARGS__), \ - MACRO__(0x56B1, ## __VA_ARGS__), \ - MACRO__(0x56BA, ## __VA_ARGS__), \ - MACRO__(0x56BB, ## __VA_ARGS__), \ - MACRO__(0x56BC, ## __VA_ARGS__), \ - MACRO__(0x56BD, ## __VA_ARGS__) - -#define XE_DG2_G12_IDS(MACRO__, ...) \ - MACRO__(0x5696, ## __VA_ARGS__), \ - MACRO__(0x5697, ## __VA_ARGS__), \ - MACRO__(0x56A3, ## __VA_ARGS__), \ - MACRO__(0x56A4, ## __VA_ARGS__), \ - MACRO__(0x56B2, ## __VA_ARGS__), \ - MACRO__(0x56B3, ## __VA_ARGS__) - -#define XE_DG2_IDS(MACRO__, ...) \ - XE_DG2_G10_IDS(MACRO__, ## __VA_ARGS__),\ - XE_DG2_G11_IDS(MACRO__, ## __VA_ARGS__),\ - XE_DG2_G12_IDS(MACRO__, ## __VA_ARGS__) - -#define XE_ATS_M150_IDS(MACRO__, ...) \ - MACRO__(0x56C0, ## __VA_ARGS__), \ - MACRO__(0x56C2, ## __VA_ARGS__) - -#define XE_ATS_M75_IDS(MACRO__, ...) \ - MACRO__(0x56C1, ## __VA_ARGS__) - -#define XE_ATS_M_IDS(MACRO__, ...) \ - XE_ATS_M150_IDS(MACRO__, ## __VA_ARGS__),\ - XE_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) - -/* ARL */ -#define XE_ARL_IDS(MACRO__, ...) \ - MACRO__(0x7D41, ## __VA_ARGS__), \ - MACRO__(0x7D51, ## __VA_ARGS__), \ - MACRO__(0x7D67, ## __VA_ARGS__), \ - MACRO__(0x7DD1, ## __VA_ARGS__), \ - MACRO__(0xB640, ## __VA_ARGS__) - -/* MTL */ -#define XE_MTL_IDS(MACRO__, ...) \ - MACRO__(0x7D40, ## __VA_ARGS__), \ - MACRO__(0x7D45, ## __VA_ARGS__), \ - MACRO__(0x7D55, ## __VA_ARGS__), \ - MACRO__(0x7D60, ## __VA_ARGS__), \ - MACRO__(0x7DD5, ## __VA_ARGS__) - -/* PVC */ -#define XE_PVC_IDS(MACRO__, ...) \ - MACRO__(0x0B69, ## __VA_ARGS__), \ - MACRO__(0x0B6E, ## __VA_ARGS__), \ - MACRO__(0x0BD4, ## __VA_ARGS__), \ - MACRO__(0x0BD5, ## __VA_ARGS__), \ - MACRO__(0x0BD6, ## __VA_ARGS__), \ - MACRO__(0x0BD7, ## __VA_ARGS__), \ - MACRO__(0x0BD8, ## __VA_ARGS__), \ - MACRO__(0x0BD9, ## __VA_ARGS__), \ - MACRO__(0x0BDA, ## __VA_ARGS__), \ - MACRO__(0x0BDB, ## __VA_ARGS__), \ - MACRO__(0x0BE0, ## __VA_ARGS__), \ - MACRO__(0x0BE1, ## __VA_ARGS__), \ - MACRO__(0x0BE5, ## __VA_ARGS__) - -#define XE_LNL_IDS(MACRO__, ...) \ - MACRO__(0x6420, ## __VA_ARGS__), \ - MACRO__(0x64A0, ## __VA_ARGS__), \ - MACRO__(0x64B0, ## __VA_ARGS__) - -#define XE_BMG_IDS(MACRO__, ...) \ - MACRO__(0xE202, ## __VA_ARGS__), \ - MACRO__(0xE20B, ## __VA_ARGS__), \ - MACRO__(0xE20C, ## __VA_ARGS__), \ - MACRO__(0xE20D, ## __VA_ARGS__), \ - MACRO__(0xE212, ## __VA_ARGS__) - -#define XE_PTL_IDS(MACRO__, ...) \ - MACRO__(0xB080, ## __VA_ARGS__), \ - MACRO__(0xB081, ## __VA_ARGS__), \ - MACRO__(0xB082, ## __VA_ARGS__), \ - MACRO__(0xB090, ## __VA_ARGS__), \ - MACRO__(0xB091, ## __VA_ARGS__), \ - MACRO__(0xB092, ## __VA_ARGS__), \ - MACRO__(0xB0A0, ## __VA_ARGS__), \ - MACRO__(0xB0A1, ## __VA_ARGS__), \ - MACRO__(0xB0A2, ## __VA_ARGS__) - -#endif -- cgit v1.2.3 From 2f5a65ef30a636d5030917eebd283ac447a212af Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Oct 2024 15:19:37 +0100 Subject: block: add a bdev_limits helper Add a helper to get the queue_limits from the bdev without having to poke into the request_queue. Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Link: https://lore.kernel.org/r/20241029141937.249920-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d0a52ed05e60..7bfc877e159e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1159,6 +1159,11 @@ enum blk_default_limits { */ #define BLK_DEF_MAX_SECTORS_CAP 2560u +static inline struct queue_limits *bdev_limits(struct block_device *bdev) +{ + return &bdev_get_queue(bdev)->limits; +} + static inline unsigned long queue_segment_boundary(const struct request_queue *q) { return q->limits.seg_boundary_mask; @@ -1293,23 +1298,23 @@ unsigned int bdev_discard_alignment(struct block_device *bdev); static inline unsigned int bdev_max_discard_sectors(struct block_device *bdev) { - return bdev_get_queue(bdev)->limits.max_discard_sectors; + return bdev_limits(bdev)->max_discard_sectors; } static inline unsigned int bdev_discard_granularity(struct block_device *bdev) { - return bdev_get_queue(bdev)->limits.discard_granularity; + return bdev_limits(bdev)->discard_granularity; } static inline unsigned int bdev_max_secure_erase_sectors(struct block_device *bdev) { - return bdev_get_queue(bdev)->limits.max_secure_erase_sectors; + return bdev_limits(bdev)->max_secure_erase_sectors; } static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev) { - return bdev_get_queue(bdev)->limits.max_write_zeroes_sectors; + return bdev_limits(bdev)->max_write_zeroes_sectors; } static inline bool bdev_nonrot(struct block_device *bdev) @@ -1345,7 +1350,7 @@ static inline bool bdev_write_cache(struct block_device *bdev) static inline bool bdev_fua(struct block_device *bdev) { - return bdev_get_queue(bdev)->limits.features & BLK_FEAT_FUA; + return bdev_limits(bdev)->features & BLK_FEAT_FUA; } static inline bool bdev_nowait(struct block_device *bdev) -- cgit v1.2.3 From a377132154ab8404dafcc52e8bc0c73050a954c2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 24 Sep 2024 05:57:31 -0600 Subject: io_uring/msg_ring: add support for sending a sync message Normally MSG_RING requires both a source and a destination ring. But some users don't always have a ring avilable to send a message from, yet they still need to notify a target ring. Add support for using io_uring_register(2) without having a source ring, using a file descriptor of -1 for that. Internally those are called blind registration opcodes. Implement IORING_REGISTER_SEND_MSG_RING as a blind opcode, which simply takes an sqe that the application can put on the stack and use the normal liburing helpers to initialize it. Then the app can call: io_uring_register(-1, IORING_REGISTER_SEND_MSG_RING, &sqe, 1); and get the same behavior in terms of the target, where a CQE is posted with the details given in the sqe. For now this takes a single sqe pointer argument, and hence arg must be set to that, and nr_args must be 1. Could easily be extended to take an array of sqes, but for now let's keep it simple. Link: https://lore.kernel.org/r/20240924115932.116167-3-axboe@kernel.dk Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 1fe79e750470..86cb385fe0b5 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -612,6 +612,9 @@ enum io_uring_register_op { /* clone registered buffers from source ring to current ring */ IORING_REGISTER_CLONE_BUFFERS = 30, + /* send MSG_RING without having a ring */ + IORING_REGISTER_SEND_MSG_RING = 31, + /* this goes last */ IORING_REGISTER_LAST, -- cgit v1.2.3 From 085268829b07202cf7bf8ec1a8fb7fd9d8f6a41a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 30 Sep 2024 14:22:36 -0600 Subject: io_uring/poll: get rid of unlocked cancel hash io_uring maintains two hash lists of inflight requests: 1) ctx->cancel_table_locked. This is used when the caller has the ctx->uring_lock held already. This is only an issue side parameter, as removal or task_work will always have it held. 2) ctx->cancel_table. This is used when the issuer does NOT have the ctx->uring_lock held, and relies on the table spinlocks for access. However, it's pretty trivial to simply grab the lock in the one spot where we care about it, for insertion. With that, we can kill the unlocked table (and get rid of the _locked postfix for the other one). Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 4b9ba523978d..d8ca27da1341 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -291,7 +291,7 @@ struct io_ring_ctx { struct xarray io_bl_xa; - struct io_hash_table cancel_table_locked; + struct io_hash_table cancel_table; struct io_alloc_cache apoll_cache; struct io_alloc_cache netmsg_cache; struct io_alloc_cache rw_cache; @@ -342,7 +342,6 @@ struct io_ring_ctx { struct list_head io_buffers_comp; struct list_head cq_overflow_list; - struct io_hash_table cancel_table; struct hlist_head waitid_list; @@ -459,7 +458,6 @@ enum { REQ_F_DOUBLE_POLL_BIT, REQ_F_APOLL_MULTISHOT_BIT, REQ_F_CLEAR_POLLIN_BIT, - REQ_F_HASH_LOCKED_BIT, /* keep async read/write and isreg together and in order */ REQ_F_SUPPORT_NOWAIT_BIT, REQ_F_ISREG_BIT, @@ -534,8 +532,6 @@ enum { REQ_F_APOLL_MULTISHOT = IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT), /* recvmsg special flag, clear EPOLLIN */ REQ_F_CLEAR_POLLIN = IO_REQ_FLAG(REQ_F_CLEAR_POLLIN_BIT), - /* hashed into ->cancel_hash_locked, protected by ->uring_lock */ - REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT), /* don't use lazy poll wake for this request */ REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT), /* file is pollable */ -- cgit v1.2.3 From ba4366f57b117c2eab996642288e5c75646ccfc9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 30 Sep 2024 14:29:06 -0600 Subject: io_uring/poll: get rid of per-hashtable bucket locks Any access to the table is protected by ctx->uring_lock now anyway, the per-bucket locking doesn't buy us anything. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index d8ca27da1341..9c7e1d3f06e5 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -67,7 +67,6 @@ struct io_file_table { }; struct io_hash_bucket { - spinlock_t lock; struct hlist_head list; } ____cacheline_aligned_in_smp; -- cgit v1.2.3 From 2946f08ae9ed650b94e0ffebcdfdda8de76bd926 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 18 Oct 2024 17:14:00 +0100 Subject: io_uring: clean up cqe trace points We have too many helpers posting CQEs, instead of tracing completion events before filling in a CQE and thus having to pass all the data, set the CQE first, pass it to the tracing helper and let it extract everything it needs. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/b83c1ca9ee5aed2df0f3bb743bf5ed699cce4c86.1729267437.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 5 +++++ include/trace/events/io_uring.h | 24 +++++++++--------------- 2 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 9c7e1d3f06e5..391087144666 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -662,4 +662,9 @@ struct io_overflow_cqe { struct io_uring_cqe cqe; }; +static inline bool io_ctx_cqe32(struct io_ring_ctx *ctx) +{ + return ctx->flags & IORING_SETUP_CQE32; +} + #endif diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index 412c9c210a32..fb81c533b310 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -315,20 +315,14 @@ TRACE_EVENT(io_uring_fail_link, * io_uring_complete - called when completing an SQE * * @ctx: pointer to a ring context structure - * @req: pointer to a submitted request - * @user_data: user data associated with the request - * @res: result of the request - * @cflags: completion flags - * @extra1: extra 64-bit data for CQE32 - * @extra2: extra 64-bit data for CQE32 - * + * @req: (optional) pointer to a submitted request + * @cqe: pointer to the filled in CQE being posted */ TRACE_EVENT(io_uring_complete, - TP_PROTO(void *ctx, void *req, u64 user_data, int res, unsigned cflags, - u64 extra1, u64 extra2), +TP_PROTO(struct io_ring_ctx *ctx, void *req, struct io_uring_cqe *cqe), - TP_ARGS(ctx, req, user_data, res, cflags, extra1, extra2), + TP_ARGS(ctx, req, cqe), TP_STRUCT__entry ( __field( void *, ctx ) @@ -343,11 +337,11 @@ TRACE_EVENT(io_uring_complete, TP_fast_assign( __entry->ctx = ctx; __entry->req = req; - __entry->user_data = user_data; - __entry->res = res; - __entry->cflags = cflags; - __entry->extra1 = extra1; - __entry->extra2 = extra2; + __entry->user_data = cqe->user_data; + __entry->res = cqe->res; + __entry->cflags = cqe->flags; + __entry->extra1 = io_ctx_cqe32(ctx) ? cqe->big_cqe[0] : 0; + __entry->extra2 = io_ctx_cqe32(ctx) ? cqe->big_cqe[1] : 0; ), TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x " -- cgit v1.2.3 From e6d43739d0ee49a39505d696ba6a656f47c2bd39 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 16 Oct 2024 15:54:06 -0600 Subject: io_uring: kill 'imu' from struct io_kiocb It's no longer being used, remove it. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 391087144666..6d3ee71bd832 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -613,9 +613,6 @@ struct io_kiocb { struct task_struct *task; union { - /* store used ubuf, so we can prevent reloading */ - struct io_mapped_ubuf *imu; - /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */ struct io_buffer *kbuf; -- cgit v1.2.3 From 79cfe9e59c2a12c3b3faeeefe38d23f3d8030972 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 21 Oct 2024 13:34:10 -0600 Subject: io_uring/register: add IORING_REGISTER_RESIZE_RINGS Once a ring has been created, the size of the CQ and SQ rings are fixed. Usually this isn't a problem on the SQ ring side, as it merely controls the available number of requests that can be submitted in a single system call, and there's rarely a need to change that. For the CQ ring, it's a different story. For most efficient use of io_uring, it's important that the CQ ring never overflows. This means that applications must size it for the worst case scenario, which can be wasteful. Add IORING_REGISTER_RESIZE_RINGS, which allows an application to resize the existing rings. It takes a struct io_uring_params argument, the same one which is used to setup the ring initially, and resizes rings according to the sizes given. Certain properties are always inherited from the original ring setup, like SQE128/CQE32 and other setup options. The implementation only allows flag associated with how the CQ ring is sized and clamped. Existing unconsumed SQE and CQE entries are copied as part of the process. If either the SQ or CQ resized destination ring cannot hold the entries already present in the source rings, then the operation is failed with -EOVERFLOW. Any register op holds ->uring_lock, which prevents new submissions, and the internal mapping holds the completion lock as well across moving CQ ring state. To prevent races between mmap and ring resizing, add a mutex that's solely used to serialize ring resize and mmap. mmap_sem can't be used here, as as fork'ed process may be doing mmaps on the ring as well. The ctx->resize_lock is held across mmap operations, and the resize will grab it before swapping out the already mapped new data. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 7 +++++++ include/uapi/linux/io_uring.h | 5 +++++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 6d3ee71bd832..841579dcdae9 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -415,6 +415,13 @@ struct io_ring_ctx { /* protected by ->completion_lock */ unsigned evfd_last_cq_tail; + /* + * Protection for resize vs mmap races - both the mmap and resize + * side will need to grab this lock, to prevent either side from + * being run concurrently with the other. + */ + struct mutex resize_lock; + /* * If IORING_SETUP_NO_MMAP is used, then the below holds * the gup'ed pages for the two rings, and the sqes. diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 86cb385fe0b5..60b9c98595fa 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -615,6 +615,11 @@ enum io_uring_register_op { /* send MSG_RING without having a ring */ IORING_REGISTER_SEND_MSG_RING = 31, + /* 32 reserved for zc rx */ + + /* resize CQ ring */ + IORING_REGISTER_RESIZE_RINGS = 33, + /* this goes last */ IORING_REGISTER_LAST, -- cgit v1.2.3 From aa00f67adc2c0d6439f81b5a81ff181377c47a7e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 22 Oct 2024 13:47:00 -0600 Subject: io_uring: add support for fixed wait regions Generally applications have 1 or a few waits of waiting, yet they pass in a struct io_uring_getevents_arg every time. This needs to get copied and, in turn, the timeout value needs to get copied. Rather than do this for every invocation, allow the application to register a fixed set of wait regions that can simply be indexed when asking the kernel to wait on events. At ring setup time, the application can register a number of these wait regions and initialize region/index 0 upfront: struct io_uring_reg_wait *reg; reg = io_uring_setup_reg_wait(ring, nr_regions, &ret); /* set timeout and mark as set, sigmask/sigmask_sz as needed */ reg->ts.tv_sec = 0; reg->ts.tv_nsec = 100000; reg->flags = IORING_REG_WAIT_TS; where nr_regions >= 1 && nr_regions <= PAGE_SIZE / sizeof(*reg). The above initializes index 0, but 63 other regions can be initialized, if needed. Now, instead of doing: struct __kernel_timespec timeout = { .tv_nsec = 100000, }; io_uring_submit_and_wait_timeout(ring, &cqe, nr, &t, NULL); to wait for events for each submit_and_wait, or just wait, operation, it can just reference the above region at offset 0 and do: io_uring_submit_and_wait_reg(ring, &cqe, nr, 0); to achieve the same goal of waiting 100usec without needing to copy both struct io_uring_getevents_arg (24b) and struct __kernel_timeout (16b) for each invocation. Struct io_uring_reg_wait looks as follows: struct io_uring_reg_wait { struct __kernel_timespec ts; __u32 min_wait_usec; __u32 flags; __u64 sigmask; __u32 sigmask_sz; __u32 pad[3]; __u64 pad2[2]; }; embedding the timeout itself in the region, rather than passing it as a pointer as well. Note that the signal mask is still passed as a pointer, both for compatability reasons, but also because there doesn't seem to be a lot of high frequency waits scenarios that involve setting and resetting the signal mask for each wait. The application is free to modify any region before a wait call, or it can use keep multiple regions with different settings to avoid needing to modify the same one for wait calls. Up to a page size of regions is mapped by default, allowing PAGE_SIZE / 64 available regions for use. The registered region must fit within a page. On a 4kb page size system, that allows for 64 wait regions if a full page is used, as the size of struct io_uring_reg_wait is 64b. The region registered must be aligned to io_uring_reg_wait in size. It's valid to register less than 64 entries. In network performance testing with zero-copy, this reduced the time spent waiting on the TX side from 3.12% to 0.3% and the RX side from 4.4% to 0.3%. Wait regions are fixed for the lifetime of the ring - once registered, they are persistent until the ring is torn down. The regions support minimum wait timeout as well as the regular waits. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 10 ++++++++++ include/uapi/linux/io_uring.h | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 841579dcdae9..2f12828b22a4 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -327,6 +327,14 @@ struct io_ring_ctx { atomic_t cq_wait_nr; atomic_t cq_timeouts; struct wait_queue_head cq_wait; + + /* + * If registered with IORING_REGISTER_CQWAIT_REG, a single + * page holds N entries, mapped in cq_wait_arg. cq_wait_index + * is the maximum allowable index. + */ + struct io_uring_reg_wait *cq_wait_arg; + unsigned char cq_wait_index; } ____cacheline_aligned_in_smp; /* timeouts */ @@ -430,6 +438,8 @@ struct io_ring_ctx { unsigned short n_sqe_pages; struct page **ring_pages; struct page **sqe_pages; + + struct page **cq_wait_page; }; struct io_tw_state { diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 60b9c98595fa..65b7417c1b05 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -518,6 +518,7 @@ struct io_cqring_offsets { #define IORING_ENTER_EXT_ARG (1U << 3) #define IORING_ENTER_REGISTERED_RING (1U << 4) #define IORING_ENTER_ABS_TIMER (1U << 5) +#define IORING_ENTER_EXT_ARG_REG (1U << 6) /* * Passed in for io_uring_setup(2). Copied back with updated info on success @@ -620,6 +621,9 @@ enum io_uring_register_op { /* resize CQ ring */ IORING_REGISTER_RESIZE_RINGS = 33, + /* register fixed io_uring_reg_wait arguments */ + IORING_REGISTER_CQWAIT_REG = 34, + /* this goes last */ IORING_REGISTER_LAST, @@ -803,6 +807,43 @@ enum io_uring_register_restriction_op { IORING_RESTRICTION_LAST }; +enum { + IORING_REG_WAIT_TS = (1U << 0), +}; + +/* + * Argument for IORING_REGISTER_CQWAIT_REG, registering a region of + * struct io_uring_reg_wait that can be indexed when io_uring_enter(2) is + * called rather than pass in a wait argument structure separately. + */ +struct io_uring_cqwait_reg_arg { + __u32 flags; + __u32 struct_size; + __u32 nr_entries; + __u32 pad; + __u64 user_addr; + __u64 pad2[3]; +}; + +/* + * Argument for io_uring_enter(2) with + * IORING_GETEVENTS | IORING_ENTER_EXT_ARG_REG set, where the actual argument + * is an index into a previously registered fixed wait region described by + * the below structure. + */ +struct io_uring_reg_wait { + struct __kernel_timespec ts; + __u32 min_wait_usec; + __u32 flags; + __u64 sigmask; + __u32 sigmask_sz; + __u32 pad[3]; + __u64 pad2[2]; +}; + +/* + * Argument for io_uring_enter(2) with IORING_GETEVENTS | IORING_ENTER_EXT_ARG + */ struct io_uring_getevents_arg { __u64 sigmask; __u32 sigmask_sz; -- cgit v1.2.3 From a85f31052bce52111b4e9d5a536003481d0421d0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 27 Oct 2024 08:59:10 -0600 Subject: io_uring/nop: add support for testing registered files and buffers Useful for testing performance/efficiency impact of registered files and buffers, vs (particularly) non-registered files. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 65b7417c1b05..024745283783 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -416,6 +416,9 @@ enum io_uring_msg_ring_flags { * IORING_NOP_INJECT_RESULT Inject result from sqe->result */ #define IORING_NOP_INJECT_RESULT (1U << 0) +#define IORING_NOP_FILE (1U << 1) +#define IORING_NOP_FIXED_FILE (1U << 2) +#define IORING_NOP_FIXED_BUFFER (1U << 3) /* * IO completion data structure (Completion Queue Entry) -- cgit v1.2.3 From ff1256b8f3c45f222bce19fbfc1e1bc498b31d03 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 25 Oct 2024 08:54:28 -0600 Subject: io_uring/rsrc: move struct io_fixed_file to rsrc.h header There's no need for this internal structure to be visible, move it to the private rsrc.h header instead. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 2f12828b22a4..d4ba4ae480d6 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -55,11 +55,6 @@ struct io_wq_work { int cancel_seq; }; -struct io_fixed_file { - /* file * with additional FFS_* flags */ - unsigned long file_ptr; -}; - struct io_file_table { struct io_fixed_file *files; unsigned long *bitmap; -- cgit v1.2.3 From 729ee76561fd3d0766ab70de92076d7f12f465b9 Mon Sep 17 00:00:00 2001 From: Nikunj Kela Date: Tue, 10 Sep 2024 10:15:32 -0700 Subject: dt-bindings: arm: qcom: add the SoC ID for SA8255P Add the SoC ID entry for SA8255P. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Nikunj Kela Link: https://lore.kernel.org/r/20240910171534.2412263-2-quic_nkela@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/arm/qcom,ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h index 1a53ca2df523..f27c96603f91 100644 --- a/include/dt-bindings/arm/qcom,ids.h +++ b/include/dt-bindings/arm/qcom,ids.h @@ -258,6 +258,7 @@ #define QCOM_ID_SAR2130P 525 #define QCOM_ID_SM8475 530 #define QCOM_ID_SM8475P 531 +#define QCOM_ID_SA8255P 532 #define QCOM_ID_SA8775P 534 #define QCOM_ID_QRU1000 539 #define QCOM_ID_SM8475_2 540 -- cgit v1.2.3 From 8cbebdfe3e794e5b4f3e08a7297a513bf14fec6e Mon Sep 17 00:00:00 2001 From: Tengfei Fan Date: Wed, 11 Sep 2024 19:10:55 +0800 Subject: dt-bindings: arm: qcom,ids: add SoC ID for QCS9100 Add the ID for the Qualcomm QCS9100 SoC. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Tengfei Fan Link: https://lore.kernel.org/r/20240911-add_qcs9100_support-v2-1-e43a71ceb017@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/arm/qcom,ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h index f27c96603f91..b66f8aeb9f47 100644 --- a/include/dt-bindings/arm/qcom,ids.h +++ b/include/dt-bindings/arm/qcom,ids.h @@ -279,6 +279,7 @@ #define QCOM_ID_QCM8550 604 #define QCOM_ID_IPQ5300 624 #define QCOM_ID_IPQ5321 650 +#define QCOM_ID_QCS9100 667 #define QCOM_ID_QCS8300 674 #define QCOM_ID_QCS8275 675 #define QCOM_ID_QCS615 680 -- cgit v1.2.3 From b2473a359763e27567993e7d8f37de82f57a0829 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Wed, 23 Oct 2024 18:14:26 +0100 Subject: of/fdt: add dt_phys arg to early_init_dt_scan and early_init_dt_verify __pa() is only intended to be used for linear map addresses and using it for initial_boot_params which is in fixmap for arm64 will give an incorrect value. Hence save the physical address when it is known at boot time when calling early_init_dt_scan for arm64 and use it at kexec time instead of converting the virtual address using __pa(). Note that arm64 doesn't need the FDT region reserved in the DT as the kernel explicitly reserves the passed in FDT. Therefore, only a debug warning is fixed with this change. Reported-by: Breno Leitao Suggested-by: Mark Rutland Signed-off-by: Usama Arif Fixes: ac10be5cdbfa ("arm64: Use common of_kexec_alloc_and_setup_fdt()") Link: https://lore.kernel.org/r/20241023171426.452688-1-usamaarif642@gmail.com Signed-off-by: Rob Herring (Arm) --- include/linux/of_fdt.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index d69ad5bb1eb1..b8d6c0c20876 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -31,6 +31,7 @@ extern void *of_fdt_unflatten_tree(const unsigned long *blob, extern int __initdata dt_root_addr_cells; extern int __initdata dt_root_size_cells; extern void *initial_boot_params; +extern phys_addr_t initial_boot_params_pa; extern char __dtb_start[]; extern char __dtb_end[]; @@ -70,8 +71,8 @@ extern u64 dt_mem_next_cell(int s, const __be32 **cellp); /* Early flat tree scan hooks */ extern int early_init_dt_scan_root(void); -extern bool early_init_dt_scan(void *params); -extern bool early_init_dt_verify(void *params); +extern bool early_init_dt_scan(void *dt_virt, phys_addr_t dt_phys); +extern bool early_init_dt_verify(void *dt_virt, phys_addr_t dt_phys); extern void early_init_dt_scan_nodes(void); extern const char *of_flat_dt_get_machine_name(void); -- cgit v1.2.3 From 4bbd360a5084d8f890f814327e1d9fbb1f0f6fa1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 24 Oct 2024 13:14:58 -0700 Subject: socket: Print pf->create() when it does not clear sock->sk on failure. I suggested to put DEBUG_NET_WARN_ON_ONCE() in __sock_create() to catch possible use-after-free. But the warning itself was not useful because our interest is in the callee than the caller. Let's define DEBUG_NET_WARN_ONCE() and print the name of pf->create() and the socket identifier. While at it, we enclose DEBUG_NET_WARN_ON_ONCE() in parentheses too to avoid a checkpatch error. Note that %pf or %pF were obsoleted and will be removed later as per comment in lib/vsprintf.c. Link: https://lore.kernel.org/netdev/202410231427.633734b3-lkp@intel.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241024201458.49412-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/net_debug.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/net_debug.h b/include/net/net_debug.h index 1e74684cbbdb..9fecb1496be3 100644 --- a/include/net/net_debug.h +++ b/include/net/net_debug.h @@ -149,9 +149,11 @@ do { \ #if defined(CONFIG_DEBUG_NET) -#define DEBUG_NET_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) +#define DEBUG_NET_WARN_ON_ONCE(cond) ((void)WARN_ON_ONCE(cond)) +#define DEBUG_NET_WARN_ONCE(cond, format...) ((void)WARN_ONCE(cond, format)) #else #define DEBUG_NET_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) +#define DEBUG_NET_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) #endif #endif /* _LINUX_NET_DEBUG_H */ -- cgit v1.2.3 From 2a0df10434ddeb8b5b5aded90a5659aff3b106d7 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Wed, 23 Oct 2024 15:09:06 +0200 Subject: devlink: remove unused devlink_resource_occ_get_register() and _unregister() Remove not used devlink_resource_occ_get_register() and devlink_resource_occ_get_unregister() functions; current devlink resource users are fine with devl_ variants of the two. Reviewed-by: Wojciech Drewek Reviewed-by: Jiri Pirko Reviewed-by: Joe Damato Signed-off-by: Przemek Kitszel Link: https://patch.msgid.link/20241023131248.27192-7-przemyslaw.kitszel@intel.com Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index db5eff6cb60f..fdd6a0f9891d 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1797,15 +1797,8 @@ void devl_resource_occ_get_register(struct devlink *devlink, u64 resource_id, devlink_resource_occ_get_t *occ_get, void *occ_get_priv); -void devlink_resource_occ_get_register(struct devlink *devlink, - u64 resource_id, - devlink_resource_occ_get_t *occ_get, - void *occ_get_priv); void devl_resource_occ_get_unregister(struct devlink *devlink, u64 resource_id); - -void devlink_resource_occ_get_unregister(struct devlink *devlink, - u64 resource_id); int devl_params_register(struct devlink *devlink, const struct devlink_param *params, size_t params_count); -- cgit v1.2.3 From e3302f9a503a632c125170dc3c72b2886a910b0a Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Wed, 23 Oct 2024 15:09:07 +0200 Subject: devlink: remove unused devlink_resource_register() Remove unused devlink_resource_register(); all the drivers use devl_resource_register() variant instead. Reviewed-by: Wojciech Drewek Reviewed-by: Jiri Pirko Reviewed-by: Joe Damato Signed-off-by: Przemek Kitszel Link: https://patch.msgid.link/20241023131248.27192-8-przemyslaw.kitszel@intel.com Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index fdd6a0f9891d..fbb9a2668e24 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1779,12 +1779,6 @@ int devl_resource_register(struct devlink *devlink, u64 resource_id, u64 parent_resource_id, const struct devlink_resource_size_params *size_params); -int devlink_resource_register(struct devlink *devlink, - const char *resource_name, - u64 resource_size, - u64 resource_id, - u64 parent_resource_id, - const struct devlink_resource_size_params *size_params); void devl_resources_unregister(struct devlink *devlink); void devlink_resources_unregister(struct devlink *devlink); int devl_resource_size_get(struct devlink *devlink, -- cgit v1.2.3 From 90ee6ed776c06435a3fe79c7f5344761f52e1760 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Oct 2024 16:23:59 +0200 Subject: fs: port files to file_ref Port files to rely on file_ref reference to improve scaling and gain overflow protection. - We continue to WARN during get_file() in case a file that is already marked dead is revived as get_file() is only valid if the caller already holds a reference to the file. This hasn't changed just the check changes. - The semantics for epoll and ttm's dmabuf usage have changed. Both epoll and ttm synchronize with __fput() to prevent the underlying file from beeing freed. (1) epoll Explaining epoll is straightforward using a simple diagram. Essentially, the mutex of the epoll instance needs to be taken in both __fput() and around epi_fget() preventing the file from being freed while it is polled or preventing the file from being resurrected. CPU1 CPU2 fput(file) -> __fput(file) -> eventpoll_release(file) -> eventpoll_release_file(file) mutex_lock(&ep->mtx) epi_item_poll() -> epi_fget() -> file_ref_get(file) mutex_unlock(&ep->mtx) mutex_lock(&ep->mtx); __ep_remove() mutex_unlock(&ep->mtx); -> kmem_cache_free(file) (2) ttm dmabuf This explanation is a bit more involved. A regular dmabuf file stashed the dmabuf in file->private_data and the file in dmabuf->file: file->private_data = dmabuf; dmabuf->file = file; The generic release method of a dmabuf file handles file specific things: f_op->release::dma_buf_file_release() while the generic dentry release method of a dmabuf handles dmabuf freeing including driver specific things: dentry->d_release::dma_buf_release() During ttm dmabuf initialization in ttm_object_device_init() the ttm driver copies the provided struct dma_buf_ops into a private location: struct ttm_object_device { spinlock_t object_lock; struct dma_buf_ops ops; void (*dmabuf_release)(struct dma_buf *dma_buf); struct idr idr; }; ttm_object_device_init(const struct dma_buf_ops *ops) { // copy original dma_buf_ops in private location tdev->ops = *ops; // stash the release method of the original struct dma_buf_ops tdev->dmabuf_release = tdev->ops.release; // override the release method in the copy of the struct dma_buf_ops // with ttm's own dmabuf release method tdev->ops.release = ttm_prime_dmabuf_release; } When a new dmabuf is created the struct dma_buf_ops with the overriden release method set to ttm_prime_dmabuf_release is passed in exp_info.ops: DEFINE_DMA_BUF_EXPORT_INFO(exp_info); exp_info.ops = &tdev->ops; exp_info.size = prime->size; exp_info.flags = flags; exp_info.priv = prime; The call to dma_buf_export() then sets mutex_lock_interruptible(&prime->mutex); dma_buf = dma_buf_export(&exp_info) { dmabuf->ops = exp_info->ops; } mutex_unlock(&prime->mutex); which creates a new dmabuf file and then install a file descriptor to it in the callers file descriptor table: ret = dma_buf_fd(dma_buf, flags); When that dmabuf file is closed we now get: fput(file) -> __fput(file) -> f_op->release::dma_buf_file_release() -> dput() -> d_op->d_release::dma_buf_release() -> dmabuf->ops->release::ttm_prime_dmabuf_release() mutex_lock(&prime->mutex); if (prime->dma_buf == dma_buf) prime->dma_buf = NULL; mutex_unlock(&prime->mutex); Where we can see that prime->dma_buf is set to NULL. So when we have the following diagram: CPU1 CPU2 fput(file) -> __fput(file) -> f_op->release::dma_buf_file_release() -> dput() -> d_op->d_release::dma_buf_release() -> dmabuf->ops->release::ttm_prime_dmabuf_release() ttm_prime_handle_to_fd() mutex_lock_interruptible(&prime->mutex) dma_buf = prime->dma_buf dma_buf && get_dma_buf_unless_doomed(dma_buf) -> file_ref_get(dma_buf->file) mutex_unlock(&prime->mutex); mutex_lock(&prime->mutex); if (prime->dma_buf == dma_buf) prime->dma_buf = NULL; mutex_unlock(&prime->mutex); -> kmem_cache_free(file) The logic of the mechanism is the same as for epoll: sync with __fput() preventing the file from being freed. Here the synchronization happens through the ttm instance's prime->mutex. Basically, the lifetime of the dma_buf and the file are tighly coupled. Both (1) and (2) used to call atomic_inc_not_zero() to check whether the file has already been marked dead and then refuse to revive it. This is only safe because both (1) and (2) sync with __fput() and thus prevent kmem_cache_free() on the file being called and thus prevent the file from being immediately recycled due to SLAB_TYPESAFE_BY_RCU. Both (1) and (2) have been ported from atomic_inc_not_zero() to file_ref_get(). That means a file that is already in the process of being marked as FILE_REF_DEAD: file_ref_put() cnt = atomic_long_dec_return() -> __file_ref_put(cnt) if (cnt == FIlE_REF_NOREF) atomic_long_try_cmpxchg_release(cnt, FILE_REF_DEAD) can be revived again: CPU1 CPU2 file_ref_put() cnt = atomic_long_dec_return() -> __file_ref_put(cnt) if (cnt == FIlE_REF_NOREF) file_ref_get() // Brings reference back to FILE_REF_ONEREF atomic_long_add_negative() atomic_long_try_cmpxchg_release(cnt, FILE_REF_DEAD) This is fine and inherent to the file_ref_get()/file_ref_put() semantics. For both (1) and (2) this is safe because __fput() is prevented from making progress if file_ref_get() fails due to the aforementioned synchronization mechanisms. Two cases need to be considered that affect both (1) epoll and (2) ttm dmabuf: (i) fput()'s file_ref_put() and marks the file as FILE_REF_NOREF but before that fput() can mark the file as FILE_REF_DEAD someone manages to sneak in a file_ref_get() and brings the refcount back from FILE_REF_NOREF to FILE_REF_ONEREF. In that case the original fput() doesn't call __fput(). For epoll the poll will finish and for ttm dmabuf the file can be used again. For ttm dambuf this is actually an advantage because it avoids immediately allocating a new dmabuf object. CPU1 CPU2 file_ref_put() cnt = atomic_long_dec_return() -> __file_ref_put(cnt) if (cnt == FIlE_REF_NOREF) file_ref_get() // Brings reference back to FILE_REF_ONEREF atomic_long_add_negative() atomic_long_try_cmpxchg_release(cnt, FILE_REF_DEAD) (ii) fput()'s file_ref_put() marks the file FILE_REF_NOREF and also suceeds in actually marking it FILE_REF_DEAD and then calls into __fput() to free the file. When either (1) or (2) call file_ref_get() they fail as atomic_long_add_negative() will return true. At the same time, both (1) and (2) all file_ref_get() under mutexes that __fput() must also acquire preventing kmem_cache_free() from freeing the file. So while this might be treated as a change in semantics for (1) and (2) it really isn't. It if should end up causing issues this can be fixed by adding a helper that does something like: long cnt = atomic_long_read(&ref->refcnt); do { if (cnt < 0) return false; } while (!atomic_long_try_cmpxchg(&ref->refcnt, &cnt, cnt + 1)); return true; which would block FILE_REF_NOREF to FILE_REF_ONEREF transitions. - Jann correctly pointed out that kmem_cache_zalloc() cannot be used anymore once files have been ported to file_ref_t. The kmem_cache_zalloc() call will memset() the whole struct file to zero when it is reallocated. This will also set file->f_ref to zero which mens that a concurrent file_ref_get() can return true: CPU1 CPU2 __get_file_rcu() rcu_dereference_raw() close() [frees file] alloc_empty_file() kmem_cache_zalloc() [reallocates same file] memset(..., 0, ...) file_ref_get() [increments 0->1, returns true] init_file() file_ref_init(..., 1) [sets to 0] rcu_dereference_raw() fput() file_ref_put() [decrements 0->FILE_REF_NOREF, frees file] [UAF] causing a concurrent __get_file_rcu() call to acquire a reference to the file that is about to be reallocated and immediately freeing it on realizing that it has been recycled. This causes a UAF for the task that reallocated/recycled the file. This is prevented by switching from kmem_cache_zalloc() to kmem_cache_alloc() and initializing the fields manually. With file->f_ref initialized last. Note that a memset() also isn't guaranteed to atomically update an unsigned long so it's theoretically possible to see torn and therefore bogus counter values. Link: https://lore.kernel.org/r/20241007-brauner-file-rcuref-v2-3-387e24dc9163@kernel.org Signed-off-by: Christian Brauner --- include/linux/fs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index e3c603d01337..c13f648a1c13 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -1005,7 +1006,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) /** * struct file - Represents a file - * @f_count: reference count + * @f_ref: reference count * @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context. * @f_mode: FMODE_* flags often used in hotpaths * @f_op: file operations @@ -1030,7 +1031,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.) */ struct file { - atomic_long_t f_count; + file_ref_t f_ref; spinlock_t f_lock; fmode_t f_mode; const struct file_operations *f_op; @@ -1078,15 +1079,14 @@ struct file_handle { static inline struct file *get_file(struct file *f) { - long prior = atomic_long_fetch_inc_relaxed(&f->f_count); - WARN_ONCE(!prior, "struct file::f_count incremented from zero; use-after-free condition present!\n"); + file_ref_inc(&f->f_ref); return f; } struct file *get_file_rcu(struct file __rcu **f); struct file *get_file_active(struct file **f); -#define file_count(x) atomic_long_read(&(x)->f_count) +#define file_count(f) file_ref_read(&(f)->f_ref) #define MAX_NON_LFS ((1UL<<31) - 1) -- cgit v1.2.3 From 94b2a2c0e7cba3f163609dbd94120ee533ad2a07 Mon Sep 17 00:00:00 2001 From: Andrzej Kacprowski Date: Thu, 17 Oct 2024 16:58:09 +0200 Subject: accel/ivpu: Remove copy engine support Copy engine was deprecated by the FW and is no longer supported. Compute engine includes all copy engine functionality and should be used instead. This change does not affect user space as the copy engine was never used outside of a couple of tests. Signed-off-by: Andrzej Kacprowski Reviewed-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20241017145817.121590-4-jacek.lawrynowicz@linux.intel.com --- include/uapi/drm/ivpu_accel.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 234664d35250..a35b97b097bf 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -258,7 +258,7 @@ struct drm_ivpu_bo_info { /* drm_ivpu_submit engines */ #define DRM_IVPU_ENGINE_COMPUTE 0 -#define DRM_IVPU_ENGINE_COPY 1 +#define DRM_IVPU_ENGINE_COPY 1 /* Deprecated */ /** * struct drm_ivpu_submit - Submit commands to the VPU @@ -289,10 +289,6 @@ struct drm_ivpu_submit { * %DRM_IVPU_ENGINE_COMPUTE: * * Performs Deep Learning Neural Compute Inference Operations - * - * %DRM_IVPU_ENGINE_COPY: - * - * Performs memory copy operations to/from system memory allocated for VPU */ __u32 engine; -- cgit v1.2.3 From 95b6235e36953e9d6c822270961e79751eee2276 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 28 Oct 2024 17:58:38 +0000 Subject: iommu: Make bus_iommu_probe() static With the last external caller of bus_iommu_probe() now gone, make it internal as it really should be. Signed-off-by: Robin Murphy Tested-by: H. Nikolaus Schaller Reviewed-by: Kevin Hilman Tested-by: Kevin Hilman Tested-by: Beleswar Padhi Link: https://lore.kernel.org/r/a7511a034a27259aff4e14d80a861d3c40fbff1e.1730136799.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bd722f473635..84a6ed5e803a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -784,7 +784,6 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) }; } -extern int bus_iommu_probe(const struct bus_type *bus); extern bool iommu_present(const struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); extern bool iommu_group_has_isolated_msi(struct iommu_group *group); -- cgit v1.2.3 From 386c2b877b97cde53c6e422f9081ae133492fd05 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Wed, 23 Oct 2024 16:14:51 +0800 Subject: tcp: add a common helper to debug the underlying issue Following the commit c8770db2d544 ("tcp: check skb is non-NULL in tcp_rto_delta_us()"), we decided to add a helper so that it's easier to get verbose warning on either cases. Link: https://lore.kernel.org/all/5632e043-bdba-4d75-bc7e-bf58014492fd@redhat.com/ Suggested-by: Paolo Abeni Signed-off-by: Jason Xing Cc: Neal Cardwell Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/tcp.h | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 739a9fb83d0c..8b8d94bb1746 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2430,6 +2430,19 @@ void tcp_plb_update_state(const struct sock *sk, struct tcp_plb_state *plb, void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb); void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb); +static inline void tcp_warn_once(const struct sock *sk, bool cond, const char *str) +{ + WARN_ONCE(cond, + "%sout:%u sacked:%u lost:%u retrans:%u tlp_high_seq:%u sk_state:%u ca_state:%u advmss:%u mss_cache:%u pmtu:%u\n", + str, + tcp_sk(sk)->packets_out, tcp_sk(sk)->sacked_out, + tcp_sk(sk)->lost_out, tcp_sk(sk)->retrans_out, + tcp_sk(sk)->tlp_high_seq, sk->sk_state, + inet_csk(sk)->icsk_ca_state, + tcp_sk(sk)->advmss, tcp_sk(sk)->mss_cache, + inet_csk(sk)->icsk_pmtu_cookie); +} + /* At how many usecs into the future should the RTO fire? */ static inline s64 tcp_rto_delta_us(const struct sock *sk) { @@ -2441,17 +2454,7 @@ static inline s64 tcp_rto_delta_us(const struct sock *sk) return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp; } else { - WARN_ONCE(1, - "rtx queue empty: " - "out:%u sacked:%u lost:%u retrans:%u " - "tlp_high_seq:%u sk_state:%u ca_state:%u " - "advmss:%u mss_cache:%u pmtu:%u\n", - tcp_sk(sk)->packets_out, tcp_sk(sk)->sacked_out, - tcp_sk(sk)->lost_out, tcp_sk(sk)->retrans_out, - tcp_sk(sk)->tlp_high_seq, sk->sk_state, - inet_csk(sk)->icsk_ca_state, - tcp_sk(sk)->advmss, tcp_sk(sk)->mss_cache, - inet_csk(sk)->icsk_pmtu_cookie); + tcp_warn_once(sk, 1, "rtx queue empty: "); return jiffies_to_usecs(rto); } -- cgit v1.2.3 From 668d663989c77fcb2a92748645e4c394b03d5988 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Wed, 23 Oct 2024 16:14:52 +0800 Subject: tcp: add more warn of socket in tcp_send_loss_probe() Add two fields to print in the helper which here covers tcp_send_loss_probe(). Link: https://lore.kernel.org/all/5632e043-bdba-4d75-bc7e-bf58014492fd@redhat.com/ Suggested-by: Paolo Abeni Signed-off-by: Jason Xing Cc: Neal Cardwell Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 8b8d94bb1746..e9b37b76e894 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2433,8 +2433,9 @@ void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb); static inline void tcp_warn_once(const struct sock *sk, bool cond, const char *str) { WARN_ONCE(cond, - "%sout:%u sacked:%u lost:%u retrans:%u tlp_high_seq:%u sk_state:%u ca_state:%u advmss:%u mss_cache:%u pmtu:%u\n", + "%scwn:%u out:%u sacked:%u lost:%u retrans:%u tlp_high_seq:%u sk_state:%u ca_state:%u advmss:%u mss_cache:%u pmtu:%u\n", str, + tcp_snd_cwnd(tcp_sk(sk)), tcp_sk(sk)->packets_out, tcp_sk(sk)->sacked_out, tcp_sk(sk)->lost_out, tcp_sk(sk)->retrans_out, tcp_sk(sk)->tlp_high_seq, sk->sk_state, -- cgit v1.2.3 From 133008e84b99e4f5f8cf3d8b768c995732df9406 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 16 Oct 2024 13:13:09 -0700 Subject: blk-integrity: remove seed for user mapped buffers The seed is only used for kernel generation and verification. That doesn't happen for user buffers, so passing the seed around doesn't accomplish anything. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Anuj Gupta Reviewed-by: Kanchan Joshi Link: https://lore.kernel.org/r/20241016201309.1090320-1-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/bio-integrity.h | 4 ++-- include/linux/blk-integrity.h | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h index dd831c269e99..dbf0f74c1529 100644 --- a/include/linux/bio-integrity.h +++ b/include/linux/bio-integrity.h @@ -72,7 +72,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp, unsigned int nr); int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset); -int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed); +int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len); void bio_integrity_unmap_user(struct bio *bio); bool bio_integrity_prep(struct bio *bio); void bio_integrity_advance(struct bio *bio, unsigned int bytes_done); @@ -99,7 +99,7 @@ static inline void bioset_integrity_free(struct bio_set *bs) } static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf, - ssize_t len, u32 seed) + ssize_t len) { return -EINVAL; } diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 676f8f860c47..c7eae0bfb013 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -28,7 +28,7 @@ static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t, int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, - ssize_t bytes, u32 seed); + ssize_t bytes); static inline bool blk_integrity_queue_supports_integrity(struct request_queue *q) @@ -104,8 +104,7 @@ static inline int blk_rq_map_integrity_sg(struct request *q, } static inline int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, - ssize_t bytes, - u32 seed) + ssize_t bytes) { return -EINVAL; } -- cgit v1.2.3 From db71aae70e3e646d8ba4cb50e4bd4c281a91c804 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Sat, 26 Oct 2024 12:53:36 +0000 Subject: net: checksum: Move from32to16() to generic header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit from32to16() is used by lib/checksum.c and also by arch/parisc/lib/checksum.c. The next patch will use it in the bpf_csum_diff helper. Move from32to16() to the include/net/checksum.h as csum_from32to16() and remove other implementations. Signed-off-by: Puranjay Mohan Signed-off-by: Daniel Borkmann Reviewed-by: Toke Høiland-Jørgensen Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20241026125339.26459-2-puranjay@kernel.org --- include/net/checksum.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/checksum.h b/include/net/checksum.h index 1338cb92c8e7..243f972267b8 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -151,6 +151,12 @@ static inline void csum_replace(__wsum *csum, __wsum old, __wsum new) *csum = csum_add(csum_sub(*csum, old), new); } +static inline unsigned short csum_from32to16(unsigned int sum) +{ + sum += (sum >> 16) | (sum << 16); + return (unsigned short)(sum >> 16); +} + struct sk_buff; void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, __be32 from, __be32 to, bool pseudohdr); -- cgit v1.2.3 From 899f44531fe6cac4b024710fec647ecc127724b8 Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Wed, 30 Oct 2024 18:25:10 +0530 Subject: pmdomain: core: Add GENPD_FLAG_DEV_NAME_FW flag Introduce GENPD_FLAG_DEV_NAME_FW flag which instructs genpd to generate an unique device name using ida. It is aimed to be used by genpd providers which derive their names directly from FW making them susceptible to debugfs node creation failures. Reported-by: Johan Hovold Closes: https://lore.kernel.org/lkml/ZoQjAWse2YxwyRJv@hovoldconsulting.com/ Fixes: 718072ceb211 ("PM: domains: create debugfs nodes when adding power domains") Suggested-by: Ulf Hansson Suggested-by: Dmitry Baryshkov Signed-off-by: Sibi Sankar Cc: stable@vger.kernel.org Message-ID: <20241030125512.2884761-5-quic_sibis@quicinc.com> Signed-off-by: Ulf Hansson --- include/linux/pm_domain.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index b637ec14025f..cf4b11be3709 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -92,6 +92,10 @@ struct dev_pm_domain_list { * GENPD_FLAG_OPP_TABLE_FW: The genpd provider supports performance states, * but its corresponding OPP tables are not * described in DT, but are given directly by FW. + * + * GENPD_FLAG_DEV_NAME_FW: Instructs genpd to generate an unique device name + * using ida. It is used by genpd providers which + * get their genpd-names directly from FW. */ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) @@ -101,6 +105,7 @@ struct dev_pm_domain_list { #define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5) #define GENPD_FLAG_MIN_RESIDENCY (1U << 6) #define GENPD_FLAG_OPP_TABLE_FW (1U << 7) +#define GENPD_FLAG_DEV_NAME_FW (1U << 8) enum gpd_status { GENPD_STATE_ON = 0, /* PM domain is on */ @@ -163,6 +168,7 @@ struct generic_pm_domain { atomic_t sd_count; /* Number of subdomains with power "on" */ enum gpd_status status; /* Current state of the domain */ unsigned int device_count; /* Number of devices */ + unsigned int device_id; /* unique device id */ unsigned int suspended_count; /* System suspend device counter */ unsigned int prepared_count; /* Suspend counter of prepared devices */ unsigned int performance_state; /* Aggregated max performance state */ -- cgit v1.2.3 From 750fd23926f1507cc826b5a4fdd4bfc7283e7723 Mon Sep 17 00:00:00 2001 From: Avadhut Naik Date: Tue, 22 Oct 2024 19:36:27 +0000 Subject: x86/mce: Add wrapper for struct mce to export vendor specific info Currently, exporting new additional machine check error information involves adding new fields for the same at the end of the struct mce. This additional information can then be consumed through mcelog or tracepoint. However, as new MSRs are being added (and will be added in the future) by CPU vendors on their newer CPUs with additional machine check error information to be exported, the size of struct mce will balloon on some CPUs, unnecessarily, since those fields are vendor-specific. Moreover, different CPU vendors may export the additional information in varying sizes. The problem particularly intensifies since struct mce is exposed to userspace as part of UAPI. It's bloating through vendor-specific data should be avoided to limit the information being sent out to userspace. Add a new structure mce_hw_err to wrap the existing struct mce. The same will prevent its ballooning since vendor-specifc data, if any, can now be exported through a union within the wrapper structure and through __dynamic_array in mce_record tracepoint. Furthermore, new internal kernel fields can be added to the wrapper struct without impacting the user space API. [ bp: Restore reverse x-mas tree order of function vars declarations. ] Suggested-by: Borislav Petkov (AMD) Signed-off-by: Avadhut Naik Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20241022194158.110073-2-avadhut.naik@amd.com --- include/trace/events/mce.h | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h index f0f7b3cb2041..65aba1afcd07 100644 --- a/include/trace/events/mce.h +++ b/include/trace/events/mce.h @@ -19,9 +19,9 @@ TRACE_EVENT(mce_record, - TP_PROTO(struct mce *m), + TP_PROTO(struct mce_hw_err *err), - TP_ARGS(m), + TP_ARGS(err), TP_STRUCT__entry( __field( u64, mcgcap ) @@ -46,25 +46,25 @@ TRACE_EVENT(mce_record, ), TP_fast_assign( - __entry->mcgcap = m->mcgcap; - __entry->mcgstatus = m->mcgstatus; - __entry->status = m->status; - __entry->addr = m->addr; - __entry->misc = m->misc; - __entry->synd = m->synd; - __entry->ipid = m->ipid; - __entry->ip = m->ip; - __entry->tsc = m->tsc; - __entry->ppin = m->ppin; - __entry->walltime = m->time; - __entry->cpu = m->extcpu; - __entry->cpuid = m->cpuid; - __entry->apicid = m->apicid; - __entry->socketid = m->socketid; - __entry->cs = m->cs; - __entry->bank = m->bank; - __entry->cpuvendor = m->cpuvendor; - __entry->microcode = m->microcode; + __entry->mcgcap = err->m.mcgcap; + __entry->mcgstatus = err->m.mcgstatus; + __entry->status = err->m.status; + __entry->addr = err->m.addr; + __entry->misc = err->m.misc; + __entry->synd = err->m.synd; + __entry->ipid = err->m.ipid; + __entry->ip = err->m.ip; + __entry->tsc = err->m.tsc; + __entry->ppin = err->m.ppin; + __entry->walltime = err->m.time; + __entry->cpu = err->m.extcpu; + __entry->cpuid = err->m.cpuid; + __entry->apicid = err->m.apicid; + __entry->socketid = err->m.socketid; + __entry->cs = err->m.cs; + __entry->bank = err->m.bank; + __entry->cpuvendor = err->m.cpuvendor; + __entry->microcode = err->m.microcode; ), TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR: %016Lx, MISC: %016Lx, SYND: %016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x", -- cgit v1.2.3 From e52750fb1458ae9ea5860a08ed7a149185bc5b97 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 22 Oct 2024 19:36:28 +0000 Subject: tracing: Add __print_dynamic_array() helper When printing a dynamic array in a trace event, the method is rather ugly. It has the format of: __print_array(__get_dynamic_array(array), __get_dynmaic_array_len(array) / el_size, el_size) Since dynamic arrays are known to the tracing infrastructure, create a helper macro that does the above for you. __print_dynamic_array(array, el_size) Which would expand to the same output. Signed-off-by: Steven Rostedt (Google) Signed-off-by: Avadhut Naik Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20241022194158.110073-3-avadhut.naik@amd.com --- include/trace/stages/stage3_trace_output.h | 8 ++++++++ include/trace/stages/stage7_class_define.h | 1 + 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/trace/stages/stage3_trace_output.h b/include/trace/stages/stage3_trace_output.h index c1fb1355d309..1e7b0bef95f5 100644 --- a/include/trace/stages/stage3_trace_output.h +++ b/include/trace/stages/stage3_trace_output.h @@ -119,6 +119,14 @@ trace_print_array_seq(p, array, count, el_size); \ }) +#undef __print_dynamic_array +#define __print_dynamic_array(array, el_size) \ + ({ \ + __print_array(__get_dynamic_array(array), \ + __get_dynamic_array_len(array) / (el_size), \ + (el_size)); \ + }) + #undef __print_hex_dump #define __print_hex_dump(prefix_str, prefix_type, \ rowsize, groupsize, buf, len, ascii) \ diff --git a/include/trace/stages/stage7_class_define.h b/include/trace/stages/stage7_class_define.h index bcb960d16fc0..fcd564a590f4 100644 --- a/include/trace/stages/stage7_class_define.h +++ b/include/trace/stages/stage7_class_define.h @@ -22,6 +22,7 @@ #undef __get_rel_cpumask #undef __get_rel_sockaddr #undef __print_array +#undef __print_dynamic_array #undef __print_hex_dump #undef __get_buf -- cgit v1.2.3 From b35108a51cf7bab58d7eace1267d7965978bcdb8 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Wed, 30 Oct 2024 17:47:35 +0000 Subject: jiffies: Define secs_to_jiffies() secs_to_jiffies() is defined in hci_event.c and cannot be reused by other call sites. Hoist it into the core code to allow conversion of the ~1150 usages of msecs_to_jiffies() that either: - use a multiplier value of 1000 or equivalently MSEC_PER_SEC, or - have timeouts that are denominated in seconds (i.e. end in 000) It's implemented as a macro to allow usage in static initializers. This will also allow conversion of yet more sites that use (sec * HZ) directly, and improve their readability. Suggested-by: Michael Kelley Signed-off-by: Easwar Hariharan Signed-off-by: Thomas Gleixner Reviewed-by: Luiz Augusto von Dentz Link: https://lore.kernel.org/all/20241030-open-coded-timeouts-v3-1-9ba123facf88@linux.microsoft.com --- include/linux/jiffies.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 5d21dacd62bc..ed945f42e064 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -526,6 +526,19 @@ static __always_inline unsigned long msecs_to_jiffies(const unsigned int m) } } +/** + * secs_to_jiffies: - convert seconds to jiffies + * @_secs: time in seconds + * + * Conversion is done by simple multiplication with HZ + * + * secs_to_jiffies() is defined as a macro rather than a static inline + * function so it can be used in static initializers. + * + * Return: jiffies value + */ +#define secs_to_jiffies(_secs) ((_secs) * HZ) + extern unsigned long __usecs_to_jiffies(const unsigned int u); #if !(USEC_PER_SEC % HZ) static inline unsigned long _usecs_to_jiffies(const unsigned int u) -- cgit v1.2.3 From d9d959c36bec59f11c0eb6b5308729e3c4901b5e Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 30 Oct 2024 12:27:34 +0100 Subject: PCI: Make pcim_request_all_regions() a public function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to remove the deprecated function pcim_iomap_regions_request_all(), a few drivers need an interface to request all BARs a PCI device offers. Make pcim_request_all_regions() a public interface. Link: https://lore.kernel.org/r/20241030112743.104395-2-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Reviewed-by: Damien Le Moal Reviewed-by: Ilpo Järvinen --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..3b151c8331e5 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2293,6 +2293,7 @@ static inline void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) { } #endif +int pcim_request_all_regions(struct pci_dev *pdev, const char *name); void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen); void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar, const char *name); -- cgit v1.2.3 From 6d9c59212523e719a63575222f1cd1b0aca3da4f Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 30 Oct 2024 12:27:43 +0100 Subject: PCI: Remove pcim_iomap_regions_request_all() pcim_iomap_regions_request_all() have been deprecated in commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(), pcim_iomap_regions_request_all()"). All users of this function have been ported to other interfaces by now. Remove pcim_iomap_regions_request_all(). Link: https://lore.kernel.org/r/20241030112743.104395-11-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Reviewed-by: Damien Le Moal --- include/linux/pci.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 3b151c8331e5..b59197635c5c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2301,8 +2301,6 @@ void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr); void __iomem * const *pcim_iomap_table(struct pci_dev *pdev); int pcim_request_region(struct pci_dev *pdev, int bar, const char *name); int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name); -int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask, - const char *name); void pcim_iounmap_regions(struct pci_dev *pdev, int mask); void __iomem *pcim_iomap_range(struct pci_dev *pdev, int bar, unsigned long offset, unsigned long len); -- cgit v1.2.3 From 4a6afd60733c75369680f4a40c82b7c8528f4a7a Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 16 Oct 2024 11:49:04 +0200 Subject: PCI: Make pcim_iounmap_region() a public function The function pcim_iounmap_regions() is problematic because it uses a bitmask mechanism to release / iounmap multiple BARs at once. It, thus, prevents getting rid of the problematic iomap table mechanism which was deprecated in commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(), pcim_iomap_regions_request_all()"). pcim_iounmap_region() does not have that problem. Make it public as the successor of pcim_iounmap_regions(). Link: https://lore.kernel.org/r/20241016094911.24818-3-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index b59197635c5c..b8d248c136ba 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2297,6 +2297,7 @@ int pcim_request_all_regions(struct pci_dev *pdev, const char *name); void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen); void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar, const char *name); +void pcim_iounmap_region(struct pci_dev *pdev, int bar); void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr); void __iomem * const *pcim_iomap_table(struct pci_dev *pdev); int pcim_request_region(struct pci_dev *pdev, int bar, const char *name); -- cgit v1.2.3 From 3e7f43188ee227bcf0f07f60a00f1fd1aca10e6a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 13:01:36 -0700 Subject: KVM: Protect vCPU's "last run PID" with rwlock, not RCU To avoid jitter on KVM_RUN due to synchronize_rcu(), use a rwlock instead of RCU to protect vcpu->pid, a.k.a. the pid of the task last used to a vCPU. When userspace is doing M:N scheduling of tasks to vCPUs, e.g. to run SEV migration helper vCPUs during post-copy, the synchronize_rcu() needed to change the PID associated with the vCPU can stall for hundreds of milliseconds, which is problematic for latency sensitive post-copy operations. In the directed yield path, do not acquire the lock if it's contended, i.e. if the associated PID is changing, as that means the vCPU's task is already running. Reported-by: Steve Rutherford Reviewed-by: Steve Rutherford Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240802200136.329973-3-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 02f0206fd2dc..18a1672ffcbf 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -334,7 +334,8 @@ struct kvm_vcpu { #ifndef __KVM_HAVE_ARCH_WQP struct rcuwait wait; #endif - struct pid __rcu *pid; + struct pid *pid; + rwlock_t pid_lock; int sigset_active; sigset_t sigset; unsigned int halt_poll_ns; -- cgit v1.2.3 From 9e9af8bbb5f9b565b9faf691f96f661791e199b0 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 10 Oct 2024 07:26:04 -0700 Subject: perf/x86/rapl: Clean up cpumask and hotplug The rapl pmu is die scope, which is supported by the generic perf_event subsystem now. Set the scope for the rapl PMU and remove all the cpumask and hotplug codes. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Oliver Sang Tested-by: Dhananjay Ugwekar Link: https://lore.kernel.org/r/20241010142604.770192-2-kan.liang@linux.intel.com --- include/linux/cpuhotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 2361ed4d2b15..37a9afffb59e 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -208,7 +208,6 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_UNCORE_ONLINE, CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE, CPUHP_AP_PERF_X86_AMD_POWER_ONLINE, - CPUHP_AP_PERF_X86_RAPL_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, -- cgit v1.2.3 From dd1a7567784e2b1f80258be04f57bcfa82c997eb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 23 Oct 2024 21:41:59 -0700 Subject: uprobes: SRCU-protect uretprobe lifetime (with timeout) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid taking refcount on uprobe in prepare_uretprobe(), instead take uretprobe-specific SRCU lock and keep it active as kernel transfers control back to user space. Given we can't rely on user space returning from traced function within reasonable time period, we need to make sure not to keep SRCU lock active for too long, though. To that effect, we employ a timer callback which is meant to terminate SRCU lock region after predefined timeout (currently set to 100ms), and instead transfer underlying struct uprobe's lifetime protection to refcounting. This fallback to less scalable refcounting after 100ms is a fine tradeoff from uretprobe's scalability and performance perspective, because uretprobing *long running* user functions inherently doesn't run into scalability issues (there is just not enough frequency to cause noticeable issues with either performance or scalability). The overall trick is in ensuring synchronization between current thread and timer's callback fired on some other thread. To cope with that with minimal logic complications, we add hprobe wrapper which is used to contain all the synchronization related issues behind a small number of basic helpers: hprobe_expire() for "downgrading" uprobe from SRCU-protected state to refcounted state, and a hprobe_consume() and hprobe_finalize() pair of single-use consuming helpers. Other than that, whatever current thread's logic is there stays the same, as timer thread cannot modify return_instance state (or add new/remove old return_instances). It only takes care of SRCU unlock and uprobe refcounting, which is hidden from the higher-level uretprobe handling logic. We use atomic xchg() in hprobe_consume(), which is called from performance critical handle_uretprobe_chain() function run in the current context. When uncontended, this xchg() doesn't seem to hurt performance as there are no other competing CPUs fighting for the same cache line. We also mark struct return_instance as ____cacheline_aligned to ensure no false sharing can happen. Another technical moment. We need to make sure that the list of return instances can be safely traversed under RCU from timer callback, so we delay return_instance freeing with kfree_rcu() and make sure that list modifications use RCU-aware operations. Also, given SRCU lock survives transition from kernel to user space and back we need to use lower-level __srcu_read_lock() and __srcu_read_unlock() to avoid lockdep complaining. Just to give an impression of a kind of performance improvements this change brings, below are benchmarking results with and without these SRCU changes, assuming other uprobe optimizations (mainly RCU Tasks Trace for entry uprobes, lockless RB-tree lookup, and lockless VMA to uprobe lookup) are left intact: WITHOUT SRCU for uretprobes =========================== uretprobe-nop ( 1 cpus): 2.197 ± 0.002M/s ( 2.197M/s/cpu) uretprobe-nop ( 2 cpus): 3.325 ± 0.001M/s ( 1.662M/s/cpu) uretprobe-nop ( 3 cpus): 4.129 ± 0.002M/s ( 1.376M/s/cpu) uretprobe-nop ( 4 cpus): 6.180 ± 0.003M/s ( 1.545M/s/cpu) uretprobe-nop ( 8 cpus): 7.323 ± 0.005M/s ( 0.915M/s/cpu) uretprobe-nop (16 cpus): 6.943 ± 0.005M/s ( 0.434M/s/cpu) uretprobe-nop (32 cpus): 5.931 ± 0.014M/s ( 0.185M/s/cpu) uretprobe-nop (64 cpus): 5.145 ± 0.003M/s ( 0.080M/s/cpu) uretprobe-nop (80 cpus): 4.925 ± 0.005M/s ( 0.062M/s/cpu) WITH SRCU for uretprobes ======================== uretprobe-nop ( 1 cpus): 1.968 ± 0.001M/s ( 1.968M/s/cpu) uretprobe-nop ( 2 cpus): 3.739 ± 0.003M/s ( 1.869M/s/cpu) uretprobe-nop ( 3 cpus): 5.616 ± 0.003M/s ( 1.872M/s/cpu) uretprobe-nop ( 4 cpus): 7.286 ± 0.002M/s ( 1.822M/s/cpu) uretprobe-nop ( 8 cpus): 13.657 ± 0.007M/s ( 1.707M/s/cpu) uretprobe-nop (32 cpus): 45.305 ± 0.066M/s ( 1.416M/s/cpu) uretprobe-nop (64 cpus): 42.390 ± 0.922M/s ( 0.662M/s/cpu) uretprobe-nop (80 cpus): 47.554 ± 2.411M/s ( 0.594M/s/cpu) Signed-off-by: Andrii Nakryiko Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20241024044159.3156646-3-andrii@kernel.org --- include/linux/uprobes.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index dbaf04189548..7a051b5d2edd 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -15,6 +15,7 @@ #include #include #include +#include struct uprobe; struct vm_area_struct; @@ -67,6 +68,53 @@ enum uprobe_task_state { UTASK_SSTEP_TRAPPED, }; +/* The state of hybrid-lifetime uprobe inside struct return_instance */ +enum hprobe_state { + HPROBE_LEASED, /* uretprobes_srcu-protected uprobe */ + HPROBE_STABLE, /* refcounted uprobe */ + HPROBE_GONE, /* NULL uprobe, SRCU expired, refcount failed */ + HPROBE_CONSUMED, /* uprobe "consumed" by uretprobe handler */ +}; + +/* + * Hybrid lifetime uprobe. Represents a uprobe instance that could be either + * SRCU protected (with SRCU protection eventually potentially timing out), + * refcounted using uprobe->ref, or there could be no valid uprobe (NULL). + * + * hprobe's internal state is setup such that background timer thread can + * atomically "downgrade" temporarily RCU-protected uprobe into refcounted one + * (or no uprobe, if refcounting failed). + * + * *stable* pointer always point to the uprobe (or could be NULL if there is + * was no valid underlying uprobe to begin with). + * + * *leased* pointer is the key to achieving race-free atomic lifetime state + * transition and can have three possible states: + * - either the same non-NULL value as *stable*, in which case uprobe is + * SRCU-protected; + * - NULL, in which case uprobe (if there is any) is refcounted; + * - special __UPROBE_DEAD value, which represents an uprobe that was SRCU + * protected initially, but SRCU period timed out and we attempted to + * convert it to refcounted, but refcount_inc_not_zero() failed, because + * uprobe effectively went away (the last consumer unsubscribed). In this + * case it's important to know that *stable* pointer (which still has + * non-NULL uprobe pointer) shouldn't be used, because lifetime of + * underlying uprobe is not guaranteed anymore. __UPROBE_DEAD is just an + * internal marker and is handled transparently by hprobe_fetch() helper. + * + * When uprobe is SRCU-protected, we also record srcu_idx value, necessary for + * SRCU unlocking. + * + * See hprobe_expire() and hprobe_fetch() for details of race-free uprobe + * state transitioning details. It all hinges on atomic xchg() over *leaded* + * pointer. *stable* pointer, once initially set, is not modified concurrently. + */ +struct hprobe { + enum hprobe_state state; + int srcu_idx; + struct uprobe *uprobe; +}; + /* * uprobe_task: Metadata of a task while it singlesteps. */ @@ -86,6 +134,7 @@ struct uprobe_task { }; struct uprobe *active_uprobe; + struct timer_list ri_timer; unsigned long xol_vaddr; struct arch_uprobe *auprobe; @@ -100,7 +149,7 @@ struct return_consumer { }; struct return_instance { - struct uprobe *uprobe; + struct hprobe hprobe; unsigned long func; unsigned long stack; /* stack pointer */ unsigned long orig_ret_vaddr; /* original return address */ @@ -108,9 +157,10 @@ struct return_instance { int consumers_cnt; struct return_instance *next; /* keep as stack */ + struct rcu_head rcu; struct return_consumer consumers[] __counted_by(consumers_cnt); -}; +} ____cacheline_aligned; enum rp_check { RP_CHECK_CALL, -- cgit v1.2.3 From f247fd22e9f27d919d81861e4dcc438e0b6d179b Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 23 Oct 2024 08:56:00 +0200 Subject: s390/time: Add clocksource id to TOD clock To allow specifying the clock source in the upcoming PtP driver, add a clocksource ID to the s390 TOD clock. Acked-by: Heiko Carstens Acked-by: Richard Cochran Signed-off-by: Sven Schnelle Link: https://patch.msgid.link/20241023065601.449586-2-svens@linux.ibm.com Signed-off-by: Jakub Kicinski --- include/linux/clocksource_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/clocksource_ids.h b/include/linux/clocksource_ids.h index 2bb4d8c2f1b0..c4ef4ae2eded 100644 --- a/include/linux/clocksource_ids.h +++ b/include/linux/clocksource_ids.h @@ -6,6 +6,7 @@ enum clocksource_ids { CSID_GENERIC = 0, CSID_ARM_ARCH_COUNTER, + CSID_S390_TOD, CSID_X86_TSC_EARLY, CSID_X86_TSC, CSID_X86_KVM_CLK, -- cgit v1.2.3 From 2748697225c38a19666bba6a83afc6bf46ee16be Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 23 Oct 2024 16:52:46 +0300 Subject: net: sched: propagate "skip_sw" flag to struct flow_cls_common_offload Background: switchdev ports offload the Linux bridge, and most of the packets they handle will never see the CPU. The ports between which there exists no hardware data path are considered 'foreign' to switchdev. These can either be normal physical NICs without switchdev offload, or incompatible switchdev ports, or virtual interfaces like veth/dummy/etc. In some cases, an offloaded filter can only do half the work, and the rest must be handled by software. Redirecting/mirroring from the ingress of a switchdev port towards a foreign interface is one example of combined hardware/software data path. The most that the switchdev port can do is to extract the matching packets from its offloaded data path and send them to the CPU. From there on, the software filter runs (a second time, after the first run in hardware) on the packet and performs the mirred action. It makes sense for switchdev drivers which allow this kind of "half offloading" to sense the "skip_sw" flag of the filter/action pair, and deny attempts from the user to install a filter that does not run in software, because that simply won't work. In fact, a mirred action on a switchdev port towards a dummy interface appears to be a valid way of (selectively) monitoring offloaded traffic that flows through it. IFF_PROMISC was also discussed years ago, but (despite initial disagreement) there seems to be consensus that this flag should not affect the destination taken by packets, but merely whether or not the NIC discards packets with unknown MAC DA for local processing. [1] https://lore.kernel.org/netdev/20190830092637.7f83d162@ceranb/ [2] https://lore.kernel.org/netdev/20191002233750.13566-1-olteanv@gmail.com/ Suggested-by: Ido Schimmel Link: https://lore.kernel.org/netdev/ZxUo0Dc0M5Y6l9qF@shredder.mtl.com/ Signed-off-by: Vladimir Oltean Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20241023135251.1752488-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- include/net/flow_offload.h | 1 + include/net/pkt_cls.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 292cd8f4b762..596ab9791e4d 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -685,6 +685,7 @@ struct flow_cls_common_offload { u32 chain_index; __be16 protocol; u32 prio; + bool skip_sw; struct netlink_ext_ack *extack; }; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 4880b3a7aced..cf199af85c52 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -755,6 +755,7 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common, cls_common->chain_index = tp->chain->index; cls_common->protocol = tp->protocol; cls_common->prio = tp->prio >> 16; + cls_common->skip_sw = tc_skip_sw(flags); if (tc_skip_sw(flags) || flags & TCA_CLS_FLAGS_VERBOSE) cls_common->extack = extack; } -- cgit v1.2.3 From 164b5e20cdf6038f1b38867d2f6252ec6f10c356 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:14 +0100 Subject: KVM: arm64: nv: Handle CNTHCTL_EL2 specially Accessing CNTHCTL_EL2 is fraught with danger if running with HCR_EL2.E2H=1: half of the bits are held in CNTKCTL_EL1, and thus can be changed behind our back, while the rest lives in the CNTHCTL_EL2 shadow copy that is memory-based. Yes, this is a lot of fun! Make sure that we merge the two on read access, while we can write to CNTKCTL_EL1 in a more straightforward manner. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-7-maz@kernel.org Signed-off-by: Oliver Upton --- include/kvm/arm_arch_timer.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index c819c5d16613..fd650a8789b9 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -147,6 +147,9 @@ u64 timer_get_cval(struct arch_timer_context *ctxt); void kvm_timer_cpu_up(void); void kvm_timer_cpu_down(void); +/* CNTKCTL_EL1 valid bits as of DDI0487J.a */ +#define CNTKCTL_VALID_BITS (BIT(17) | GENMASK_ULL(9, 0)) + static inline bool has_cntpoff(void) { return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)); -- cgit v1.2.3 From d4fca1358ea9096f2f6ed942e2cb3a820073dfc1 Mon Sep 17 00:00:00 2001 From: Avadhut Naik Date: Tue, 22 Oct 2024 19:36:29 +0000 Subject: x86/MCE/AMD: Add support for new MCA_SYND{1,2} registers Starting with Zen4, AMD's Scalable MCA systems incorporate two new registers: MCA_SYND1 and MCA_SYND2. These registers will include supplemental error information in addition to the existing MCA_SYND register. The data within these registers is considered valid if MCA_STATUS[SyndV] is set. Userspace error decoding tools like rasdaemon gather related hardware error information through the tracepoints. Therefore, export these two registers through the mce_record tracepoint so that tools like rasdaemon can parse them and output the supplemental error information like FRU text contained in them. [ bp: Massage. ] Signed-off-by: Yazen Ghannam Signed-off-by: Avadhut Naik Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20241022194158.110073-4-avadhut.naik@amd.com --- include/trace/events/mce.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h index 65aba1afcd07..c1c50df9ecfd 100644 --- a/include/trace/events/mce.h +++ b/include/trace/events/mce.h @@ -43,6 +43,7 @@ TRACE_EVENT(mce_record, __field( u8, bank ) __field( u8, cpuvendor ) __field( u32, microcode ) + __dynamic_array(u8, v_data, sizeof(err->vendor)) ), TP_fast_assign( @@ -65,9 +66,10 @@ TRACE_EVENT(mce_record, __entry->bank = err->m.bank; __entry->cpuvendor = err->m.cpuvendor; __entry->microcode = err->m.microcode; + memcpy(__get_dynamic_array(v_data), &err->vendor, sizeof(err->vendor)); ), - TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR: %016Lx, MISC: %016Lx, SYND: %016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x", + TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016llx, IPID: %016llx, ADDR: %016llx, MISC: %016llx, SYND: %016llx, RIP: %02x:<%016llx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x, vendor data: %s", __entry->cpu, __entry->mcgcap, __entry->mcgstatus, __entry->bank, __entry->status, @@ -83,7 +85,8 @@ TRACE_EVENT(mce_record, __entry->walltime, __entry->socketid, __entry->apicid, - __entry->microcode) + __entry->microcode, + __print_dynamic_array(v_data, sizeof(u8))) ); #endif /* _TRACE_MCE_H */ -- cgit v1.2.3 From 3b1596a21fbf210f5b763fd3c0be280650475b52 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 29 Oct 2024 13:54:43 +0100 Subject: clockevents: Shutdown and unregister current clockevents at CPUHP_AP_TICK_DYING The way the clockevent devices are finally stopped while a CPU is offlining is currently chaotic. The layout being by order: 1) tick_sched_timer_dying() stops the tick and the underlying clockevent but only for oneshot case. The periodic tick and its related clockevent still runs. 2) tick_broadcast_offline() detaches and stops the per-cpu oneshot broadcast and append it to the released list. 3) Some individual clockevent drivers stop the clockevents (a second time if the tick is oneshot) 4) Once the CPU is dead, a control CPU remotely detaches and stops (a 3rd time if oneshot mode) the CPU clockevent and adds it to the released list. 5) The released list containing the broadcast device released on step 2) and the remotely detached clockevent from step 4) are unregistered. These random events can be factorized if the current clockevent is detached and stopped by the dying CPU at the generic layer, that is from the dying CPU: a) Stop the tick b) Stop/detach the underlying per-cpu oneshot broadcast clockevent c) Stop/detach the underlying clockevent d) Release / unregister the clockevents from b) and c) e) Release / unregister the remaining clockevents from the dying CPU. This part could be performed by the dying CPU This way the drivers and the tick layer don't need to care about clockevent operations during cpuhotplug down. This also unifies the tick behaviour on offline CPUs between oneshot and periodic modes, avoiding offline ticks altogether for sanity. Adopt the simplification. [ tglx: Remove the WARN_ON() in clockevents_register_device() as that is called from an upcoming CPU before the CPU is marked online ] Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241029125451.54574-3-frederic@kernel.org --- include/linux/tick.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/tick.h b/include/linux/tick.h index 72744638c5b0..b0c74bfe0600 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -20,12 +20,10 @@ extern void __init tick_init(void); extern void tick_suspend_local(void); /* Should be core only, but XEN resume magic and ARM BL switcher require it */ extern void tick_resume_local(void); -extern void tick_cleanup_dead_cpu(int cpu); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_suspend_local(void) { } static inline void tick_resume_local(void) { } -static inline void tick_cleanup_dead_cpu(int cpu) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ #if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU) -- cgit v1.2.3 From 697a4001d31a607a72c6297e4eb0f7918c6e6929 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 7 Oct 2024 01:14:05 +0100 Subject: mfd: axp20x: Ensure relationship between IDs and model names At the moment there is an implicit relationship between the AXP model IDs and the order of the strings in the axp20x_model_names[] array. This is fragile, and makes adding IDs in the middle error prone. Make this relationship official by changing the ID type to the actual enum used, and using indexed initialisers for the string list. Signed-off-by: Andre Przywara Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20241007001408.27249-3-andre.przywara@arm.com Signed-off-by: Lee Jones --- include/linux/mfd/axp20x.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index f4dfc1871a95..79ecaaaa2070 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -959,7 +959,7 @@ struct axp20x_dev { unsigned long irq_flags; struct regmap *regmap; struct regmap_irq_chip_data *regmap_irqc; - long variant; + enum axp20x_variants variant; int nr_cells; const struct mfd_cell *cells; const struct regmap_config *regmap_cfg; -- cgit v1.2.3 From 35fec94afe045856456faca4879b9c560e39d1e3 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 7 Oct 2024 01:14:07 +0100 Subject: mfd: axp20x: Add support for AXP323 The X-Powers AXP323 is a very close sibling of the AXP313A. The only difference seems to be the ability to dual-phase the first two DC/DC converter, which adds another register. Add the required boilerplate to introduce a new PMIC to the AXP MFD driver. Where possible, this just maps into the existing structs defined for the AXP313A, only deviating where needed. Signed-off-by: Andre Przywara Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20241007001408.27249-5-andre.przywara@arm.com Signed-off-by: Lee Jones --- include/linux/mfd/axp20x.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 79ecaaaa2070..c3df0e615fbf 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -19,6 +19,7 @@ enum axp20x_variants { AXP223_ID, AXP288_ID, AXP313A_ID, + AXP323_ID, AXP717_ID, AXP803_ID, AXP806_ID, @@ -113,6 +114,7 @@ enum axp20x_variants { #define AXP313A_SHUTDOWN_CTRL 0x1a #define AXP313A_IRQ_EN 0x20 #define AXP313A_IRQ_STATE 0x21 +#define AXP323_DCDC_MODE_CTRL2 0x22 #define AXP717_ON_INDICATE 0x00 #define AXP717_PMU_STATUS_2 0x01 -- cgit v1.2.3 From 6e31bb8d3a63bb2c3efab2fb6bcfccac677a4581 Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Fri, 18 Oct 2024 11:10:46 +0300 Subject: mfd: mt6397: Add initial support for MT6328 The MT6328 PMIC is commonly used with the MT6735 SoC. Add initial support for this PMIC. Signed-off-by: Yassine Oudjana Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20241018081050.23592-5-y.oudjana@protonmail.com Signed-off-by: Lee Jones --- include/linux/mfd/mt6328/core.h | 53 +++ include/linux/mfd/mt6328/registers.h | 822 +++++++++++++++++++++++++++++++++++ include/linux/mfd/mt6397/core.h | 11 +- 3 files changed, 881 insertions(+), 5 deletions(-) create mode 100644 include/linux/mfd/mt6328/core.h create mode 100644 include/linux/mfd/mt6328/registers.h (limited to 'include') diff --git a/include/linux/mfd/mt6328/core.h b/include/linux/mfd/mt6328/core.h new file mode 100644 index 000000000000..9a08aed72b9f --- /dev/null +++ b/include/linux/mfd/mt6328/core.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2015 MediaTek Inc. + * Copyright (c) 2022 Yassine Oudjana + */ + +#ifndef __MFD_MT6328_CORE_H__ +#define __MFD_MT6328_CORE_H__ + +enum mt6328_irq_status_numbers { + MT6328_IRQ_STATUS_PWRKEY = 0, + MT6328_IRQ_STATUS_HOMEKEY, + MT6328_IRQ_STATUS_PWRKEY_R, + MT6328_IRQ_STATUS_HOMEKEY_R, + MT6328_IRQ_STATUS_THR_H, + MT6328_IRQ_STATUS_THR_L, + MT6328_IRQ_STATUS_BAT_H, + MT6328_IRQ_STATUS_BAT_L, + MT6328_IRQ_STATUS_RTC, + MT6328_IRQ_STATUS_AUDIO, + MT6328_IRQ_STATUS_ACCDET, + MT6328_IRQ_STATUS_ACCDET_EINT, + MT6328_IRQ_STATUS_ACCDET_NEGV, + MT6328_IRQ_STATUS_NI_LBAT_INT, + MT6328_IRQ_STATUS_VPROC_OC = 16, + MT6328_IRQ_STATUS_VSYS_OC, + MT6328_IRQ_STATUS_VLTE_OC, + MT6328_IRQ_STATUS_VCORE_OC, + MT6328_IRQ_STATUS_VPA_OC, + MT6328_IRQ_STATUS_LDO_OC, + MT6328_IRQ_STATUS_BAT2_H, + MT6328_IRQ_STATUS_BAT2_L, + MT6328_IRQ_STATUS_VISMPS0_H, + MT6328_IRQ_STATUS_VISMPS0_L, + MT6328_IRQ_STATUS_AUXADC_IMP, + MT6328_IRQ_STATUS_OV = 32, + MT6328_IRQ_STATUS_BVALID_DET, + MT6328_IRQ_STATUS_VBATON_HV, + MT6328_IRQ_STATUS_VBATON_UNDET, + MT6328_IRQ_STATUS_WATCHDOG, + MT6328_IRQ_STATUS_PCHR_CM_VDEC, + MT6328_IRQ_STATUS_CHRDET, + MT6328_IRQ_STATUS_PCHR_CM_VINC, + MT6328_IRQ_STATUS_FG_BAT_H, + MT6328_IRQ_STATUS_FG_BAT_L, + MT6328_IRQ_STATUS_FG_CUR_H, + MT6328_IRQ_STATUS_FG_CUR_L, + MT6328_IRQ_STATUS_FG_ZCV, + MT6328_IRQ_STATUS_SPKL_D, + MT6328_IRQ_STATUS_SPKL_AB, +}; + +#endif /* __MFD_MT6323_CORE_H__ */ diff --git a/include/linux/mfd/mt6328/registers.h b/include/linux/mfd/mt6328/registers.h new file mode 100644 index 000000000000..8199aaea27b9 --- /dev/null +++ b/include/linux/mfd/mt6328/registers.h @@ -0,0 +1,822 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 Yassine Oudjana + */ + +#ifndef __MFD_MT6328_REGISTERS_H__ +#define __MFD_MT6328_REGISTERS_H__ + +/* PMIC Registers */ +#define MT6328_STRUP_CON0 0x0000 +#define MT6328_STRUP_CON2 0x0002 +#define MT6328_STRUP_CON3 0x0004 +#define MT6328_STRUP_CON4 0x0006 +#define MT6328_STRUP_CON5 0x0008 +#define MT6328_STRUP_CON6 0x000a +#define MT6328_STRUP_CON7 0x000c +#define MT6328_STRUP_CON8 0x000e +#define MT6328_STRUP_CON9 0x0010 +#define MT6328_STRUP_CON10 0x0012 +#define MT6328_STRUP_CON11 0x0014 +#define MT6328_STRUP_CON12 0x0016 +#define MT6328_STRUP_CON13 0x0018 +#define MT6328_STRUP_CON14 0x001a +#define MT6328_STRUP_CON15 0x001c +#define MT6328_STRUP_CON16 0x001e +#define MT6328_STRUP_CON17 0x0020 +#define MT6328_STRUP_CON18 0x0022 +#define MT6328_STRUP_CON19 0x0024 +#define MT6328_STRUP_CON20 0x0026 +#define MT6328_STRUP_CON21 0x0028 +#define MT6328_STRUP_CON22 0x002a +#define MT6328_STRUP_CON23 0x002c +#define MT6328_STRUP_CON24 0x002e +#define MT6328_STRUP_CON25 0x0030 +#define MT6328_STRUP_CON26 0x0032 +#define MT6328_STRUP_CON27 0x0034 +#define MT6328_STRUP_CON28 0x0036 +#define MT6328_STRUP_CON29 0x0038 +#define MT6328_STRUP_CON30 0x003a +#define MT6328_STRUP_CON31 0x003c +#define MT6328_STRUP_CON32 0x003e +#define MT6328_STRUP_ANA_CON0 0x0040 +#define MT6328_HWCID 0x0200 +#define MT6328_SWCID 0x0202 +#define MT6328_TOP_CON 0x0204 +#define MT6328_TEST_OUT 0x0206 +#define MT6328_TEST_CON0 0x0208 +#define MT6328_TEST_CON1 0x020a +#define MT6328_TESTMODE_SW 0x020c +#define MT6328_EN_STATUS0 0x020e +#define MT6328_EN_STATUS1 0x0210 +#define MT6328_EN_STATUS2 0x0212 +#define MT6328_OCSTATUS0 0x0214 +#define MT6328_OCSTATUS1 0x0216 +#define MT6328_OCSTATUS2 0x0218 +#define MT6328_PGDEBSTATUS 0x021a +#define MT6328_PGSTATUS 0x021c +#define MT6328_THERMALSTATUS 0x021e +#define MT6328_TOPSTATUS 0x0220 +#define MT6328_TDSEL_CON 0x0222 +#define MT6328_RDSEL_CON 0x0224 +#define MT6328_SMT_CON0 0x0226 +#define MT6328_SMT_CON1 0x0228 +#define MT6328_SMT_CON2 0x022a +#define MT6328_DRV_CON0 0x022c +#define MT6328_DRV_CON1 0x022e +#define MT6328_DRV_CON2 0x0230 +#define MT6328_DRV_CON3 0x0232 +#define MT6328_TOP_STATUS 0x0234 +#define MT6328_TOP_STATUS_SET 0x0236 +#define MT6328_TOP_STATUS_CLR 0x0238 +#define MT6328_RGS_ANA_MON 0x023a +#define MT6328_TOP_CKPDN_CON0 0x023c +#define MT6328_TOP_CKPDN_CON0_SET 0x023e +#define MT6328_TOP_CKPDN_CON0_CLR 0x0240 +#define MT6328_TOP_CKPDN_CON1 0x0242 +#define MT6328_TOP_CKPDN_CON1_SET 0x0244 +#define MT6328_TOP_CKPDN_CON1_CLR 0x0246 +#define MT6328_TOP_CKPDN_CON2 0x0248 +#define MT6328_TOP_CKPDN_CON2_SET 0x024a +#define MT6328_TOP_CKPDN_CON2_CLR 0x024c +#define MT6328_TOP_CKPDN_CON3 0x024e +#define MT6328_TOP_CKPDN_CON3_SET 0x0250 +#define MT6328_TOP_CKPDN_CON3_CLR 0x0252 +#define MT6328_TOP_CKPDN_CON4 0x0254 +#define MT6328_TOP_CKPDN_CON4_SET 0x0256 +#define MT6328_TOP_CKPDN_CON4_CLR 0x0258 +#define MT6328_TOP_CKSEL_CON0 0x025a +#define MT6328_TOP_CKSEL_CON0_SET 0x025c +#define MT6328_TOP_CKSEL_CON0_CLR 0x025e +#define MT6328_TOP_CKSEL_CON1 0x0260 +#define MT6328_TOP_CKSEL_CON1_SET 0x0262 +#define MT6328_TOP_CKSEL_CON1_CLR 0x0264 +#define MT6328_TOP_CKSEL_CON2 0x0266 +#define MT6328_TOP_CKSEL_CON2_SET 0x0268 +#define MT6328_TOP_CKSEL_CON2_CLR 0x026a +#define MT6328_TOP_CKDIVSEL_CON0 0x026c +#define MT6328_TOP_CKDIVSEL_CON0_SET 0x026e +#define MT6328_TOP_CKDIVSEL_CON0_CLR 0x0270 +#define MT6328_TOP_CKDIVSEL_CON1 0x0272 +#define MT6328_TOP_CKDIVSEL_CON1_SET 0x0274 +#define MT6328_TOP_CKDIVSEL_CON1_CLR 0x0276 +#define MT6328_TOP_CKHWEN_CON0 0x0278 +#define MT6328_TOP_CKHWEN_CON0_SET 0x027a +#define MT6328_TOP_CKHWEN_CON0_CLR 0x027c +#define MT6328_TOP_CKHWEN_CON1 0x027e +#define MT6328_TOP_CKHWEN_CON1_SET 0x0280 +#define MT6328_TOP_CKHWEN_CON1_CLR 0x0282 +#define MT6328_TOP_CKTST_CON0 0x0284 +#define MT6328_TOP_CKTST_CON1 0x0286 +#define MT6328_TOP_CKTST_CON2 0x0288 +#define MT6328_TOP_CLKSQ 0x028a +#define MT6328_TOP_CLKSQ_SET 0x028c +#define MT6328_TOP_CLKSQ_CLR 0x028e +#define MT6328_TOP_CLKSQ_RTC 0x0290 +#define MT6328_TOP_CLKSQ_RTC_SET 0x0292 +#define MT6328_TOP_CLKSQ_RTC_CLR 0x0294 +#define MT6328_TOP_CLK_TRIM 0x0296 +#define MT6328_TOP_RST_CON0 0x0298 +#define MT6328_TOP_RST_CON0_SET 0x029a +#define MT6328_TOP_RST_CON0_CLR 0x029c +#define MT6328_TOP_RST_CON1 0x029e +#define MT6328_TOP_RST_MISC 0x02a0 +#define MT6328_TOP_RST_MISC_SET 0x02a2 +#define MT6328_TOP_RST_MISC_CLR 0x02a4 +#define MT6328_TOP_RST_STATUS 0x02a6 +#define MT6328_TOP_RST_STATUS_SET 0x02a8 +#define MT6328_TOP_RST_STATUS_CLR 0x02aa +#define MT6328_INT_CON0 0x02ac +#define MT6328_INT_CON0_SET 0x02ae +#define MT6328_INT_CON0_CLR 0x02b0 +#define MT6328_INT_CON1 0x02b2 +#define MT6328_INT_CON1_SET 0x02b4 +#define MT6328_INT_CON1_CLR 0x02b6 +#define MT6328_INT_CON2 0x02b8 +#define MT6328_INT_CON2_SET 0x02ba +#define MT6328_INT_CON2_CLR 0x02bc +#define MT6328_INT_MISC_CON 0x02be +#define MT6328_INT_MISC_CON_SET 0x02c0 +#define MT6328_INT_MISC_CON_CLR 0x02c2 +#define MT6328_INT_STATUS0 0x02c4 +#define MT6328_INT_STATUS1 0x02c6 +#define MT6328_INT_STATUS2 0x02c8 +#define MT6328_OC_GEAR_0 0x02ca +#define MT6328_FQMTR_CON0 0x02cc +#define MT6328_FQMTR_CON1 0x02ce +#define MT6328_FQMTR_CON2 0x02d0 +#define MT6328_RG_SPI_CON 0x02d2 +#define MT6328_DEW_DIO_EN 0x02d4 +#define MT6328_DEW_READ_TEST 0x02d6 +#define MT6328_DEW_WRITE_TEST 0x02d8 +#define MT6328_DEW_CRC_SWRST 0x02da +#define MT6328_DEW_CRC_EN 0x02dc +#define MT6328_DEW_CRC_VAL 0x02de +#define MT6328_DEW_DBG_MON_SEL 0x02e0 +#define MT6328_DEW_CIPHER_KEY_SEL 0x02e2 +#define MT6328_DEW_CIPHER_IV_SEL 0x02e4 +#define MT6328_DEW_CIPHER_EN 0x02e6 +#define MT6328_DEW_CIPHER_RDY 0x02e8 +#define MT6328_DEW_CIPHER_MODE 0x02ea +#define MT6328_DEW_CIPHER_SWRST 0x02ec +#define MT6328_DEW_RDDMY_NO 0x02ee +#define MT6328_INT_TYPE_CON0 0x02f0 +#define MT6328_INT_TYPE_CON0_SET 0x02f2 +#define MT6328_INT_TYPE_CON0_CLR 0x02f4 +#define MT6328_INT_TYPE_CON1 0x02f6 +#define MT6328_INT_TYPE_CON1_SET 0x02f8 +#define MT6328_INT_TYPE_CON1_CLR 0x02fa +#define MT6328_INT_TYPE_CON2 0x02fc +#define MT6328_INT_TYPE_CON2_SET 0x02fe +#define MT6328_INT_TYPE_CON2_CLR 0x0300 +#define MT6328_INT_STA 0x0302 +#define MT6328_BUCK_ALL_CON0 0x0400 +#define MT6328_BUCK_ALL_CON1 0x0402 +#define MT6328_BUCK_ALL_CON2 0x0404 +#define MT6328_BUCK_ALL_CON3 0x0406 +#define MT6328_BUCK_ALL_CON4 0x0408 +#define MT6328_BUCK_ALL_CON5 0x040a +#define MT6328_BUCK_ALL_CON6 0x040c +#define MT6328_BUCK_ALL_CON9 0x040e +#define MT6328_BUCK_ALL_CON12 0x0410 +#define MT6328_BUCK_ALL_CON13 0x0412 +#define MT6328_BUCK_ALL_CON14 0x0414 +#define MT6328_BUCK_ALL_CON16 0x0416 +#define MT6328_BUCK_ALL_CON18 0x0418 +#define MT6328_BUCK_ALL_CON19 0x041a +#define MT6328_BUCK_ALL_CON20 0x041c +#define MT6328_BUCK_ALL_CON21 0x041e +#define MT6328_BUCK_ALL_CON22 0x0420 +#define MT6328_BUCK_ALL_CON23 0x0422 +#define MT6328_BUCK_ALL_CON24 0x0424 +#define MT6328_BUCK_ALL_CON25 0x0426 +#define MT6328_BUCK_ALL_CON26 0x0428 +#define MT6328_BUCK_ALL_CON27 0x042a +#define MT6328_BUCK_ALL_CON28 0x042c +#define MT6328_SMPS_TOP_ANA_CON0 0x042e +#define MT6328_SMPS_TOP_ANA_CON1 0x0430 +#define MT6328_SMPS_TOP_ANA_CON2 0x0432 +#define MT6328_SMPS_TOP_ANA_CON3 0x0434 +#define MT6328_SMPS_TOP_ANA_CON4 0x0436 +#define MT6328_SMPS_TOP_ANA_CON5 0x0438 +#define MT6328_SMPS_TOP_ANA_CON6 0x043a +#define MT6328_SMPS_TOP_ANA_CON7 0x043c +#define MT6328_SMPS_TOP_ANA_CON8 0x043e +#define MT6328_VCORE_ANA_CON0 0x0440 +#define MT6328_VCORE_ANA_CON1 0x0442 +#define MT6328_VCORE_ANA_CON2 0x0444 +#define MT6328_VCORE_ANA_CON3 0x0446 +#define MT6328_VCORE_ANA_CON4 0x0448 +#define MT6328_VSYS22_ANA_CON0 0x044a +#define MT6328_VSYS22_ANA_CON1 0x044c +#define MT6328_VSYS22_ANA_CON2 0x044e +#define MT6328_VSYS22_ANA_CON3 0x0450 +#define MT6328_VSYS22_ANA_CON4 0x0452 +#define MT6328_VPROC_ANA_CON0 0x0454 +#define MT6328_VPROC_ANA_CON1 0x0456 +#define MT6328_VPROC_ANA_CON2 0x0458 +#define MT6328_VPROC_ANA_CON3 0x045a +#define MT6328_VPROC_ANA_CON4 0x045c +#define MT6328_OSC32_ANA_CON0 0x045e +#define MT6328_OSC32_ANA_CON1 0x0460 +#define MT6328_VPA_ANA_CON0 0x0462 +#define MT6328_VPA_ANA_CON1 0x0464 +#define MT6328_VPA_ANA_CON2 0x0466 +#define MT6328_VPA_ANA_CON3 0x0468 +#define MT6328_VLTE_ANA_CON0 0x046a +#define MT6328_VLTE_ANA_CON1 0x046c +#define MT6328_VLTE_ANA_CON2 0x046e +#define MT6328_VLTE_ANA_CON3 0x0470 +#define MT6328_VLTE_ANA_CON4 0x0472 +#define MT6328_VPROC_CON0 0x0474 +#define MT6328_VPROC_CON1 0x0476 +#define MT6328_VPROC_CON2 0x0478 +#define MT6328_VPROC_CON3 0x047a +#define MT6328_VPROC_CON4 0x047c +#define MT6328_VPROC_CON5 0x047e +#define MT6328_VPROC_CON6 0x0480 +#define MT6328_VPROC_CON7 0x0482 +#define MT6328_VPROC_CON8 0x0484 +#define MT6328_VPROC_CON9 0x0486 +#define MT6328_VPROC_CON10 0x0488 +#define MT6328_VPROC_CON11 0x048a +#define MT6328_VPROC_CON12 0x048c +#define MT6328_VPROC_CON13 0x048e +#define MT6328_VPROC_CON14 0x0490 +#define MT6328_VPROC_CON15 0x0492 +#define MT6328_VPROC_CON16 0x0494 +#define MT6328_VPROC_CON17 0x0496 +#define MT6328_VPROC_CON18 0x0498 +#define MT6328_VPROC_CON19 0x049a +#define MT6328_VSRAM_CON0 0x049c +#define MT6328_VSRAM_CON1 0x049e +#define MT6328_VSRAM_CON2 0x04a0 +#define MT6328_VSRAM_CON3 0x04a2 +#define MT6328_VSRAM_CON4 0x04a4 +#define MT6328_VSRAM_CON5 0x04a6 +#define MT6328_VSRAM_CON6 0x04a8 +#define MT6328_VSRAM_CON7 0x04aa +#define MT6328_VSRAM_CON8 0x04ac +#define MT6328_VSRAM_CON9 0x04ae +#define MT6328_VSRAM_CON10 0x04b0 +#define MT6328_VSRAM_CON11 0x04b2 +#define MT6328_VSRAM_CON12 0x04b4 +#define MT6328_VSRAM_CON13 0x04b6 +#define MT6328_VSRAM_CON14 0x04b8 +#define MT6328_VSRAM_CON15 0x04ba +#define MT6328_VSRAM_CON16 0x04bc +#define MT6328_VSRAM_CON17 0x04be +#define MT6328_VSRAM_CON18 0x04c0 +#define MT6328_VSRAM_CON19 0x04c2 +#define MT6328_VLTE_CON0 0x04c4 +#define MT6328_VLTE_CON1 0x04c6 +#define MT6328_VLTE_CON2 0x04c8 +#define MT6328_VLTE_CON3 0x04ca +#define MT6328_VLTE_CON4 0x04cc +#define MT6328_VLTE_CON5 0x04ce +#define MT6328_VLTE_CON6 0x04d0 +#define MT6328_VLTE_CON7 0x04d2 +#define MT6328_VLTE_CON8 0x04d4 +#define MT6328_VLTE_CON9 0x04d6 +#define MT6328_VLTE_CON10 0x04d8 +#define MT6328_VLTE_CON11 0x04da +#define MT6328_VLTE_CON12 0x04dc +#define MT6328_VLTE_CON13 0x04de +#define MT6328_VLTE_CON14 0x04e0 +#define MT6328_VLTE_CON15 0x04e2 +#define MT6328_VLTE_CON16 0x04e4 +#define MT6328_VLTE_CON17 0x04e6 +#define MT6328_VLTE_CON18 0x04e8 +#define MT6328_VLTE_CON19 0x04ea +#define MT6328_VCORE1_CON0 0x0600 +#define MT6328_VCORE1_CON1 0x0602 +#define MT6328_VCORE1_CON2 0x0604 +#define MT6328_VCORE1_CON3 0x0606 +#define MT6328_VCORE1_CON4 0x0608 +#define MT6328_VCORE1_CON5 0x060a +#define MT6328_VCORE1_CON6 0x060c +#define MT6328_VCORE1_CON7 0x060e +#define MT6328_VCORE1_CON8 0x0610 +#define MT6328_VCORE1_CON9 0x0612 +#define MT6328_VCORE1_CON10 0x0614 +#define MT6328_VCORE1_CON11 0x0616 +#define MT6328_VCORE1_CON12 0x0618 +#define MT6328_VCORE1_CON13 0x061a +#define MT6328_VCORE1_CON14 0x061c +#define MT6328_VCORE1_CON15 0x061e +#define MT6328_VCORE1_CON16 0x0620 +#define MT6328_VCORE1_CON17 0x0622 +#define MT6328_VCORE1_CON18 0x0624 +#define MT6328_VCORE1_CON19 0x0626 +#define MT6328_VSYS22_CON0 0x0628 +#define MT6328_VSYS22_CON1 0x062a +#define MT6328_VSYS22_CON2 0x062c +#define MT6328_VSYS22_CON3 0x062e +#define MT6328_VSYS22_CON4 0x0630 +#define MT6328_VSYS22_CON5 0x0632 +#define MT6328_VSYS22_CON6 0x0634 +#define MT6328_VSYS22_CON7 0x0636 +#define MT6328_VSYS22_CON8 0x0638 +#define MT6328_VSYS22_CON9 0x063a +#define MT6328_VSYS22_CON10 0x063c +#define MT6328_VSYS22_CON11 0x063e +#define MT6328_VSYS22_CON12 0x0640 +#define MT6328_VSYS22_CON13 0x0642 +#define MT6328_VSYS22_CON14 0x0644 +#define MT6328_VSYS22_CON15 0x0646 +#define MT6328_VSYS22_CON16 0x0648 +#define MT6328_VSYS22_CON17 0x064a +#define MT6328_VSYS22_CON18 0x064c +#define MT6328_VSYS22_CON19 0x064e +#define MT6328_VPA_CON0 0x0650 +#define MT6328_VPA_CON1 0x0652 +#define MT6328_VPA_CON2 0x0654 +#define MT6328_VPA_CON3 0x0656 +#define MT6328_VPA_CON4 0x0658 +#define MT6328_VPA_CON5 0x065a +#define MT6328_VPA_CON6 0x065c +#define MT6328_VPA_CON7 0x065e +#define MT6328_VPA_CON8 0x0660 +#define MT6328_VPA_CON9 0x0662 +#define MT6328_VPA_CON10 0x0664 +#define MT6328_VPA_CON11 0x0666 +#define MT6328_VPA_CON12 0x0668 +#define MT6328_VPA_CON13 0x066a +#define MT6328_VPA_CON14 0x066c +#define MT6328_VPA_CON15 0x066e +#define MT6328_VPA_CON16 0x0670 +#define MT6328_VPA_CON17 0x0672 +#define MT6328_VPA_CON18 0x0674 +#define MT6328_VPA_CON19 0x0676 +#define MT6328_VPA_CON20 0x0678 +#define MT6328_VPA_CON21 0x067a +#define MT6328_VPA_CON22 0x067c +#define MT6328_VPA_CON23 0x067e +#define MT6328_VPA_CON24 0x0680 +#define MT6328_BUCK_K_CON0 0x0682 +#define MT6328_BUCK_K_CON1 0x0684 +#define MT6328_BUCK_K_CON2 0x0686 +#define MT6328_BUCK_K_CON3 0x0688 +#define MT6328_ZCD_CON0 0x0800 +#define MT6328_ZCD_CON1 0x0802 +#define MT6328_ZCD_CON2 0x0804 +#define MT6328_ZCD_CON3 0x0806 +#define MT6328_ZCD_CON4 0x0808 +#define MT6328_ZCD_CON5 0x080a +#define MT6328_ISINK0_CON0 0x080c +#define MT6328_ISINK0_CON1 0x080e +#define MT6328_ISINK0_CON2 0x0810 +#define MT6328_ISINK0_CON3 0x0812 +#define MT6328_ISINK1_CON0 0x0814 +#define MT6328_ISINK1_CON1 0x0816 +#define MT6328_ISINK1_CON2 0x0818 +#define MT6328_ISINK1_CON3 0x081a +#define MT6328_ISINK2_CON1 0x081c +#define MT6328_ISINK3_CON1 0x081e +#define MT6328_ISINK_ANA0 0x0820 +#define MT6328_ISINK_ANA1 0x0822 +#define MT6328_ISINK_PHASE_DLY 0x0824 +#define MT6328_ISINK_SFSTR 0x0826 +#define MT6328_ISINK_EN_CTRL 0x0828 +#define MT6328_ISINK_MODE_CTRL 0x082a +#define MT6328_VTCXO_0_CON0 0x0a00 +#define MT6328_VTCXO_1_CON0 0x0a02 +#define MT6328_VAUD28_CON0 0x0a04 +#define MT6328_VAUX18_CON0 0x0a06 +#define MT6328_VRF18_0_CON0 0x0a08 +#define MT6328_VRF18_0_CON1 0x0a0a +#define MT6328_VCAMA_CON0 0x0a0c +#define MT6328_VCN28_CON0 0x0a0e +#define MT6328_VCN33_CON0 0x0a10 +#define MT6328_VCN33_CON1 0x0a12 +#define MT6328_VCN33_CON2 0x0a14 +#define MT6328_VRF18_1_CON0 0x0a16 +#define MT6328_VRF18_1_CON1 0x0a18 +#define MT6328_VUSB33_CON0 0x0a1a +#define MT6328_VMCH_CON0 0x0a1c +#define MT6328_VMCH_CON1 0x0a1e +#define MT6328_VMC_CON0 0x0a20 +#define MT6328_VMC_CON1 0x0a22 +#define MT6328_VEMC_3V3_CON0 0x0a24 +#define MT6328_VEMC_3V3_CON1 0x0a26 +#define MT6328_VIO28_CON0 0x0a28 +#define MT6328_VCAMAF_CON0 0x0a2a +#define MT6328_VGP1_CON0 0x0a2c +#define MT6328_VGP1_CON1 0x0a2e +#define MT6328_VEFUSE_CON0 0x0a30 +#define MT6328_VSIM1_CON0 0x0a32 +#define MT6328_VSIM2_CON0 0x0a34 +#define MT6328_VIO18_CON0 0x0a36 +#define MT6328_VIBR_CON0 0x0a38 +#define MT6328_VCN18_CON0 0x0a3a +#define MT6328_VCAM_CON0 0x0a3c +#define MT6328_VCAMIO_CON0 0x0a3e +#define MT6328_LDO_VSRAM_CON0 0x0a40 +#define MT6328_LDO_VSRAM_CON1 0x0a42 +#define MT6328_VTREF_CON0 0x0a44 +#define MT6328_VM_CON0 0x0a46 +#define MT6328_VM_CON1 0x0a48 +#define MT6328_VRTC_CON0 0x0a4a +#define MT6328_LDO_OCFB0 0x0a4c +#define MT6328_ALDO_ANA_CON0 0x0a4e +#define MT6328_ADLDO_ANA_CON1 0x0a50 +#define MT6328_ADLDO_ANA_CON2 0x0a52 +#define MT6328_ADLDO_ANA_CON3 0x0a54 +#define MT6328_ADLDO_ANA_CON4 0x0a56 +#define MT6328_ADLDO_ANA_CON5 0x0a58 +#define MT6328_ADLDO_ANA_CON6 0x0a5a +#define MT6328_ADLDO_ANA_CON7 0x0a5c +#define MT6328_ADLDO_ANA_CON8 0x0a5e +#define MT6328_ADLDO_ANA_CON9 0x0a60 +#define MT6328_ADLDO_ANA_CON10 0x0a62 +#define MT6328_ADLDO_ANA_CON11 0x0a64 +#define MT6328_ADLDO_ANA_CON12 0x0a66 +#define MT6328_ADLDO_ANA_CON13 0x0a68 +#define MT6328_DLDO_ANA_CON0 0x0a6a +#define MT6328_DLDO_ANA_CON1 0x0a6c +#define MT6328_DLDO_ANA_CON2 0x0a6e +#define MT6328_DLDO_ANA_CON3 0x0a70 +#define MT6328_DLDO_ANA_CON4 0x0a72 +#define MT6328_DLDO_ANA_CON5 0x0a74 +#define MT6328_SLDO_ANA_CON0 0x0a76 +#define MT6328_SLDO_ANA_CON1 0x0a78 +#define MT6328_SLDO_ANA_CON2 0x0a7a +#define MT6328_SLDO_ANA_CON3 0x0a7c +#define MT6328_SLDO_ANA_CON4 0x0a7e +#define MT6328_SLDO_ANA_CON5 0x0a80 +#define MT6328_SLDO_ANA_CON6 0x0a82 +#define MT6328_SLDO_ANA_CON7 0x0a84 +#define MT6328_SLDO_ANA_CON8 0x0a86 +#define MT6328_SLDO_ANA_CON9 0x0a88 +#define MT6328_SLDO_ANA_CON10 0x0a8a +#define MT6328_LDO_RSV_CON0 0x0a8c +#define MT6328_LDO_RSV_CON1 0x0a8e +#define MT6328_SPK_CON0 0x0a90 +#define MT6328_SPK_CON1 0x0a92 +#define MT6328_SPK_CON2 0x0a94 +#define MT6328_SPK_CON3 0x0a96 +#define MT6328_SPK_CON4 0x0a98 +#define MT6328_SPK_CON5 0x0a9a +#define MT6328_SPK_CON6 0x0a9c +#define MT6328_SPK_CON7 0x0a9e +#define MT6328_SPK_CON8 0x0aa0 +#define MT6328_SPK_CON9 0x0aa2 +#define MT6328_SPK_CON10 0x0aa4 +#define MT6328_SPK_CON11 0x0aa6 +#define MT6328_SPK_CON12 0x0aa8 +#define MT6328_SPK_CON13 0x0aaa +#define MT6328_SPK_CON14 0x0aac +#define MT6328_SPK_CON15 0x0aae +#define MT6328_SPK_CON16 0x0ab0 +#define MT6328_SPK_ANA_CON0 0x0ab2 +#define MT6328_SPK_ANA_CON1 0x0ab4 +#define MT6328_SPK_ANA_CON3 0x0ab6 +#define MT6328_OTP_CON0 0x0c00 +#define MT6328_OTP_CON1 0x0c02 +#define MT6328_OTP_CON2 0x0c04 +#define MT6328_OTP_CON3 0x0c06 +#define MT6328_OTP_CON4 0x0c08 +#define MT6328_OTP_CON5 0x0c0a +#define MT6328_OTP_CON6 0x0c0c +#define MT6328_OTP_CON7 0x0c0e +#define MT6328_OTP_CON8 0x0c10 +#define MT6328_OTP_CON9 0x0c12 +#define MT6328_OTP_CON10 0x0c14 +#define MT6328_OTP_CON11 0x0c16 +#define MT6328_OTP_CON12 0x0c18 +#define MT6328_OTP_CON13 0x0c1a +#define MT6328_OTP_CON14 0x0c1c +#define MT6328_OTP_DOUT_0_15 0x0c1e +#define MT6328_OTP_DOUT_16_31 0x0c20 +#define MT6328_OTP_DOUT_32_47 0x0c22 +#define MT6328_OTP_DOUT_48_63 0x0c24 +#define MT6328_OTP_DOUT_64_79 0x0c26 +#define MT6328_OTP_DOUT_80_95 0x0c28 +#define MT6328_OTP_DOUT_96_111 0x0c2a +#define MT6328_OTP_DOUT_112_127 0x0c2c +#define MT6328_OTP_DOUT_128_143 0x0c2e +#define MT6328_OTP_DOUT_144_159 0x0c30 +#define MT6328_OTP_DOUT_160_175 0x0c32 +#define MT6328_OTP_DOUT_176_191 0x0c34 +#define MT6328_OTP_DOUT_192_207 0x0c36 +#define MT6328_OTP_DOUT_208_223 0x0c38 +#define MT6328_OTP_DOUT_224_239 0x0c3a +#define MT6328_OTP_DOUT_240_255 0x0c3c +#define MT6328_OTP_DOUT_256_271 0x0c3e +#define MT6328_OTP_DOUT_272_287 0x0c40 +#define MT6328_OTP_DOUT_288_303 0x0c42 +#define MT6328_OTP_DOUT_304_319 0x0c44 +#define MT6328_OTP_DOUT_320_335 0x0c46 +#define MT6328_OTP_DOUT_336_351 0x0c48 +#define MT6328_OTP_DOUT_352_367 0x0c4a +#define MT6328_OTP_DOUT_368_383 0x0c4c +#define MT6328_OTP_DOUT_384_399 0x0c4e +#define MT6328_OTP_DOUT_400_415 0x0c50 +#define MT6328_OTP_DOUT_416_431 0x0c52 +#define MT6328_OTP_DOUT_432_447 0x0c54 +#define MT6328_OTP_DOUT_448_463 0x0c56 +#define MT6328_OTP_DOUT_464_479 0x0c58 +#define MT6328_OTP_DOUT_480_495 0x0c5a +#define MT6328_OTP_DOUT_496_511 0x0c5c +#define MT6328_OTP_VAL_0_15 0x0c5e +#define MT6328_OTP_VAL_16_31 0x0c60 +#define MT6328_OTP_VAL_32_47 0x0c62 +#define MT6328_OTP_VAL_48_63 0x0c64 +#define MT6328_OTP_VAL_64_79 0x0c66 +#define MT6328_OTP_VAL_80_95 0x0c68 +#define MT6328_OTP_VAL_96_111 0x0c6a +#define MT6328_OTP_VAL_112_127 0x0c6c +#define MT6328_OTP_VAL_128_143 0x0c6e +#define MT6328_OTP_VAL_144_159 0x0c70 +#define MT6328_OTP_VAL_160_175 0x0c72 +#define MT6328_OTP_VAL_176_191 0x0c74 +#define MT6328_OTP_VAL_192_207 0x0c76 +#define MT6328_OTP_VAL_208_223 0x0c78 +#define MT6328_OTP_VAL_224_239 0x0c7a +#define MT6328_OTP_VAL_240_255 0x0c7c +#define MT6328_OTP_VAL_256_271 0x0c7e +#define MT6328_OTP_VAL_272_287 0x0c80 +#define MT6328_OTP_VAL_288_303 0x0c82 +#define MT6328_OTP_VAL_304_319 0x0c84 +#define MT6328_OTP_VAL_320_335 0x0c86 +#define MT6328_OTP_VAL_336_351 0x0c88 +#define MT6328_OTP_VAL_352_367 0x0c8a +#define MT6328_OTP_VAL_368_383 0x0c8c +#define MT6328_OTP_VAL_384_399 0x0c8e +#define MT6328_OTP_VAL_400_415 0x0c90 +#define MT6328_OTP_VAL_416_431 0x0c92 +#define MT6328_OTP_VAL_432_447 0x0c94 +#define MT6328_OTP_VAL_448_463 0x0c96 +#define MT6328_OTP_VAL_464_479 0x0c98 +#define MT6328_OTP_VAL_480_495 0x0c9a +#define MT6328_OTP_VAL_496_511 0x0c9c +#define MT6328_RTC_MIX_CON0 0x0c9e +#define MT6328_RTC_MIX_CON1 0x0ca0 +#define MT6328_RTC_MIX_CON2 0x0ca2 +#define MT6328_FGADC_CON0 0x0ca4 +#define MT6328_FGADC_CON1 0x0ca6 +#define MT6328_FGADC_CON2 0x0ca8 +#define MT6328_FGADC_CON3 0x0caa +#define MT6328_FGADC_CON4 0x0cac +#define MT6328_FGADC_CON5 0x0cae +#define MT6328_FGADC_CON6 0x0cb0 +#define MT6328_FGADC_CON7 0x0cb2 +#define MT6328_FGADC_CON8 0x0cb4 +#define MT6328_FGADC_CON9 0x0cb6 +#define MT6328_FGADC_CON10 0x0cb8 +#define MT6328_FGADC_CON11 0x0cba +#define MT6328_FGADC_CON12 0x0cbc +#define MT6328_FGADC_CON13 0x0cbe +#define MT6328_FGADC_CON14 0x0cc0 +#define MT6328_FGADC_CON15 0x0cc2 +#define MT6328_FGADC_CON16 0x0cc4 +#define MT6328_FGADC_CON17 0x0cc6 +#define MT6328_FGADC_CON18 0x0cc8 +#define MT6328_FGADC_CON19 0x0cca +#define MT6328_FGADC_CON20 0x0ccc +#define MT6328_FGADC_CON21 0x0cce +#define MT6328_FGADC_CON22 0x0cd0 +#define MT6328_FGADC_CON23 0x0cd2 +#define MT6328_FGADC_CON24 0x0cd4 +#define MT6328_FGADC_CON25 0x0cd6 +#define MT6328_FGADC_CON26 0x0cd8 +#define MT6328_FGADC_CON27 0x0cda +#define MT6328_AUDDEC_ANA_CON0 0x0cdc +#define MT6328_AUDDEC_ANA_CON1 0x0cde +#define MT6328_AUDDEC_ANA_CON2 0x0ce0 +#define MT6328_AUDDEC_ANA_CON3 0x0ce2 +#define MT6328_AUDDEC_ANA_CON4 0x0ce4 +#define MT6328_AUDDEC_ANA_CON5 0x0ce6 +#define MT6328_AUDDEC_ANA_CON6 0x0ce8 +#define MT6328_AUDDEC_ANA_CON7 0x0cea +#define MT6328_AUDDEC_ANA_CON8 0x0cec +#define MT6328_AUDENC_ANA_CON0 0x0cee +#define MT6328_AUDENC_ANA_CON1 0x0cf0 +#define MT6328_AUDENC_ANA_CON2 0x0cf2 +#define MT6328_AUDENC_ANA_CON3 0x0cf4 +#define MT6328_AUDENC_ANA_CON4 0x0cf6 +#define MT6328_AUDENC_ANA_CON5 0x0cf8 +#define MT6328_AUDENC_ANA_CON6 0x0cfa +#define MT6328_AUDENC_ANA_CON7 0x0cfc +#define MT6328_AUDENC_ANA_CON8 0x0cfe +#define MT6328_AUDENC_ANA_CON9 0x0d00 +#define MT6328_AUDENC_ANA_CON10 0x0d02 +#define MT6328_AUDNCP_CLKDIV_CON0 0x0d04 +#define MT6328_AUDNCP_CLKDIV_CON1 0x0d06 +#define MT6328_AUDNCP_CLKDIV_CON2 0x0d08 +#define MT6328_AUDNCP_CLKDIV_CON3 0x0d0a +#define MT6328_AUDNCP_CLKDIV_CON4 0x0d0c +#define MT6328_AUXADC_ADC0 0x0e00 +#define MT6328_AUXADC_ADC1 0x0e02 +#define MT6328_AUXADC_ADC2 0x0e04 +#define MT6328_AUXADC_ADC3 0x0e06 +#define MT6328_AUXADC_ADC4 0x0e08 +#define MT6328_AUXADC_ADC5 0x0e0a +#define MT6328_AUXADC_ADC6 0x0e0c +#define MT6328_AUXADC_ADC7 0x0e0e +#define MT6328_AUXADC_ADC8 0x0e10 +#define MT6328_AUXADC_ADC9 0x0e12 +#define MT6328_AUXADC_ADC10 0x0e14 +#define MT6328_AUXADC_ADC11 0x0e16 +#define MT6328_AUXADC_ADC12 0x0e18 +#define MT6328_AUXADC_ADC13 0x0e1a +#define MT6328_AUXADC_ADC14 0x0e1c +#define MT6328_AUXADC_ADC15 0x0e1e +#define MT6328_AUXADC_ADC16 0x0e20 +#define MT6328_AUXADC_ADC17 0x0e22 +#define MT6328_AUXADC_ADC18 0x0e24 +#define MT6328_AUXADC_ADC19 0x0e26 +#define MT6328_AUXADC_ADC20 0x0e28 +#define MT6328_AUXADC_ADC21 0x0e2a +#define MT6328_AUXADC_ADC22 0x0e2c +#define MT6328_AUXADC_ADC23 0x0e2e +#define MT6328_AUXADC_ADC24 0x0e30 +#define MT6328_AUXADC_ADC25 0x0e32 +#define MT6328_AUXADC_ADC26 0x0e34 +#define MT6328_AUXADC_ADC27 0x0e36 +#define MT6328_AUXADC_ADC28 0x0e38 +#define MT6328_AUXADC_ADC29 0x0e3a +#define MT6328_AUXADC_ADC30 0x0e3c +#define MT6328_AUXADC_ADC31 0x0e3e +#define MT6328_AUXADC_ADC32 0x0e40 +#define MT6328_AUXADC_ADC33 0x0e42 +#define MT6328_AUXADC_BUF0 0x0e44 +#define MT6328_AUXADC_BUF1 0x0e46 +#define MT6328_AUXADC_BUF2 0x0e48 +#define MT6328_AUXADC_BUF3 0x0e4a +#define MT6328_AUXADC_BUF4 0x0e4c +#define MT6328_AUXADC_BUF5 0x0e4e +#define MT6328_AUXADC_BUF6 0x0e50 +#define MT6328_AUXADC_BUF7 0x0e52 +#define MT6328_AUXADC_BUF8 0x0e54 +#define MT6328_AUXADC_BUF9 0x0e56 +#define MT6328_AUXADC_BUF10 0x0e58 +#define MT6328_AUXADC_BUF11 0x0e5a +#define MT6328_AUXADC_BUF12 0x0e5c +#define MT6328_AUXADC_BUF13 0x0e5e +#define MT6328_AUXADC_BUF14 0x0e60 +#define MT6328_AUXADC_BUF15 0x0e62 +#define MT6328_AUXADC_BUF16 0x0e64 +#define MT6328_AUXADC_BUF17 0x0e66 +#define MT6328_AUXADC_BUF18 0x0e68 +#define MT6328_AUXADC_BUF19 0x0e6a +#define MT6328_AUXADC_BUF20 0x0e6c +#define MT6328_AUXADC_BUF21 0x0e6e +#define MT6328_AUXADC_BUF22 0x0e70 +#define MT6328_AUXADC_BUF23 0x0e72 +#define MT6328_AUXADC_BUF24 0x0e74 +#define MT6328_AUXADC_BUF25 0x0e76 +#define MT6328_AUXADC_BUF26 0x0e78 +#define MT6328_AUXADC_BUF27 0x0e7a +#define MT6328_AUXADC_BUF28 0x0e7c +#define MT6328_AUXADC_BUF29 0x0e7e +#define MT6328_AUXADC_BUF30 0x0e80 +#define MT6328_AUXADC_BUF31 0x0e82 +#define MT6328_AUXADC_STA0 0x0e84 +#define MT6328_AUXADC_STA1 0x0e86 +#define MT6328_AUXADC_RQST0 0x0e88 +#define MT6328_AUXADC_RQST0_SET 0x0e8a +#define MT6328_AUXADC_RQST0_CLR 0x0e8c +#define MT6328_AUXADC_RQST1 0x0e8e +#define MT6328_AUXADC_RQST1_SET 0x0e90 +#define MT6328_AUXADC_RQST1_CLR 0x0e92 +#define MT6328_AUXADC_CON0 0x0e94 +#define MT6328_AUXADC_CON0_SET 0x0e96 +#define MT6328_AUXADC_CON0_CLR 0x0e98 +#define MT6328_AUXADC_CON1 0x0e9a +#define MT6328_AUXADC_CON2 0x0e9c +#define MT6328_AUXADC_CON3 0x0e9e +#define MT6328_AUXADC_CON4 0x0ea0 +#define MT6328_AUXADC_CON5 0x0ea2 +#define MT6328_AUXADC_CON6 0x0ea4 +#define MT6328_AUXADC_CON7 0x0ea6 +#define MT6328_AUXADC_CON8 0x0ea8 +#define MT6328_AUXADC_CON9 0x0eaa +#define MT6328_AUXADC_CON10 0x0eac +#define MT6328_AUXADC_CON11 0x0eae +#define MT6328_AUXADC_CON12 0x0eb0 +#define MT6328_AUXADC_CON13 0x0eb2 +#define MT6328_AUXADC_CON14 0x0eb4 +#define MT6328_AUXADC_CON15 0x0eb6 +#define MT6328_AUXADC_CON16 0x0eb8 +#define MT6328_AUXADC_AUTORPT0 0x0eba +#define MT6328_AUXADC_LBAT0 0x0ebc +#define MT6328_AUXADC_LBAT1 0x0ebe +#define MT6328_AUXADC_LBAT2 0x0ec0 +#define MT6328_AUXADC_LBAT3 0x0ec2 +#define MT6328_AUXADC_LBAT4 0x0ec4 +#define MT6328_AUXADC_LBAT5 0x0ec6 +#define MT6328_AUXADC_LBAT6 0x0ec8 +#define MT6328_AUXADC_ACCDET 0x0eca +#define MT6328_AUXADC_THR0 0x0ecc +#define MT6328_AUXADC_THR1 0x0ece +#define MT6328_AUXADC_THR2 0x0ed0 +#define MT6328_AUXADC_THR3 0x0ed2 +#define MT6328_AUXADC_THR4 0x0ed4 +#define MT6328_AUXADC_THR5 0x0ed6 +#define MT6328_AUXADC_THR6 0x0ed8 +#define MT6328_AUXADC_EFUSE0 0x0eda +#define MT6328_AUXADC_EFUSE1 0x0edc +#define MT6328_AUXADC_EFUSE2 0x0ede +#define MT6328_AUXADC_EFUSE3 0x0ee0 +#define MT6328_AUXADC_EFUSE4 0x0ee2 +#define MT6328_AUXADC_EFUSE5 0x0ee4 +#define MT6328_AUXADC_DBG0 0x0ee6 +#define MT6328_AUXADC_IMP0 0x0ee8 +#define MT6328_AUXADC_IMP1 0x0eea +#define MT6328_AUXADC_VISMPS0_1 0x0eec +#define MT6328_AUXADC_VISMPS0_2 0x0eee +#define MT6328_AUXADC_VISMPS0_3 0x0ef0 +#define MT6328_AUXADC_VISMPS0_4 0x0ef2 +#define MT6328_AUXADC_VISMPS0_5 0x0ef4 +#define MT6328_AUXADC_VISMPS0_6 0x0ef6 +#define MT6328_AUXADC_VISMPS0_7 0x0ef8 +#define MT6328_AUXADC_LBAT2_1 0x0efa +#define MT6328_AUXADC_LBAT2_2 0x0efc +#define MT6328_AUXADC_LBAT2_3 0x0efe +#define MT6328_AUXADC_LBAT2_4 0x0f00 +#define MT6328_AUXADC_LBAT2_5 0x0f02 +#define MT6328_AUXADC_LBAT2_6 0x0f04 +#define MT6328_AUXADC_LBAT2_7 0x0f06 +#define MT6328_AUXADC_MDBG_0 0x0f08 +#define MT6328_AUXADC_MDBG_1 0x0f0a +#define MT6328_AUXADC_MDBG_2 0x0f0c +#define MT6328_AUXADC_MDRT_0 0x0f0e +#define MT6328_AUXADC_MDRT_1 0x0f10 +#define MT6328_AUXADC_MDRT_2 0x0f12 +#define MT6328_ACCDET_CON0 0x0f14 +#define MT6328_ACCDET_CON1 0x0f16 +#define MT6328_ACCDET_CON2 0x0f18 +#define MT6328_ACCDET_CON3 0x0f1a +#define MT6328_ACCDET_CON4 0x0f1c +#define MT6328_ACCDET_CON5 0x0f1e +#define MT6328_ACCDET_CON6 0x0f20 +#define MT6328_ACCDET_CON7 0x0f22 +#define MT6328_ACCDET_CON8 0x0f24 +#define MT6328_ACCDET_CON9 0x0f26 +#define MT6328_ACCDET_CON10 0x0f28 +#define MT6328_ACCDET_CON11 0x0f2a +#define MT6328_ACCDET_CON12 0x0f2c +#define MT6328_ACCDET_CON13 0x0f2e +#define MT6328_ACCDET_CON14 0x0f30 +#define MT6328_ACCDET_CON15 0x0f32 +#define MT6328_ACCDET_CON16 0x0f34 +#define MT6328_ACCDET_CON17 0x0f36 +#define MT6328_ACCDET_CON18 0x0f38 +#define MT6328_ACCDET_CON19 0x0f3a +#define MT6328_ACCDET_CON20 0x0f3c +#define MT6328_ACCDET_CON21 0x0f3e +#define MT6328_ACCDET_CON22 0x0f40 +#define MT6328_ACCDET_CON23 0x0f42 +#define MT6328_ACCDET_CON24 0x0f44 +#define MT6328_ACCDET_CON25 0x0f46 +#define MT6328_CHR_CON0 0x0f48 +#define MT6328_CHR_CON1 0x0f4a +#define MT6328_CHR_CON2 0x0f4c +#define MT6328_CHR_CON3 0x0f4e +#define MT6328_CHR_CON4 0x0f50 +#define MT6328_CHR_CON5 0x0f52 +#define MT6328_CHR_CON6 0x0f54 +#define MT6328_CHR_CON7 0x0f56 +#define MT6328_CHR_CON8 0x0f58 +#define MT6328_CHR_CON9 0x0f5a +#define MT6328_CHR_CON10 0x0f5c +#define MT6328_CHR_CON11 0x0f5e +#define MT6328_CHR_CON12 0x0f60 +#define MT6328_CHR_CON13 0x0f62 +#define MT6328_CHR_CON14 0x0f64 +#define MT6328_CHR_CON15 0x0f66 +#define MT6328_CHR_CON16 0x0f68 +#define MT6328_CHR_CON17 0x0f6a +#define MT6328_CHR_CON18 0x0f6c +#define MT6328_CHR_CON19 0x0f6e +#define MT6328_CHR_CON20 0x0f70 +#define MT6328_CHR_CON21 0x0f72 +#define MT6328_CHR_CON22 0x0f74 +#define MT6328_CHR_CON23 0x0f76 +#define MT6328_CHR_CON24 0x0f78 +#define MT6328_CHR_CON25 0x0f7a +#define MT6328_CHR_CON26 0x0f7c +#define MT6328_CHR_CON27 0x0f7e +#define MT6328_CHR_CON28 0x0f80 +#define MT6328_CHR_CON29 0x0f82 +#define MT6328_CHR_CON30 0x0f84 +#define MT6328_CHR_CON31 0x0f86 +#define MT6328_CHR_CON32 0x0f88 +#define MT6328_CHR_CON33 0x0f8a +#define MT6328_CHR_CON34 0x0f8c +#define MT6328_CHR_CON35 0x0f8e +#define MT6328_CHR_CON36 0x0f90 +#define MT6328_CHR_CON37 0x0f92 +#define MT6328_CHR_CON38 0x0f94 +#define MT6328_CHR_CON39 0x0f96 +#define MT6328_CHR_CON40 0x0f98 +#define MT6328_CHR_CON41 0x0f9a +#define MT6328_CHR_CON42 0x0f9c +#define MT6328_BATON_CON0 0x0f9e +#define MT6328_CHR_CON43 0x0fa0 +#define MT6328_EOSC_CALI_CON0 0x0faa +#define MT6328_EOSC_CALI_CON1 0x0fac +#define MT6328_VRTC_PWM_CON0 0x0fae + +#endif /* __MFD_MT6328_REGISTERS_H__ */ diff --git a/include/linux/mfd/mt6397/core.h b/include/linux/mfd/mt6397/core.h index 627487e26287..b774c3a4bb62 100644 --- a/include/linux/mfd/mt6397/core.h +++ b/include/linux/mfd/mt6397/core.h @@ -12,6 +12,7 @@ enum chip_id { MT6323_CHIP_ID = 0x23, + MT6328_CHIP_ID = 0x30, MT6331_CHIP_ID = 0x20, MT6332_CHIP_ID = 0x20, MT6357_CHIP_ID = 0x57, @@ -65,11 +66,11 @@ struct mt6397_chip { int irq; struct irq_domain *irq_domain; struct mutex irqlock; - u16 wake_mask[2]; - u16 irq_masks_cur[2]; - u16 irq_masks_cache[2]; - u16 int_con[2]; - u16 int_status[2]; + u16 wake_mask[3]; + u16 irq_masks_cur[3]; + u16 irq_masks_cache[3]; + u16 int_con[3]; + u16 int_status[3]; u16 chip_id; void *irq_data; }; -- cgit v1.2.3 From f187b9bf1a639090893c31030ddb60f9beae23f0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Oct 2024 06:18:52 +0100 Subject: block: remove bio_add_zone_append_page This is only used by the nvmet zns passthrough code, which can trivially just use bio_add_pc_page and do the sanity check for the max zone append limit itself. All future zoned file systems should follow the btrfs lead and let the upper layers fill up bios unlimited by hardware constraints and split them to the limits in the I/O submission handler. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20241030051859.280923-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index faceadb040f9..4a1bf43ca53d 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -418,8 +418,6 @@ bool __must_check bio_add_folio(struct bio *bio, struct folio *folio, size_t len, size_t off); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); -int bio_add_zone_append_page(struct bio *bio, struct page *page, - unsigned int len, unsigned int offset); void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len, -- cgit v1.2.3 From 336afe0c832d6eb985d0e9dbc5a70929594e58d9 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 25 Oct 2024 18:23:45 +0000 Subject: KVM: arm64: nv: Describe trap behaviour of MDCR_EL2.HPMN MDCR_EL2.HPMN splits the PMU event counters into two ranges: the first range is accessible from all ELs, and the second range is accessible only to EL2/3. Supposing the guest hypervisor allows direct access to the PMU counters from the L2, KVM needs to locally handle those accesses. Add a new complex trap configuration for HPMN that checks if the counter index is accessible to the current context. As written, the architecture suggests HPMN only causes PMEVCNTR_EL0 to trap, though intuition (and the pseudocode) suggest that the trap applies to PMEVTYPER_EL0 as well. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20241025182354.3364124-11-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- include/kvm/arm_pmu.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index e08aeec5d936..feb5d1d35f0f 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -96,6 +96,7 @@ int kvm_arm_set_default_pmu(struct kvm *kvm); u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm); u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu); +bool kvm_pmu_counter_is_hyp(struct kvm_vcpu *vcpu, unsigned int idx); #else struct kvm_pmu { }; @@ -187,6 +188,11 @@ static inline u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu) return 0; } +static inline bool kvm_pmu_counter_is_hyp(struct kvm_vcpu *vcpu, unsigned int idx) +{ + return false; +} + #endif #endif -- cgit v1.2.3 From a3034dab74fc12d6c0a589e31af9fafc436a4a0e Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 25 Oct 2024 18:23:47 +0000 Subject: KVM: arm64: Rename kvm_pmu_valid_counter_mask() Nested PMU support requires dynamically changing the visible range of PMU counters based on the exception level and value of MDCR_EL2.HPMN. At the same time, the PMU emulation code needs to know the absolute number of implemented counters, regardless of context. Rename the existing helper to make it obvious that it returns the number of implemented counters and not anything else. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20241025182354.3364124-13-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- include/kvm/arm_pmu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index feb5d1d35f0f..019ace2bb914 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -47,7 +47,7 @@ static __always_inline bool kvm_arm_support_pmu_v3(void) #define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS) u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx); void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val); -u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu); +u64 kvm_pmu_implemented_counter_mask(struct kvm_vcpu *vcpu); u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1); void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu); void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu); @@ -114,7 +114,7 @@ static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, } static inline void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) {} -static inline u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) +static inline u64 kvm_pmu_implemented_counter_mask(struct kvm_vcpu *vcpu) { return 0; } -- cgit v1.2.3 From 9a1c58cfefb06974a804174f127de3fedc779394 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 25 Oct 2024 18:23:48 +0000 Subject: KVM: arm64: nv: Adjust range of accessible PMCs according to HPMN The value of MDCR_EL2.HPMN controls the number of event counters made visible to EL0 and EL1. This means it is possible for the guest hypervisor to allow direct access to event counters to the L2. Rework KVM's PMU register emulation to take the effects of HPMN into account when handling a trap. For bitmask-style registers, writes only affect accessible registers. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20241025182354.3364124-14-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- include/kvm/arm_pmu.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 019ace2bb914..76244f0bd47a 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -48,6 +48,7 @@ static __always_inline bool kvm_arm_support_pmu_v3(void) u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx); void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val); u64 kvm_pmu_implemented_counter_mask(struct kvm_vcpu *vcpu); +u64 kvm_pmu_accessible_counter_mask(struct kvm_vcpu *vcpu); u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1); void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu); void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu); @@ -118,6 +119,10 @@ static inline u64 kvm_pmu_implemented_counter_mask(struct kvm_vcpu *vcpu) { return 0; } +static inline u64 kvm_pmu_accessible_counter_mask(struct kvm_vcpu *vcpu) +{ + return 0; +} static inline void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {} -- cgit v1.2.3 From ae323e035801def145776dddf46c01ca1b90d21d Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 25 Oct 2024 18:25:59 +0000 Subject: KVM: arm64: nv: Reprogram PMU events affected by nested transition Start reprogramming PMU events at nested boundaries now that everything is in place to handle the EL2 event filter. Only repaint events where the filter differs between EL1 and EL2 as a slight optimization. PMU now 'works' for nested VMs, albeit slow. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20241025182559.3364829-1-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- include/kvm/arm_pmu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 76244f0bd47a..e61dd7dd2286 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -98,6 +98,7 @@ u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm); u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu); bool kvm_pmu_counter_is_hyp(struct kvm_vcpu *vcpu, unsigned int idx); +void kvm_pmu_nested_transition(struct kvm_vcpu *vcpu); #else struct kvm_pmu { }; @@ -198,6 +199,8 @@ static inline bool kvm_pmu_counter_is_hyp(struct kvm_vcpu *vcpu, unsigned int id return false; } +static inline void kvm_pmu_nested_transition(struct kvm_vcpu *vcpu) {} + #endif #endif -- cgit v1.2.3 From 16aed0a6520ba01b7d22c32e193fc1ec674f92d4 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 21 Oct 2024 11:45:06 -0400 Subject: i3c: master: Replace hard code 2 with macro I3C_ADDR_SLOT_STATUS_BITS Replace the hardcoded value 2, which indicates 2 bits for I3C address status, with the predefined macro I3C_ADDR_SLOT_STATUS_BITS. Improve maintainability and extensibility of the code. Reviewed-by: Miquel Raynal Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20241021-i3c_dts_assign-v8-1-4098b8bde01e@nxp.com Signed-off-by: Alexandre Belloni --- include/linux/i3c/master.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 2a1ed05d5782..2100547b2d8d 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -313,6 +313,8 @@ enum i3c_addr_slot_status { I3C_ADDR_SLOT_STATUS_MASK = 3, }; +#define I3C_ADDR_SLOT_STATUS_BITS 2 + /** * struct i3c_bus - I3C bus object * @cur_master: I3C master currently driving the bus. Since I3C is multi-master @@ -354,7 +356,7 @@ enum i3c_addr_slot_status { struct i3c_bus { struct i3c_dev_desc *cur_master; int id; - unsigned long addrslots[((I2C_MAX_ADDR + 1) * 2) / BITS_PER_LONG]; + unsigned long addrslots[((I2C_MAX_ADDR + 1) * I3C_ADDR_SLOT_STATUS_BITS) / BITS_PER_LONG]; enum i3c_bus_mode mode; struct { unsigned long i3c; -- cgit v1.2.3 From 2f552fa280590e61bd3dbe66a7b54b99caa642a4 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 21 Oct 2024 11:45:07 -0400 Subject: i3c: master: Extend address status bit to 4 and add I3C_ADDR_SLOT_EXT_DESIRED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the address status bit to 4 and introduce the I3C_ADDR_SLOT_EXT_DESIRED macro to indicate that a device prefers a specific address. This is generally set by the 'assigned-address' in the device tree source (dts) file. ┌────┬─────────────┬───┬─────────┬───┐ │S/Sr│ 7'h7E RnW=0 │ACK│ ENTDAA │ T ├────┐ └────┴─────────────┴───┴─────────┴───┘ │ ┌─────────────────────────────────────────┘ │ ┌──┬─────────────┬───┬─────────────────┬────────────────┬───┬─────────┐ └─►│Sr│7'h7E RnW=1 │ACK│48bit UID BCR DCR│Assign 7bit Addr│PAR│ ACK/NACK│ └──┴─────────────┴───┴─────────────────┴────────────────┴───┴─────────┘ Some master controllers (such as HCI) need to prepare the entire above transaction before sending it out to the I3C bus. This means that a 7-bit dynamic address needs to be allocated before knowing the target device's UID information. However, some I3C targets may request specific addresses (called as "init_dyn_addr"), which is typically specified by the DT-'s assigned-address property. Lower addresses having higher IBI priority. If it is available, i3c_bus_get_free_addr() preferably return a free address that is not in the list of desired addresses (called as "init_dyn_addr"). This allows the device with the "init_dyn_addr" to switch to its "init_dyn_addr" when it hot-joins the I3C bus. Otherwise, if the "init_dyn_addr" is already in use by another I3C device, the target device will not be able to switch to its desired address. If the previous step fails, fallback returning one of the remaining unassigned address, regardless of its state in the desired list. Reviewed-by: Miquel Raynal Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20241021-i3c_dts_assign-v8-2-4098b8bde01e@nxp.com Signed-off-by: Alexandre Belloni --- include/linux/i3c/master.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 2100547b2d8d..6e5328c6c6af 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -298,7 +298,8 @@ enum i3c_open_drain_speed { * @I3C_ADDR_SLOT_I2C_DEV: address is assigned to an I2C device * @I3C_ADDR_SLOT_I3C_DEV: address is assigned to an I3C device * @I3C_ADDR_SLOT_STATUS_MASK: address slot mask - * + * @I3C_ADDR_SLOT_EXT_DESIRED: the bitmask represents addresses that are preferred by some devices, + * such as the "assigned-address" property in a device tree source. * On an I3C bus, addresses are assigned dynamically, and we need to know which * addresses are free to use and which ones are already assigned. * @@ -311,9 +312,11 @@ enum i3c_addr_slot_status { I3C_ADDR_SLOT_I2C_DEV, I3C_ADDR_SLOT_I3C_DEV, I3C_ADDR_SLOT_STATUS_MASK = 3, + I3C_ADDR_SLOT_EXT_STATUS_MASK = 7, + I3C_ADDR_SLOT_EXT_DESIRED = BIT(2), }; -#define I3C_ADDR_SLOT_STATUS_BITS 2 +#define I3C_ADDR_SLOT_STATUS_BITS 4 /** * struct i3c_bus - I3C bus object -- cgit v1.2.3 From 128d333f0dff2fbe41c546581c6f151e9d68cd4c Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Thu, 17 Oct 2024 10:31:53 -0700 Subject: f2fs: introduce device aliasing file F2FS should understand how the device aliasing file works and support deleting the file after use. A device aliasing file can be created by mkfs.f2fs tool and it can map the whole device with an extent, not using node blocks. The file space should be pinned and normally used for read-only usages. Signed-off-by: Daeho Jeong Signed-off-by: Chao Yu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/uapi/linux/f2fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h index 955d440be104..f7aaf8d23e20 100644 --- a/include/uapi/linux/f2fs.h +++ b/include/uapi/linux/f2fs.h @@ -43,6 +43,7 @@ #define F2FS_IOC_DECOMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 23) #define F2FS_IOC_COMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 24) #define F2FS_IOC_START_ATOMIC_REPLACE _IO(F2FS_IOCTL_MAGIC, 25) +#define F2FS_IOC_GET_DEV_ALIAS_FILE _IOR(F2FS_IOCTL_MAGIC, 26, __u32) /* * should be same as XFS_IOC_GOINGDOWN. -- cgit v1.2.3 From d86c7a9162ae925dcb6632a4544c62eb0eb5c7cc Mon Sep 17 00:00:00 2001 From: George Guo Date: Mon, 28 Oct 2024 20:34:35 +0800 Subject: netlabel: document doi_remove field of struct netlbl_calipso_ops Add documentation of doi_remove field to Kernel doc for struct netlbl_calipso_ops. Flagged by ./scripts/kernel-doc -none. Signed-off-by: George Guo Acked-by: Paul Moore Link: https://patch.msgid.link/20241028123435.3495916-1-dongtai.guo@linux.dev Signed-off-by: Jakub Kicinski --- include/net/netlabel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 529160f76cac..4afd934b1238 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -208,6 +208,7 @@ struct netlbl_lsm_secattr { * struct netlbl_calipso_ops - NetLabel CALIPSO operations * @doi_add: add a CALIPSO DOI * @doi_free: free a CALIPSO DOI + * @doi_remove: remove a CALIPSO DOI * @doi_getdef: returns a reference to a DOI * @doi_putdef: releases a reference of a DOI * @doi_walk: enumerate the DOI list -- cgit v1.2.3 From 4138e9ec00936c9fe7d0fe961e32f381b1e36926 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 29 Oct 2024 11:47:14 +0100 Subject: netlink: add NLA_POLICY_MAX_LEN macro Similarly to NLA_POLICY_MIN_LEN, NLA_POLICY_MAX_LEN defines a policy with a maximum length value. The netlink generator for YAML specs has been extended accordingly. Signed-off-by: Antonio Quartulli Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20241029-b4-ovpn-v11-1-de4698c73a25@openvpn.net Signed-off-by: Jakub Kicinski --- include/net/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index db6af207287c..2dc671c977ff 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -469,6 +469,7 @@ struct nla_policy { .max = _len \ } #define NLA_POLICY_MIN_LEN(_len) NLA_POLICY_MIN(NLA_BINARY, _len) +#define NLA_POLICY_MAX_LEN(_len) NLA_POLICY_MAX(NLA_BINARY, _len) /** * struct nl_info - netlink source information -- cgit v1.2.3 From a911bad094b010e276f072fe9a599b66e59ed5fe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Oct 2024 19:14:25 +0000 Subject: dql: annotate data-races around dql->last_obj_cnt dql->last_obj_cnt is read/written from different contexts, without any lock synchronization. Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing. Signed-off-by: Eric Dumazet Reviewed-by: Joe Damato Link: https://patch.msgid.link/20241029191425.2519085-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/dynamic_queue_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h index 281298e77a15..808b1a5102e7 100644 --- a/include/linux/dynamic_queue_limits.h +++ b/include/linux/dynamic_queue_limits.h @@ -127,7 +127,7 @@ static inline void dql_queued(struct dql *dql, unsigned int count) if (WARN_ON_ONCE(count > DQL_MAX_OBJECT)) return; - dql->last_obj_cnt = count; + WRITE_ONCE(dql->last_obj_cnt, count); /* We want to force a write first, so that cpu do not attempt * to get cache line containing last_obj_cnt, num_queued, adj_limit -- cgit v1.2.3 From d3eeb1ac0b99cdcd99420fb270041da946d2d360 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Mon, 28 Oct 2024 22:45:30 +0100 Subject: iio: backend: extend features Extend backend features with new calls needed later on this patchset from axi version of ad3552r. The follwoing calls are added: iio_backend_ddr_enable() enable ddr bus transfer iio_backend_ddr_disable() disable ddr bus transfer iio_backend_data_stream_enable() enable data stream over bus interface iio_backend_data_stream_disable() disable data stream over bus interface iio_backend_data_transfer_addr() define the target register address where the DAC sample will be written. Reviewed-by: Nuno Sa Signed-off-by: Angelo Dureghello Reviewed-by: David Lechner Link: https://patch.msgid.link/20241028-wip-bl-ad3552r-axi-v0-iio-testing-v9-3-f6960b4f9719@kernel-space.org Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 37d56914d485..10be00f3b120 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -14,12 +14,14 @@ struct iio_dev; enum iio_backend_data_type { IIO_BACKEND_TWOS_COMPLEMENT, IIO_BACKEND_OFFSET_BINARY, + IIO_BACKEND_DATA_UNSIGNED, IIO_BACKEND_DATA_TYPE_MAX }; enum iio_backend_data_source { IIO_BACKEND_INTERNAL_CONTINUOUS_WAVE, IIO_BACKEND_EXTERNAL, + IIO_BACKEND_INTERNAL_RAMP_16BIT, IIO_BACKEND_DATA_SOURCE_MAX }; @@ -89,6 +91,11 @@ enum iio_backend_sample_trigger { * @read_raw: Read a channel attribute from a backend device * @debugfs_print_chan_status: Print channel status into a buffer. * @debugfs_reg_access: Read or write register value of backend. + * @ddr_enable: Enable interface DDR (Double Data Rate) mode. + * @ddr_disable: Disable interface DDR (Double Data Rate) mode. + * @data_stream_enable: Enable data stream. + * @data_stream_disable: Disable data stream. + * @data_transfer_addr: Set data address. **/ struct iio_backend_ops { int (*enable)(struct iio_backend *back); @@ -129,6 +136,11 @@ struct iio_backend_ops { size_t len); int (*debugfs_reg_access)(struct iio_backend *back, unsigned int reg, unsigned int writeval, unsigned int *readval); + int (*ddr_enable)(struct iio_backend *back); + int (*ddr_disable)(struct iio_backend *back); + int (*data_stream_enable)(struct iio_backend *back); + int (*data_stream_disable)(struct iio_backend *back); + int (*data_transfer_addr)(struct iio_backend *back, u32 address); }; /** @@ -164,6 +176,11 @@ int iio_backend_data_sample_trigger(struct iio_backend *back, int devm_iio_backend_request_buffer(struct device *dev, struct iio_backend *back, struct iio_dev *indio_dev); +int iio_backend_ddr_enable(struct iio_backend *back); +int iio_backend_ddr_disable(struct iio_backend *back); +int iio_backend_data_stream_enable(struct iio_backend *back); +int iio_backend_data_stream_disable(struct iio_backend *back); +int iio_backend_data_transfer_addr(struct iio_backend *back, u32 address); ssize_t iio_backend_ext_info_set(struct iio_dev *indio_dev, uintptr_t private, const struct iio_chan_spec *chan, const char *buf, size_t len); -- cgit v1.2.3 From 76c6217c31266e800b67a476bba59dfeb9858a90 Mon Sep 17 00:00:00 2001 From: Ryan Chen Date: Wed, 23 Oct 2024 17:01:51 +0800 Subject: dt-bindings: mfd: aspeed: Support for AST2700 Add reset, clk dt bindings headers, and update compatible support for AST2700 clk, silicon-id in yaml. Signed-off-by: Ryan Chen Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241023090153.1395220-2-ryan_chen@aspeedtech.com Signed-off-by: Lee Jones --- include/dt-bindings/clock/aspeed,ast2700-scu.h | 163 +++++++++++++++++++++++++ include/dt-bindings/reset/aspeed,ast2700-scu.h | 124 +++++++++++++++++++ 2 files changed, 287 insertions(+) create mode 100644 include/dt-bindings/clock/aspeed,ast2700-scu.h create mode 100644 include/dt-bindings/reset/aspeed,ast2700-scu.h (limited to 'include') diff --git a/include/dt-bindings/clock/aspeed,ast2700-scu.h b/include/dt-bindings/clock/aspeed,ast2700-scu.h new file mode 100644 index 000000000000..63021af3caf5 --- /dev/null +++ b/include/dt-bindings/clock/aspeed,ast2700-scu.h @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Device Tree binding constants for AST2700 clock controller. + * + * Copyright (c) 2024 Aspeed Technology Inc. + */ + +#ifndef __DT_BINDINGS_CLOCK_AST2700_H +#define __DT_BINDINGS_CLOCK_AST2700_H + +/* SOC0 clk */ +#define SCU0_CLKIN 0 +#define SCU0_CLK_24M 1 +#define SCU0_CLK_192M 2 +#define SCU0_CLK_UART 3 +#define SCU0_CLK_UART_DIV13 3 +#define SCU0_CLK_PSP 4 +#define SCU0_CLK_HPLL 5 +#define SCU0_CLK_HPLL_DIV2 6 +#define SCU0_CLK_HPLL_DIV4 7 +#define SCU0_CLK_HPLL_DIV_AHB 8 +#define SCU0_CLK_DPLL 9 +#define SCU0_CLK_MPLL 10 +#define SCU0_CLK_MPLL_DIV2 11 +#define SCU0_CLK_MPLL_DIV4 12 +#define SCU0_CLK_MPLL_DIV8 13 +#define SCU0_CLK_MPLL_DIV_AHB 14 +#define SCU0_CLK_D0 15 +#define SCU0_CLK_D1 16 +#define SCU0_CLK_CRT0 17 +#define SCU0_CLK_CRT1 18 +#define SCU0_CLK_MPHY 19 +#define SCU0_CLK_AXI0 20 +#define SCU0_CLK_AXI1 21 +#define SCU0_CLK_AHB 22 +#define SCU0_CLK_APB 23 +#define SCU0_CLK_UART4 24 +#define SCU0_CLK_EMMCMUX 25 +#define SCU0_CLK_EMMC 26 +#define SCU0_CLK_U2PHY_CLK12M 27 +#define SCU0_CLK_U2PHY_REFCLK 28 + +/* SOC0 clk-gate */ +#define SCU0_CLK_GATE_MCLK 29 +#define SCU0_CLK_GATE_ECLK 30 +#define SCU0_CLK_GATE_2DCLK 31 +#define SCU0_CLK_GATE_VCLK 32 +#define SCU0_CLK_GATE_BCLK 33 +#define SCU0_CLK_GATE_VGA0CLK 34 +#define SCU0_CLK_GATE_REFCLK 35 +#define SCU0_CLK_GATE_PORTBUSB2CLK 36 +#define SCU0_CLK_GATE_UHCICLK 37 +#define SCU0_CLK_GATE_VGA1CLK 38 +#define SCU0_CLK_GATE_DDRPHYCLK 39 +#define SCU0_CLK_GATE_E2M0CLK 40 +#define SCU0_CLK_GATE_HACCLK 41 +#define SCU0_CLK_GATE_PORTAUSB2CLK 42 +#define SCU0_CLK_GATE_UART4CLK 43 +#define SCU0_CLK_GATE_SLICLK 44 +#define SCU0_CLK_GATE_DACCLK 45 +#define SCU0_CLK_GATE_DP 46 +#define SCU0_CLK_GATE_E2M1CLK 47 +#define SCU0_CLK_GATE_CRT0CLK 48 +#define SCU0_CLK_GATE_CRT1CLK 49 +#define SCU0_CLK_GATE_ECDSACLK 50 +#define SCU0_CLK_GATE_RSACLK 51 +#define SCU0_CLK_GATE_RVAS0CLK 52 +#define SCU0_CLK_GATE_UFSCLK 53 +#define SCU0_CLK_GATE_EMMCCLK 54 +#define SCU0_CLK_GATE_RVAS1CLK 55 + +/* SOC1 clk */ +#define SCU1_CLKIN 0 +#define SCU1_CLK_HPLL 1 +#define SCU1_CLK_APLL 2 +#define SCU1_CLK_APLL_DIV2 3 +#define SCU1_CLK_APLL_DIV4 4 +#define SCU1_CLK_DPLL 5 +#define SCU1_CLK_UXCLK 6 +#define SCU1_CLK_HUXCLK 7 +#define SCU1_CLK_UARTX 8 +#define SCU1_CLK_HUARTX 9 +#define SCU1_CLK_AHB 10 +#define SCU1_CLK_APB 11 +#define SCU1_CLK_UART0 12 +#define SCU1_CLK_UART1 13 +#define SCU1_CLK_UART2 14 +#define SCU1_CLK_UART3 15 +#define SCU1_CLK_UART5 16 +#define SCU1_CLK_UART6 17 +#define SCU1_CLK_UART7 18 +#define SCU1_CLK_UART8 19 +#define SCU1_CLK_UART9 20 +#define SCU1_CLK_UART10 21 +#define SCU1_CLK_UART11 22 +#define SCU1_CLK_UART12 23 +#define SCU1_CLK_UART13 24 +#define SCU1_CLK_UART14 25 +#define SCU1_CLK_APLL_DIVN 26 +#define SCU1_CLK_SDMUX 27 +#define SCU1_CLK_SDCLK 28 +#define SCU1_CLK_RMII 29 +#define SCU1_CLK_RGMII 30 +#define SCU1_CLK_MACHCLK 31 +#define SCU1_CLK_MAC0RCLK 32 +#define SCU1_CLK_MAC1RCLK 33 +#define SCU1_CLK_CAN 34 + +/* SOC1 clk gate */ +#define SCU1_CLK_GATE_LCLK0 35 +#define SCU1_CLK_GATE_LCLK1 36 +#define SCU1_CLK_GATE_ESPI0CLK 37 +#define SCU1_CLK_GATE_ESPI1CLK 38 +#define SCU1_CLK_GATE_SDCLK 39 +#define SCU1_CLK_GATE_IPEREFCLK 40 +#define SCU1_CLK_GATE_REFCLK 41 +#define SCU1_CLK_GATE_LPCHCLK 42 +#define SCU1_CLK_GATE_MAC0CLK 43 +#define SCU1_CLK_GATE_MAC1CLK 44 +#define SCU1_CLK_GATE_MAC2CLK 45 +#define SCU1_CLK_GATE_UART0CLK 46 +#define SCU1_CLK_GATE_UART1CLK 47 +#define SCU1_CLK_GATE_UART2CLK 48 +#define SCU1_CLK_GATE_UART3CLK 49 +#define SCU1_CLK_GATE_I2CCLK 50 +#define SCU1_CLK_GATE_I3C0CLK 51 +#define SCU1_CLK_GATE_I3C1CLK 52 +#define SCU1_CLK_GATE_I3C2CLK 53 +#define SCU1_CLK_GATE_I3C3CLK 54 +#define SCU1_CLK_GATE_I3C4CLK 55 +#define SCU1_CLK_GATE_I3C5CLK 56 +#define SCU1_CLK_GATE_I3C6CLK 57 +#define SCU1_CLK_GATE_I3C7CLK 58 +#define SCU1_CLK_GATE_I3C8CLK 59 +#define SCU1_CLK_GATE_I3C9CLK 60 +#define SCU1_CLK_GATE_I3C10CLK 61 +#define SCU1_CLK_GATE_I3C11CLK 62 +#define SCU1_CLK_GATE_I3C12CLK 63 +#define SCU1_CLK_GATE_I3C13CLK 64 +#define SCU1_CLK_GATE_I3C14CLK 65 +#define SCU1_CLK_GATE_I3C15CLK 66 +#define SCU1_CLK_GATE_UART5CLK 67 +#define SCU1_CLK_GATE_UART6CLK 68 +#define SCU1_CLK_GATE_UART7CLK 69 +#define SCU1_CLK_GATE_UART8CLK 70 +#define SCU1_CLK_GATE_UART9CLK 71 +#define SCU1_CLK_GATE_UART10CLK 72 +#define SCU1_CLK_GATE_UART11CLK 73 +#define SCU1_CLK_GATE_UART12CLK 74 +#define SCU1_CLK_GATE_FSICLK 75 +#define SCU1_CLK_GATE_LTPIPHYCLK 76 +#define SCU1_CLK_GATE_LTPICLK 77 +#define SCU1_CLK_GATE_VGALCLK 78 +#define SCU1_CLK_GATE_UHCICLK 79 +#define SCU1_CLK_GATE_CANCLK 80 +#define SCU1_CLK_GATE_PCICLK 81 +#define SCU1_CLK_GATE_SLICLK 82 +#define SCU1_CLK_GATE_E2MCLK 83 +#define SCU1_CLK_GATE_PORTCUSB2CLK 84 +#define SCU1_CLK_GATE_PORTDUSB2CLK 85 +#define SCU1_CLK_GATE_LTPI1TXCLK 86 + +#endif diff --git a/include/dt-bindings/reset/aspeed,ast2700-scu.h b/include/dt-bindings/reset/aspeed,ast2700-scu.h new file mode 100644 index 000000000000..d53c719b7a66 --- /dev/null +++ b/include/dt-bindings/reset/aspeed,ast2700-scu.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Device Tree binding constants for AST2700 reset controller. + * + * Copyright (c) 2024 Aspeed Technology Inc. + */ + +#ifndef _MACH_ASPEED_AST2700_RESET_H_ +#define _MACH_ASPEED_AST2700_RESET_H_ + +/* SOC0 */ +#define SCU0_RESET_SDRAM 0 +#define SCU0_RESET_DDRPHY 1 +#define SCU0_RESET_RSA 2 +#define SCU0_RESET_SHA3 3 +#define SCU0_RESET_HACE 4 +#define SCU0_RESET_SOC 5 +#define SCU0_RESET_VIDEO 6 +#define SCU0_RESET_2D 7 +#define SCU0_RESET_PCIS 8 +#define SCU0_RESET_RVAS0 9 +#define SCU0_RESET_RVAS1 10 +#define SCU0_RESET_SM3 11 +#define SCU0_RESET_SM4 12 +#define SCU0_RESET_CRT0 13 +#define SCU0_RESET_ECC 14 +#define SCU0_RESET_DP_PCI 15 +#define SCU0_RESET_UFS 16 +#define SCU0_RESET_EMMC 17 +#define SCU0_RESET_PCIE1RST 18 +#define SCU0_RESET_PCIE1RSTOE 19 +#define SCU0_RESET_PCIE0RST 20 +#define SCU0_RESET_PCIE0RSTOE 21 +#define SCU0_RESET_JTAG 22 +#define SCU0_RESET_MCTP0 23 +#define SCU0_RESET_MCTP1 24 +#define SCU0_RESET_XDMA0 25 +#define SCU0_RESET_XDMA1 26 +#define SCU0_RESET_H2X1 27 +#define SCU0_RESET_DP 28 +#define SCU0_RESET_DP_MCU 29 +#define SCU0_RESET_SSP 30 +#define SCU0_RESET_H2X0 31 +#define SCU0_RESET_PORTA_VHUB 32 +#define SCU0_RESET_PORTA_PHY3 33 +#define SCU0_RESET_PORTA_XHCI 34 +#define SCU0_RESET_PORTB_VHUB 35 +#define SCU0_RESET_PORTB_PHY3 36 +#define SCU0_RESET_PORTB_XHCI 37 +#define SCU0_RESET_PORTA_VHUB_EHCI 38 +#define SCU0_RESET_PORTB_VHUB_EHCI 39 +#define SCU0_RESET_UHCI 40 +#define SCU0_RESET_TSP 41 +#define SCU0_RESET_E2M0 42 +#define SCU0_RESET_E2M1 43 +#define SCU0_RESET_VLINK 44 + +/* SOC1 */ +#define SCU1_RESET_LPC0 0 +#define SCU1_RESET_LPC1 1 +#define SCU1_RESET_MII 2 +#define SCU1_RESET_PECI 3 +#define SCU1_RESET_PWM 4 +#define SCU1_RESET_MAC0 5 +#define SCU1_RESET_MAC1 6 +#define SCU1_RESET_MAC2 7 +#define SCU1_RESET_ADC 8 +#define SCU1_RESET_SD 9 +#define SCU1_RESET_ESPI0 10 +#define SCU1_RESET_ESPI1 11 +#define SCU1_RESET_JTAG1 12 +#define SCU1_RESET_SPI0 13 +#define SCU1_RESET_SPI1 14 +#define SCU1_RESET_SPI2 15 +#define SCU1_RESET_I3C0 16 +#define SCU1_RESET_I3C1 17 +#define SCU1_RESET_I3C2 18 +#define SCU1_RESET_I3C3 19 +#define SCU1_RESET_I3C4 20 +#define SCU1_RESET_I3C5 21 +#define SCU1_RESET_I3C6 22 +#define SCU1_RESET_I3C7 23 +#define SCU1_RESET_I3C8 24 +#define SCU1_RESET_I3C9 25 +#define SCU1_RESET_I3C10 26 +#define SCU1_RESET_I3C11 27 +#define SCU1_RESET_I3C12 28 +#define SCU1_RESET_I3C13 29 +#define SCU1_RESET_I3C14 30 +#define SCU1_RESET_I3C15 31 +#define SCU1_RESET_MCU0 32 +#define SCU1_RESET_MCU1 33 +#define SCU1_RESET_H2A_SPI1 34 +#define SCU1_RESET_H2A_SPI2 35 +#define SCU1_RESET_UART0 36 +#define SCU1_RESET_UART1 37 +#define SCU1_RESET_UART2 38 +#define SCU1_RESET_UART3 39 +#define SCU1_RESET_I2C_FILTER 40 +#define SCU1_RESET_CALIPTRA 41 +#define SCU1_RESET_XDMA 42 +#define SCU1_RESET_FSI 43 +#define SCU1_RESET_CAN 44 +#define SCU1_RESET_MCTP 45 +#define SCU1_RESET_I2C 46 +#define SCU1_RESET_UART6 47 +#define SCU1_RESET_UART7 48 +#define SCU1_RESET_UART8 49 +#define SCU1_RESET_UART9 50 +#define SCU1_RESET_LTPI0 51 +#define SCU1_RESET_VGAL 52 +#define SCU1_RESET_LTPI1 53 +#define SCU1_RESET_ACE 54 +#define SCU1_RESET_E2M 55 +#define SCU1_RESET_UHCI 56 +#define SCU1_RESET_PORTC_USB2UART 57 +#define SCU1_RESET_PORTC_VHUB_EHCI 58 +#define SCU1_RESET_PORTD_USB2UART 59 +#define SCU1_RESET_PORTD_VHUB_EHCI 60 +#define SCU1_RESET_H2X 61 +#define SCU1_RESET_I3CDMA 62 +#define SCU1_RESET_PCIE2RST 63 + +#endif /* _MACH_ASPEED_AST2700_RESET_H_ */ -- cgit v1.2.3 From e9f0a363473585a0f51b1b61a9bb15a63808d6ea Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 22 Oct 2024 13:01:12 +0200 Subject: tracing: Remove TRACE_FLAG_IRQS_NOSUPPORT It was possible to enable tracing with no IRQ tracing support. The tracing infrastructure would then record TRACE_FLAG_IRQS_NOSUPPORT as the only tracing flag and show an 'X' in the output. The last user of this feature was PPC32 which managed to implement it during PowerPC merge in 2009. Since then, it was unused and the PPC32 dependency was finally removed in commit 0ea5ee035133a ("tracing: Remove PPC32 wart from config TRACING_SUPPORT"). Since the PowerPC merge the code behind !CONFIG_TRACE_IRQFLAGS_SUPPORT with TRACING enabled can no longer be selected used and the 'X' is not displayed or recorded. Remove the CONFIG_TRACE_IRQFLAGS_SUPPORT from the tracing code. Remove TRACE_FLAG_IRQS_NOSUPPORT. Cc: Peter Zijlstra Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20241022110112.XJI8I9T2@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index f8f2e52653df..016b29a56c87 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -184,7 +184,6 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, - TRACE_FLAG_IRQS_NOSUPPORT = 0x02, TRACE_FLAG_NEED_RESCHED = 0x04, TRACE_FLAG_HARDIRQ = 0x08, TRACE_FLAG_SOFTIRQ = 0x10, @@ -193,7 +192,6 @@ enum trace_flag_type { TRACE_FLAG_BH_OFF = 0x80, }; -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags) { unsigned int irq_status = irqs_disabled_flags(irqflags) ? @@ -207,17 +205,6 @@ static inline unsigned int tracing_gen_ctx(void) local_save_flags(irqflags); return tracing_gen_ctx_flags(irqflags); } -#else - -static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags) -{ - return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT); -} -static inline unsigned int tracing_gen_ctx(void) -{ - return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT); -} -#endif static inline unsigned int tracing_gen_ctx_dec(void) { -- cgit v1.2.3 From a9cfb8778c43fc473ae16cddb6e9611705721b31 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 31 Oct 2024 11:20:53 -0400 Subject: tracing: Introduce tracepoint extended structure Shrink the struct tracepoint size from 80 bytes to 72 bytes on x86-64 by moving the (typically NULL) regfunc/unregfunc pointers to an extended structure. Tested-by: Jordan Rife Cc: Michael Jeanson Cc: Thomas Gleixner Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Cc: Jordan Rife Cc: linux-trace-kernel@vger.kernel.org Link: https://lore.kernel.org/20241031152056.744137-2-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint-defs.h | 8 ++++++-- include/linux/tracepoint.h | 19 +++++++++++++------ 2 files changed, 19 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 60a6e8314d4c..967c08d9da84 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -29,6 +29,11 @@ struct tracepoint_func { int prio; }; +struct tracepoint_ext { + int (*regfunc)(void); + void (*unregfunc)(void); +}; + struct tracepoint { const char *name; /* Tracepoint name */ struct static_key_false key; @@ -36,9 +41,8 @@ struct tracepoint { void *static_call_tramp; void *iterator; void *probestub; - int (*regfunc)(void); - void (*unregfunc)(void); struct tracepoint_func __rcu *funcs; + struct tracepoint_ext *ext; }; #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 0dc67fad706c..862ab49177a4 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -302,7 +302,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * structures, so we create an array of pointers that will be used for iteration * on the tracepoints. */ -#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args) \ +#define __DEFINE_TRACE_EXT(_name, _ext, proto, args) \ static const char __tpstrtab_##_name[] \ __section("__tracepoints_strings") = #_name; \ extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name); \ @@ -316,9 +316,9 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) .static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \ .iterator = &__traceiter_##_name, \ .probestub = &__probestub_##_name, \ - .regfunc = _reg, \ - .unregfunc = _unreg, \ - .funcs = NULL }; \ + .funcs = NULL, \ + .ext = _ext, \ + }; \ __TRACEPOINT_ENTRY(_name); \ int __traceiter_##_name(void *__data, proto) \ { \ @@ -341,8 +341,15 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) } \ DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name); -#define DEFINE_TRACE(name, proto, args) \ - DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args)); +#define DEFINE_TRACE_FN(_name, _reg, _unreg, _proto, _args) \ + static struct tracepoint_ext __tracepoint_ext_##_name = { \ + .regfunc = _reg, \ + .unregfunc = _unreg, \ + }; \ + __DEFINE_TRACE_EXT(_name, &__tracepoint_ext_##_name, PARAMS(_proto), PARAMS(_args)); + +#define DEFINE_TRACE(_name, _proto, _args) \ + __DEFINE_TRACE_EXT(_name, NULL, PARAMS(_proto), PARAMS(_args)); #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ EXPORT_SYMBOL_GPL(__tracepoint_##name); \ -- cgit v1.2.3 From 654ced4a13774e5aec4d9466c97d22df7cb1e60b Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 31 Oct 2024 11:20:54 -0400 Subject: tracing: Introduce tracepoint_is_faultable() Introduce a "faultable" flag within the extended structure to know whether a tracepoint needs rcu tasks trace grace period before reclaim. This can be queried using tracepoint_is_faultable(). Acked-by: Andrii Nakryiko Tested-by: Jordan Rife Cc: Michael Jeanson Cc: Thomas Gleixner Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Cc: Jordan Rife Cc: linux-trace-kernel@vger.kernel.org Link: https://lore.kernel.org/20241031152056.744137-3-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint-defs.h | 2 ++ include/linux/tracepoint.h | 24 ++++++++++++++++++++++++ include/trace/define_trace.h | 2 +- 3 files changed, 27 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 967c08d9da84..aebf0571c736 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -32,6 +32,8 @@ struct tracepoint_func { struct tracepoint_ext { int (*regfunc)(void); void (*unregfunc)(void); + /* Flags. */ + unsigned int faultable:1; }; struct tracepoint { diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 862ab49177a4..906f3091d23d 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -104,6 +104,12 @@ void for_each_tracepoint_in_module(struct module *mod, * tracepoint_synchronize_unregister must be called between the last tracepoint * probe unregistration and the end of module exit to make sure there is no * caller executing a probe when it is freed. + * + * An alternative is to use the following for batch reclaim associated + * with a given tracepoint: + * + * - tracepoint_is_faultable() == false: call_rcu() + * - tracepoint_is_faultable() == true: call_rcu_tasks_trace() */ #ifdef CONFIG_TRACEPOINTS static inline void tracepoint_synchronize_unregister(void) @@ -111,9 +117,17 @@ static inline void tracepoint_synchronize_unregister(void) synchronize_rcu_tasks_trace(); synchronize_rcu(); } +static inline bool tracepoint_is_faultable(struct tracepoint *tp) +{ + return tp->ext && tp->ext->faultable; +} #else static inline void tracepoint_synchronize_unregister(void) { } +static inline bool tracepoint_is_faultable(struct tracepoint *tp) +{ + return false; +} #endif #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS @@ -345,6 +359,15 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) static struct tracepoint_ext __tracepoint_ext_##_name = { \ .regfunc = _reg, \ .unregfunc = _unreg, \ + .faultable = false, \ + }; \ + __DEFINE_TRACE_EXT(_name, &__tracepoint_ext_##_name, PARAMS(_proto), PARAMS(_args)); + +#define DEFINE_TRACE_SYSCALL(_name, _reg, _unreg, _proto, _args) \ + static struct tracepoint_ext __tracepoint_ext_##_name = { \ + .regfunc = _reg, \ + .unregfunc = _unreg, \ + .faultable = true, \ }; \ __DEFINE_TRACE_EXT(_name, &__tracepoint_ext_##_name, PARAMS(_proto), PARAMS(_args)); @@ -389,6 +412,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __DECLARE_TRACE_SYSCALL __DECLARE_TRACE #define DEFINE_TRACE_FN(name, reg, unreg, proto, args) +#define DEFINE_TRACE_SYSCALL(name, reg, unreg, proto, args) #define DEFINE_TRACE(name, proto, args) #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index ff5fa17a6259..63fea2218afa 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -48,7 +48,7 @@ #undef TRACE_EVENT_SYSCALL #define TRACE_EVENT_SYSCALL(name, proto, args, struct, assign, print, reg, unreg) \ - DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args)) + DEFINE_TRACE_SYSCALL(name, reg, unreg, PARAMS(proto), PARAMS(args)) #undef TRACE_EVENT_NOP #define TRACE_EVENT_NOP(name, proto, args, struct, assign, print) -- cgit v1.2.3 From ee3685a98ea920369696d31e54d1fe6a939ed65f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 31 Oct 2024 11:20:56 -0400 Subject: tracing: Add might_fault() check in __DECLARE_TRACE_SYSCALL Catch incorrect use of syscall tracepoints even if no probes are registered by adding a might_fault() check in trace_##name() emitted by __DECLARE_TRACE_SYSCALL. Suggested-by: Thomas Gleixner Tested-by: Jordan Rife Cc: Thomas Gleixner Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Cc: Jordan Rife Cc: linux-trace-kernel@vger.kernel.org Link: https://lore.kernel.org/20241031152056.744137-5-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 906f3091d23d..425123e921ac 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -301,6 +301,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), cond, PARAMS(data_proto)) \ static inline void trace_##name(proto) \ { \ + might_fault(); \ if (static_branch_unlikely(&__tracepoint_##name.key)) \ __DO_TRACE(name, \ TP_ARGS(args), \ -- cgit v1.2.3 From 61c6fefa92bb4ed7a34163b94f6ffac628237a29 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 1 Nov 2024 11:17:53 -0700 Subject: bpf: decouple BPF link/attach hook and BPF program sleepable semantics BPF link's lifecycle protection scheme depends on both BPF hook and BPF program. If *either* of those require RCU Tasks Trace GP, then we need to go through a chain of GPs before putting BPF program refcount and deallocating BPF link memory. This patch adds bpf_link-specific sleepable flag, which can be set to true even if underlying BPF program is not sleepable itself. If either link->sleepable or link->prog->sleepable is true, we'll go through a chain of RCU Tasks Trace GP and RCU GP before putting BPF program and freeing memory. This will be used to protect BPF link for sleepable (faultable) raw tracepoints in the next patch. Link: https://lore.kernel.org/20241101181754.782341-2-andrii@kernel.org Tested-by: Jordan Rife Signed-off-by: Andrii Nakryiko Signed-off-by: Steven Rostedt (Google) --- include/linux/bpf.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 19d8ca8ac960..e7236facadd4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1598,6 +1598,11 @@ struct bpf_link { enum bpf_link_type type; const struct bpf_link_ops *ops; struct bpf_prog *prog; + /* whether BPF link itself has "sleepable" semantics, which can differ + * from underlying BPF program having a "sleepable" semantics, as BPF + * link's semantics is determined by target attach hook + */ + bool sleepable; /* rcu is used before freeing, work can be used to schedule that * RCU-based freeing before that, so they never overlap */ @@ -1614,8 +1619,10 @@ struct bpf_link_ops { */ void (*dealloc)(struct bpf_link *link); /* deallocate link resources callback, called after RCU grace period; - * if underlying BPF program is sleepable we go through tasks trace - * RCU GP and then "classic" RCU GP + * if either the underlying BPF program is sleepable or BPF link's + * target hook is sleepable, we'll go through tasks trace RCU GP and + * then "classic" RCU GP; this need for chaining tasks trace and + * classic RCU GPs is designated by setting bpf_link->sleepable flag */ void (*dealloc_deferred)(struct bpf_link *link); int (*detach)(struct bpf_link *link); @@ -2362,6 +2369,9 @@ int bpf_prog_new_fd(struct bpf_prog *prog); void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog); +void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type, + const struct bpf_link_ops *ops, struct bpf_prog *prog, + bool sleepable); int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer); int bpf_link_settle(struct bpf_link_primer *primer); void bpf_link_cleanup(struct bpf_link_primer *primer); @@ -2717,6 +2727,12 @@ static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, { } +static inline void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type, + const struct bpf_link_ops *ops, struct bpf_prog *prog, + bool sleepable) +{ +} + static inline int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer) { -- cgit v1.2.3 From d44d26987bb3df6d76556827097fc9ce17565cb8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Oct 2024 13:04:07 +0100 Subject: timekeeping: Remove CONFIG_DEBUG_TIMEKEEPING Since 135225a363ae timekeeping_cycles_to_ns() handles large offsets which would lead to 64bit multiplication overflows correctly. It's also protected against negative motion of the clocksource unconditionally, which was exclusive to x86 before. timekeeping_advance() handles large offsets already correctly. That means the value of CONFIG_DEBUG_TIMEKEEPING which analyzed these cases is very close to zero. Remove all of it. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20241031120328.536010148@linutronix.de --- include/linux/timekeeper_internal.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index a3b6380a7777..e39d4d563b19 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -76,9 +76,6 @@ struct tk_read_base { * ntp shifted nano seconds. * @ntp_err_mult: Multiplication factor for scaled math conversion * @skip_second_overflow: Flag used to avoid updating NTP twice with same second - * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING) - * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING) - * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING) * * Note: For timespec(64) based interfaces wall_to_monotonic is what * we need to add to xtime (or xtime corrected for sub jiffy times) @@ -147,19 +144,6 @@ struct timekeeper { u32 ntp_error_shift; u32 ntp_err_mult; u32 skip_second_overflow; - -#ifdef CONFIG_DEBUG_TIMEKEEPING - long last_warning; - /* - * These simple flag variables are managed - * without locks, which is racy, but they are - * ok since we don't really care about being - * super precise about how many events were - * seen, just that a problem was observed. - */ - int underflow_seen; - int overflow_seen; -#endif }; #ifdef CONFIG_GENERIC_TIME_VSYSCALL -- cgit v1.2.3 From acb0ed843290f3b19ede1bd9328eb41dee58ce40 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 22 Oct 2024 01:24:34 +0100 Subject: crypto: asymmetric_keys - Remove unused functions encrypt_blob(), decrypt_blob() and create_signature() were some of the functions added in 2018 by commit 5a30771832aa ("KEYS: Provide missing asymmetric key subops for new key type ops [ver #2]") however, they've not been used. Remove them. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Jarkko Sakkinen Signed-off-by: Herbert Xu --- include/crypto/public_key.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h index b7f308977c84..81098e00c08f 100644 --- a/include/crypto/public_key.h +++ b/include/crypto/public_key.h @@ -104,9 +104,6 @@ static inline int restrict_link_by_digsig(struct key *dest_keyring, extern int query_asymmetric_key(const struct kernel_pkey_params *, struct kernel_pkey_query *); -extern int encrypt_blob(struct kernel_pkey_params *, const void *, void *); -extern int decrypt_blob(struct kernel_pkey_params *, const void *, void *); -extern int create_signature(struct kernel_pkey_params *, const void *, void *); extern int verify_signature(const struct key *, const struct public_key_signature *); -- cgit v1.2.3 From 2a69297eed87c1f3ad33b8a169025c06adde5dcf Mon Sep 17 00:00:00 2001 From: Qi Tao Date: Sat, 26 Oct 2024 17:46:51 +0800 Subject: crypto: hisilicon - support querying the capability register Query the capability register status of accelerator devices (SEC, HPRE and ZIP) through the debugfs interface, for example: cat cap_regs. The purpose is to improve the robustness and locability of hardware devices and drivers. Signed-off-by: Qi Tao Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- include/linux/hisi_acc_qm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 389e95754776..7272eac850e8 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -274,13 +274,25 @@ struct hisi_qm_cap_info { u32 v3_val; }; +struct hisi_qm_cap_query_info { + u32 type; + const char *name; + u32 offset; + u32 v1_val; + u32 v2_val; + u32 v3_val; +}; + struct hisi_qm_cap_record { u32 type; + const char *name; u32 cap_val; }; struct hisi_qm_cap_tables { + u32 qm_cap_size; struct hisi_qm_cap_record *qm_cap_table; + u32 dev_cap_size; struct hisi_qm_cap_record *dev_cap_table; }; @@ -554,6 +566,9 @@ void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset); u32 hisi_qm_get_hw_info(struct hisi_qm *qm, const struct hisi_qm_cap_info *info_table, u32 index, bool is_read); +u32 hisi_qm_get_cap_value(struct hisi_qm *qm, + const struct hisi_qm_cap_query_info *info_table, + u32 index, bool is_read); int hisi_qm_set_algs(struct hisi_qm *qm, u64 alg_msk, const struct qm_dev_alg *dev_algs, u32 dev_algs_size); -- cgit v1.2.3 From c418ba6baca3ae10ffaf47b0803d2a9e6bf1af96 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 26 Oct 2024 19:44:29 +0800 Subject: crypto: hisilicon/qm - disable same error report before resetting If an error indicating that the device needs to be reset is reported, disable the error reporting before device reset is complete, enable the error reporting after the reset is complete to prevent the same error from being reported repeatedly. Fixes: eaebf4c3b103 ("crypto: hisilicon - Unify hardware error init/uninit into QM") Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- include/linux/hisi_acc_qm.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 7272eac850e8..6dbd0d49628f 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -229,6 +229,12 @@ struct hisi_qm_status { struct hisi_qm; +enum acc_err_result { + ACC_ERR_NONE, + ACC_ERR_NEED_RESET, + ACC_ERR_RECOVERED, +}; + struct hisi_qm_err_info { char *acpi_rst; u32 msi_wr_port; @@ -257,9 +263,9 @@ struct hisi_qm_err_ini { void (*close_axi_master_ooo)(struct hisi_qm *qm); void (*open_sva_prefetch)(struct hisi_qm *qm); void (*close_sva_prefetch)(struct hisi_qm *qm); - void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts); void (*show_last_dfx_regs)(struct hisi_qm *qm); void (*err_info_init)(struct hisi_qm *qm); + enum acc_err_result (*get_err_result)(struct hisi_qm *qm); }; struct hisi_qm_cap_info { -- cgit v1.2.3 From a812eee0b68645e2916d4a4399280fe5471cac67 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 10 Oct 2024 09:01:30 +0200 Subject: vdso: Rename struct arch_vdso_data to arch_vdso_time_data The struct arch_vdso_data is only about vdso time data. So rename it to arch_vdso_time_data to make it obvious. Non time-related data will be migrated out of these structs soon. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Acked-by: Heiko Carstens # s390 Link: https://lore.kernel.org/all/20241010-vdso-generic-base-v1-28-b64f0842d512@linutronix.de --- include/vdso/datapage.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index b85f24cac3f5..d967baa0cd0c 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -19,10 +19,10 @@ #include #include -#ifdef CONFIG_ARCH_HAS_VDSO_DATA -#include +#ifdef CONFIG_ARCH_HAS_VDSO_TIME_DATA +#include #else -struct arch_vdso_data {}; +struct arch_vdso_time_data {}; #endif #define VDSO_BASES (CLOCK_TAI + 1) @@ -114,7 +114,7 @@ struct vdso_data { u32 hrtimer_res; u32 __unused; - struct arch_vdso_data arch_data; + struct arch_vdso_time_data arch_data; }; /** -- cgit v1.2.3 From f6ca73063754950bf3fbad753e3a9557e3aa85e3 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sat, 2 Nov 2024 14:28:41 +0100 Subject: i3c: Document I3C_ADDR_SLOT_EXT_STATUS_MASK As the mask is part of the enum, document it. Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20241102132841.2446176-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- include/linux/i3c/master.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 6e5328c6c6af..12d532b012c5 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -298,6 +298,7 @@ enum i3c_open_drain_speed { * @I3C_ADDR_SLOT_I2C_DEV: address is assigned to an I2C device * @I3C_ADDR_SLOT_I3C_DEV: address is assigned to an I3C device * @I3C_ADDR_SLOT_STATUS_MASK: address slot mask + * @I3C_ADDR_SLOT_EXT_STATUS_MASK: address slot mask with extended information * @I3C_ADDR_SLOT_EXT_DESIRED: the bitmask represents addresses that are preferred by some devices, * such as the "assigned-address" property in a device tree source. * On an I3C bus, addresses are assigned dynamically, and we need to know which -- cgit v1.2.3 From 7029acd8a950393ee3a3d8e1a7ee1a9b77808a3b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 25 Oct 2024 19:27:39 -0600 Subject: io_uring/rsrc: get rid of per-ring io_rsrc_node list Work in progress, but get rid of the per-ring serialization of resource nodes, like registered buffers and files. Main issue here is that one node can otherwise hold up a bunch of other nodes from getting freed, which is especially a problem for file resource nodes and networked workloads where some descriptors may not see activity in a long time. As an example, instantiate an io_uring ring fd and create a sparse registered file table. Even 2 will do. Then create a socket and register it as fixed file 0, F0. The number of open files in the app is now 5, with 0/1/2 being the usual stdin/out/err, 3 being the ring fd, and 4 being the socket. Register this socket (eg "the listener") in slot 0 of the registered file table. Now add an operation on the socket that uses slot 0. Finally, loop N times, where each loop creates a new socket, registers said socket as a file, then unregisters the socket, and finally closes the socket. This is roughly similar to what a basic accept loop would look like. At the end of this loop, it's not unreasonable to expect that there would still be 5 open files. Each socket created and registered in the loop is also unregistered and closed. But since the listener socket registered first still has references to its resource node due to still being active, each subsequent socket unregistration is stuck behind it for reclaim. Hence 5 + N files are still open at that point, where N is awaiting the final put held up by the listener socket. Rewrite the io_rsrc_node handling to NOT rely on serialization. Struct io_kiocb now gets explicit resource nodes assigned, with each holding a reference to the parent node. A parent node is either of type FILE or BUFFER, which are the two types of nodes that exist. A request can have two nodes assigned, if it's using both registered files and buffers. Since request issue and task_work completion is both under the ring private lock, no atomics are needed to handle these references. It's a simple unlocked inc/dec. As before, the registered buffer or file table each hold a reference as well to the registered nodes. Final put of the node will remove the node and free the underlying resource, eg unmap the buffer or put the file. Outside of removing the stall in resource reclaim described above, it has the following advantages: 1) It's a lot simpler than the previous scheme, and easier to follow. No need to specific quiesce handling anymore. 2) There are no resource node allocations in the fast path, all of that happens at resource registration time. 3) The structs related to resource handling can all get simplified quite a bit, like io_rsrc_node and io_rsrc_data. io_rsrc_put can go away completely. 4) Handling of resource tags is much simpler, and doesn't require persistent storage as it can simply get assigned up front at registration time. Just copy them in one-by-one at registration time and assign to the resource node. The only real downside is that a request is now explicitly limited to pinning 2 resources, one file and one buffer, where before just assigning a resource node to a request would pin all of them. The upside is that it's easier to follow now, as an individual resource is explicitly referenced and assigned to the request. With this in place, the above mentioned example will be using exactly 5 files at the end of the loop, not N. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index d4ba4ae480d6..42c5f2c992c4 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -56,7 +56,7 @@ struct io_wq_work { }; struct io_file_table { - struct io_fixed_file *files; + struct io_rsrc_node **nodes; unsigned long *bitmap; unsigned int alloc_hint; }; @@ -264,7 +264,6 @@ struct io_ring_ctx { * Fixed resources fast path, should be accessed only under * uring_lock, and updated through io_uring_register(2) */ - struct io_rsrc_node *rsrc_node; atomic_t cancel_seq; /* @@ -277,7 +276,7 @@ struct io_ring_ctx { struct io_wq_work_list iopoll_list; struct io_file_table file_table; - struct io_mapped_ubuf **user_bufs; + struct io_rsrc_node **user_bufs; unsigned nr_user_files; unsigned nr_user_bufs; @@ -372,10 +371,7 @@ struct io_ring_ctx { struct io_rsrc_data *buf_data; /* protected by ->uring_lock */ - struct list_head rsrc_ref_list; struct io_alloc_cache rsrc_node_cache; - struct wait_queue_head rsrc_quiesce_wq; - unsigned rsrc_quiesce; u32 pers_next; struct xarray personalities; @@ -642,7 +638,7 @@ struct io_kiocb { __poll_t apoll_events; }; - struct io_rsrc_node *rsrc_node; + struct io_rsrc_node *rsrc_nodes[2]; atomic_t refs; bool cancel_seq_set; -- cgit v1.2.3 From fbbb8e991d86bb7539de6161746b6c747f93f533 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 26 Oct 2024 06:43:44 -0600 Subject: io_uring/rsrc: get rid of io_rsrc_node allocation cache It's not going to be needed in the fast path going forward, so kill it off. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 42c5f2c992c4..696f2a05a98b 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -370,9 +370,6 @@ struct io_ring_ctx { struct io_rsrc_data *file_data; struct io_rsrc_data *buf_data; - /* protected by ->uring_lock */ - struct io_alloc_cache rsrc_node_cache; - u32 pers_next; struct xarray personalities; -- cgit v1.2.3 From 3597f2786b687a7f26361ce00a805ea0af41b65f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 26 Oct 2024 14:50:13 -0600 Subject: io_uring/rsrc: unify file and buffer resource tables For files, there's nr_user_files/file_table/file_data, and buffers have nr_user_bufs/user_bufs/buf_data. There's no reason why file_table and file_data can't be the same thing, and ditto for the buffer side. That gets rid of more io_ring_ctx state that's in two spots rather than just being in one spot, as it should be. Put all the registered file data in one locations, and ditto on the buffer front. This also avoids having both io_rsrc_data->nodes being an allocated array, and ->user_bufs[] or ->file_table.nodes. There's no reason to have this information duplicated. Keep it in one spot, io_rsrc_data, along with how many resources are available. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 696f2a05a98b..77fd508d043a 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -55,8 +55,13 @@ struct io_wq_work { int cancel_seq; }; +struct io_rsrc_data { + unsigned int nr; + struct io_rsrc_node **nodes; +}; + struct io_file_table { - struct io_rsrc_node **nodes; + struct io_rsrc_data data; unsigned long *bitmap; unsigned int alloc_hint; }; @@ -276,9 +281,7 @@ struct io_ring_ctx { struct io_wq_work_list iopoll_list; struct io_file_table file_table; - struct io_rsrc_node **user_bufs; - unsigned nr_user_files; - unsigned nr_user_bufs; + struct io_rsrc_data buf_table; struct io_submit_state submit_state; @@ -366,10 +369,6 @@ struct io_ring_ctx { struct wait_queue_head poll_wq; struct io_restriction restrictions; - /* slow path rsrc auxilary data, used by update/register */ - struct io_rsrc_data *file_data; - struct io_rsrc_data *buf_data; - u32 pers_next; struct xarray personalities; -- cgit v1.2.3 From b16e920a1909da6799c43000db730d8fcdcae907 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 28 Oct 2024 18:43:13 -0600 Subject: io_uring/rsrc: allow cloning at an offset Right now buffer cloning is an all-or-nothing kind of thing - either the whole table is cloned from a source to a destination ring, or nothing at all. However, it's not always desired to clone the whole thing. Allow for the application to specify a source and destination offset, and a number of buffers to clone. If the destination offset is non-zero, then allocate sparse nodes upfront. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 024745283783..cc8dbe78c126 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -719,7 +719,10 @@ enum { struct io_uring_clone_buffers { __u32 src_fd; __u32 flags; - __u32 pad[6]; + __u32 src_off; + __u32 dst_off; + __u32 nr; + __u32 pad[3]; }; struct io_uring_buf { -- cgit v1.2.3 From c1329532d5aabecf79788924941afb8a7b7c1024 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 29 Oct 2024 07:50:56 -0600 Subject: io_uring/rsrc: allow cloning with node replacements Currently cloning a buffer table will fail if the destination already has a table. But it should be possible to use it to replace existing elements. Add a IORING_REGISTER_DST_REPLACE cloning flag, which if set, will allow the destination to already having a buffer table. If that is the case, then entries designated by offset + nr buffers will be replaced if they already exist. Note that it's allowed to use IORING_REGISTER_DST_REPLACE and not have an existing table, in which case it'll work just like not having the flag set and an empty table - it'll just assign the newly created table for that case. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index cc8dbe78c126..ce58c4590de6 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -713,7 +713,8 @@ struct io_uring_clock_register { }; enum { - IORING_REGISTER_SRC_REGISTERED = 1, + IORING_REGISTER_SRC_REGISTERED = (1U << 0), + IORING_REGISTER_DST_REPLACE = (1U << 1), }; struct io_uring_clone_buffers { -- cgit v1.2.3 From 01ee194d1aba1202f0926d5047a2a4cf84d0e45d Mon Sep 17 00:00:00 2001 From: hexue Date: Fri, 1 Nov 2024 17:19:57 +0800 Subject: io_uring: add support for hybrid IOPOLL A new hybrid poll is implemented on the io_uring layer. Once an IO is issued, it will not poll immediately, but rather block first and re-run before IO complete, then poll to reap IO. While this poll method could be a suboptimal solution when running on a single thread, it offers performance lower than regular polling but higher than IRQ, and CPU utilization is also lower than polling. To use hybrid polling, the ring must be setup with both the IORING_SETUP_IOPOLL and IORING_SETUP_HYBRID)IOPOLL flags set. Hybrid polling has the same restrictions as IOPOLL, in that commands must explicitly support it. Signed-off-by: hexue Link: https://lore.kernel.org/r/20241101091957.564220-2-xue01.he@samsung.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 19 +++++++++++++++++-- include/uapi/linux/io_uring.h | 3 +++ 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 77fd508d043a..d52fec533c51 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -298,6 +298,11 @@ struct io_ring_ctx { * ->uring_cmd() by io_uring_cmd_insert_cancelable() */ struct hlist_head cancelable_uring_cmd; + /* + * For Hybrid IOPOLL, runtime in hybrid polling, without + * scheduling time + */ + u64 hybrid_poll_time; } ____cacheline_aligned_in_smp; struct { @@ -449,6 +454,7 @@ enum { REQ_F_LINK_TIMEOUT_BIT, REQ_F_NEED_CLEANUP_BIT, REQ_F_POLLED_BIT, + REQ_F_HYBRID_IOPOLL_STATE_BIT, REQ_F_BUFFER_SELECTED_BIT, REQ_F_BUFFER_RING_BIT, REQ_F_REISSUE_BIT, @@ -507,6 +513,8 @@ enum { REQ_F_NEED_CLEANUP = IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT), /* already went through poll handler */ REQ_F_POLLED = IO_REQ_FLAG(REQ_F_POLLED_BIT), + /* every req only blocks once in hybrid poll */ + REQ_F_IOPOLL_STATE = IO_REQ_FLAG(REQ_F_HYBRID_IOPOLL_STATE_BIT), /* buffer already selected */ REQ_F_BUFFER_SELECTED = IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT), /* buffer selected from ring, needs commit */ @@ -639,8 +647,15 @@ struct io_kiocb { atomic_t refs; bool cancel_seq_set; struct io_task_work io_task_work; - /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */ - struct hlist_node hash_node; + union { + /* + * for polled requests, i.e. IORING_OP_POLL_ADD and async armed + * poll + */ + struct hlist_node hash_node; + /* For IOPOLL setup queues, with hybrid polling */ + u64 iopoll_start; + }; /* internal polling, see IORING_FEAT_FAST_POLL */ struct async_poll *apoll; /* opcode allocated if it needs to store data for async defer */ diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ce58c4590de6..47977a5c65f5 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -200,6 +200,9 @@ enum io_uring_sqe_flags_bit { */ #define IORING_SETUP_NO_SQARRAY (1U << 16) +/* Use hybrid poll in iopoll process */ +#define IORING_SETUP_HYBRID_IOPOLL (1U << 17) + enum io_uring_op { IORING_OP_NOP, IORING_OP_READV, -- cgit v1.2.3 From 53c0a58beb60b76e105a61aae518fd780eec03d9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 25 May 2024 23:32:20 -0400 Subject: net/socket.c: switch to CLASS(fd) The important part in sockfd_lookup_light() is avoiding needless file refcount operations, not the marginal reduction of the register pressure from not keeping a struct file pointer in the caller. Switch to use fdget()/fdpu(); with sane use of CLASS(fd) we can get a better code generation... Would be nice if somebody tested it on networking test suites (including benchmarks)... sockfd_lookup_light() does fdget(), uses sock_from_file() to get the associated socket and returns the struct socket reference to the caller, along with "do we need to fput()" flag. No matching fdput(), the caller does its equivalent manually, using the fact that sock->file points to the struct file the socket has come from. Get rid of that - have the callers do fdget()/fdput() and use sock_from_file() directly. That kills sockfd_lookup_light() and fput_light() (no users left). What's more, we can get rid of explicit fdget()/fdput() by switching to CLASS(fd, ...) - code generation does not suffer, since now fdput() inserted on "descriptor is not opened" failure exit is recognized to be a no-op by compiler. [folded a fix for braino in do_recvmmsg() caught by Simon Horman] Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/file.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/file.h b/include/linux/file.h index f98de143245a..b49a92295b3f 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -30,12 +30,6 @@ extern struct file *alloc_file_pseudo_noaccount(struct inode *, struct vfsmount extern struct file *alloc_file_clone(struct file *, int flags, const struct file_operations *); -static inline void fput_light(struct file *file, int fput_needed) -{ - if (fput_needed) - fput(file); -} - /* either a reference to struct file + flags * (cloned vs. borrowed, pos locked), with * flags stored in lower bits of value, -- cgit v1.2.3 From f302edb9d822804e72df3fa6ba270234050c678b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 14 Jul 2024 21:49:04 -0400 Subject: switch netlink_getsockbyfilp() to taking descriptor the only call site (in do_mq_notify()) obtains the argument from an immediately preceding fdget() and it is immediately followed by fdput(); might as well just replace it with a variant that would take a descriptor instead of struct file * and have file lookups handled inside that function. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index b332c2048c75..a48a30842d84 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -239,7 +239,7 @@ int netlink_register_notifier(struct notifier_block *nb); int netlink_unregister_notifier(struct notifier_block *nb); /* finegrained unicast helpers: */ -struct sock *netlink_getsockbyfilp(struct file *filp); +struct sock *netlink_getsockbyfd(int fd); int netlink_attachskb(struct sock *sk, struct sk_buff *skb, long *timeo, struct sock *ssk); void netlink_detachskb(struct sock *sk, struct sk_buff *skb); -- cgit v1.2.3 From d7a9616ce0348b9d945d5dff82e4b44c0fe75b39 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 31 May 2024 22:10:12 -0400 Subject: introduce "fd_pos" class, convert fdget_pos() users to it. fdget_pos() for constructor, fdput_pos() for cleanup, all users of fd..._pos() converted trivially. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/file.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/file.h b/include/linux/file.h index b49a92295b3f..4b09a8de2fd5 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -81,6 +81,7 @@ static inline void fdput_pos(struct fd f) DEFINE_CLASS(fd, struct fd, fdput(_T), fdget(fd), int fd) DEFINE_CLASS(fd_raw, struct fd, fdput(_T), fdget_raw(fd), int fd) +DEFINE_CLASS(fd_pos, struct fd, fdput_pos(_T), fdget_pos(fd), int fd) extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags); -- cgit v1.2.3 From 38052c2dd71f5490f34bba21dc358e97fb205ee5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 6 Jun 2024 19:29:04 -0400 Subject: deal with the last remaing boolean uses of fd_file() Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/cleanup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 038b2d523bf8..875c998275c0 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -234,7 +234,7 @@ const volatile void * __must_check_fn(const volatile void *val) * DEFINE_CLASS(fdget, struct fd, fdput(_T), fdget(fd), int fd) * * CLASS(fdget, f)(fd); - * if (!fd_file(f)) + * if (fd_empty(f)) * return -EBADF; * * // use 'f' without concern -- cgit v1.2.3 From cdfd5daf90af8363fb1f58e08c829a775b2e2fc5 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Fri, 1 Nov 2024 11:57:12 +0200 Subject: dt-bindings: clock: renesas,r9a08g045-vbattb: Document VBATTB The VBATTB IP of the Renesas RZ/G3S SoC controls the clock for RTC, the tamper detector and a small general usage memory of 128B. The VBATTB controller controls the clock for the RTC on the Renesas RZ/G3S. The HW block diagram for the clock logic is as follows: +----------+ XC `\ RTXIN --->| |----->| \ +----+ VBATTCLK | 32K clock| | |----->|gate|-----------> | osc | XBYP | | +----+ RTXOUT --->| |----->| / +----------+ ,/ One could connect as input to this HW block either a crystal or an external clock device. This is board specific. After discussions w/ Stephen Boyd the clock tree associated with this hardware block was exported in Linux as: input-xtal xbyp xc mux vbattclk where: - input-xtal is the input clock (connected to RTXIN, RTXOUT pins) - xc, xbyp are mux inputs - mux is the internal mux - vbattclk is the gate clock that feeds in the end the RTC to allow selecting the input of the MUX though assigned-clock DT properties, using the already existing clock drivers and avoid adding other DT properties. This allows select the input of the mux based on the type of the connected input clock: - if the 32768 crystal is connected as input for the VBATTB, the input of the mux should be xc - if an external clock device is connected as input for the VBATTB the input of the mux should be xbyp Add bindings for the VBATTB controller. Reviewed-by: Geert Uytterhoeven Reviewed-by: Krzysztof Kozlowski Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/20241101095720.2247815-2-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/renesas,r9a08g045-vbattb.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/dt-bindings/clock/renesas,r9a08g045-vbattb.h (limited to 'include') diff --git a/include/dt-bindings/clock/renesas,r9a08g045-vbattb.h b/include/dt-bindings/clock/renesas,r9a08g045-vbattb.h new file mode 100644 index 000000000000..67774eafad06 --- /dev/null +++ b/include/dt-bindings/clock/renesas,r9a08g045-vbattb.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + * + * Copyright (C) 2024 Renesas Electronics Corp. + */ +#ifndef __DT_BINDINGS_CLOCK_R9A08G045_VBATTB_H__ +#define __DT_BINDINGS_CLOCK_R9A08G045_VBATTB_H__ + +#define VBATTB_XC 0 +#define VBATTB_XBYP 1 +#define VBATTB_MUX 2 +#define VBATTB_VBATTCLK 3 + +#endif /* __DT_BINDINGS_CLOCK_R9A08G045_VBATTB_H__ */ -- cgit v1.2.3 From a1afb959add1fad43cb337448c244ed70bac3109 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 30 Oct 2024 09:11:56 +0100 Subject: dpll: add clock quality level attribute and op In order to allow driver expose quality level of the clock it is running, introduce a new netlink attr with enum to carry it to the userspace. Also, introduce an op the dpll netlink code calls into the driver to obtain the value. Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20241030081157.966604-2-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- include/linux/dpll.h | 4 ++++ include/uapi/linux/dpll.h | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/include/linux/dpll.h b/include/linux/dpll.h index 81f7b623d0ba..5e4f9ab1cf75 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -26,6 +26,10 @@ struct dpll_device_ops { struct netlink_ext_ack *extack); int (*temp_get)(const struct dpll_device *dpll, void *dpll_priv, s32 *temp, struct netlink_ext_ack *extack); + int (*clock_quality_level_get)(const struct dpll_device *dpll, + void *dpll_priv, + unsigned long *qls, + struct netlink_ext_ack *extack); }; struct dpll_pin_ops { diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h index b0654ade7b7e..2b7ec2da4bcc 100644 --- a/include/uapi/linux/dpll.h +++ b/include/uapi/linux/dpll.h @@ -79,6 +79,29 @@ enum dpll_lock_status_error { DPLL_LOCK_STATUS_ERROR_MAX = (__DPLL_LOCK_STATUS_ERROR_MAX - 1) }; +/** + * enum dpll_clock_quality_level - level of quality of a clock device. This + * mainly applies when the dpll lock-status is DPLL_LOCK_STATUS_HOLDOVER. The + * current list is defined according to the table 11-7 contained in ITU-T + * G.8264/Y.1364 document. One may extend this list freely by other ITU-T + * defined clock qualities, or different ones defined by another + * standardization body (for those, please use different prefix). + */ +enum dpll_clock_quality_level { + DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_PRC = 1, + DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_SSU_A, + DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_SSU_B, + DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_EEC1, + DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_PRTC, + DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_EPRTC, + DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_EEEC, + DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_EPRC, + + /* private: */ + __DPLL_CLOCK_QUALITY_LEVEL_MAX, + DPLL_CLOCK_QUALITY_LEVEL_MAX = (__DPLL_CLOCK_QUALITY_LEVEL_MAX - 1) +}; + #define DPLL_TEMP_DIVIDER 1000 /** @@ -180,6 +203,7 @@ enum dpll_a { DPLL_A_TEMP, DPLL_A_TYPE, DPLL_A_LOCK_STATUS_ERROR, + DPLL_A_CLOCK_QUALITY_LEVEL, __DPLL_A_MAX, DPLL_A_MAX = (__DPLL_A_MAX - 1) -- cgit v1.2.3 From a8f80673ca0daaad990882529a5b4dc5114071e7 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Thu, 24 Oct 2024 17:37:41 +0800 Subject: compiler_types: Add noinline_for_tracing annotation Kernel functions that are not inlined can be easily hooked with BPF for tracing. However, functions intended for tracing may still be inlined unexpectedly. For example, in our case, after upgrading the compiler from GCC 9 to GCC 11, the tcp_drop_reason() function was inlined, which broke our monitoring tools. To prevent this, we need to ensure that the function remains non-inlined. The noinline_for_tracing annotation is introduced as a general solution for preventing inlining of kernel functions that need to be traced. This approach avoids the need for adding individual noinline comments to each function and provides a more consistent way to maintain traceability. Link: https://lore.kernel.org/netdev/CANn89iKvr44ipuRYFaPTpzwz=B_+pgA94jsggQ946mjwreV6Aw@mail.gmail.com/ Suggested-by: Eric Dumazet Signed-off-by: Yafang Shao Link: https://patch.msgid.link/20241024093742.87681-2-laoar.shao@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/compiler_types.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 1a957ea2f4fe..0c8b9601e603 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -265,6 +265,12 @@ struct ftrace_likely_data { */ #define noinline_for_stack noinline +/* + * Use noinline_for_tracing for functions that should not be inlined. + * For tracing reasons. + */ +#define noinline_for_tracing noinline + /* * Sanitizer helper attributes: Because using __always_inline and * __no_sanitize_* conflict, provide helper attributes that will either expand -- cgit v1.2.3 From 43d3487035e9a86fad952de4240a518614240d43 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 29 Oct 2024 15:55:35 -0600 Subject: UAPI: ethtool: Use __struct_group() in struct ethtool_link_settings Use the `__struct_group()` helper to create a new tagged `struct ethtool_link_settings_hdr`. This structure groups together all the members of the flexible `struct ethtool_link_settings` except the flexible array. As a result, the array is effectively separated from the rest of the members without modifying the memory layout of the flexible structure. This new tagged struct will be used to fix problematic declarations of middle-flex-arrays in composite structs[1]. [1] https://git.kernel.org/linus/d88cabfd9abc Signed-off-by: Gustavo A. R. Silva Link: https://patch.msgid.link/9e9fb0bd72e5ba1e916acbb4995b1e358b86a689.1730238285.git.gustavoars@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index c405ed63acfa..fc1f54b065f9 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2511,21 +2511,24 @@ enum ethtool_reset_flags { * autonegotiation; 0 if unknown or not applicable. Read-only. */ struct ethtool_link_settings { - __u32 cmd; - __u32 speed; - __u8 duplex; - __u8 port; - __u8 phy_address; - __u8 autoneg; - __u8 mdio_support; - __u8 eth_tp_mdix; - __u8 eth_tp_mdix_ctrl; - __s8 link_mode_masks_nwords; - __u8 transceiver; - __u8 master_slave_cfg; - __u8 master_slave_state; - __u8 rate_matching; - __u32 reserved[7]; + /* New members MUST be added within the __struct_group() macro below. */ + __struct_group(ethtool_link_settings_hdr, hdr, /* no attrs */, + __u32 cmd; + __u32 speed; + __u8 duplex; + __u8 port; + __u8 phy_address; + __u8 autoneg; + __u8 mdio_support; + __u8 eth_tp_mdix; + __u8 eth_tp_mdix_ctrl; + __s8 link_mode_masks_nwords; + __u8 transceiver; + __u8 master_slave_cfg; + __u8 master_slave_state; + __u8 rate_matching; + __u32 reserved[7]; + ); __u32 link_mode_masks[]; /* layout of link_mode_masks fields: * __u32 map_supported[link_mode_masks_nwords]; -- cgit v1.2.3 From 3bd9b9abdf1563a22041b7255baea6d449902f1a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 29 Oct 2024 15:58:47 -0600 Subject: net: ethtool: Avoid thousands of -Wflex-array-member-not-at-end warnings -Wflex-array-member-not-at-end was introduced in GCC-14, and we are getting ready to enable it, globally. Change the type of the middle struct member currently causing trouble from `struct ethtool_link_settings` to `struct ethtool_link_settings_hdr`. Additionally, update the type of some variables in various functions that don't access the flexible-array member, changing them to the newly created `struct ethtool_link_settings_hdr`. These changes are needed because the type of the conflicting middle members changed. So, those instances that expect the type to be `struct ethtool_link_settings` should be adjusted to the newly created type `struct ethtool_link_settings_hdr`. Also, adjust variable declarations to follow the reverse xmas tree convention. Fix 3338 of the following -Wflex-array-member-not-at-end warnings: include/linux/ethtool.h:214:38: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Signed-off-by: Gustavo A. R. Silva Link: https://patch.msgid.link/0bc2809fe2a6c11dd4c8a9a10d9bd65cccdb559b.1730238285.git.gustavoars@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 12f6dc567598..1199e308c8dd 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -211,7 +211,7 @@ void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id); * fields, but they are allowed to overwrite them (will be ignored). */ struct ethtool_link_ksettings { - struct ethtool_link_settings base; + struct ethtool_link_settings_hdr base; struct { __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); -- cgit v1.2.3 From 6b2d11e2d8fc130df4708be0b6b53fd3e6b54cf6 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Wed, 30 Oct 2024 04:22:33 +0000 Subject: net/tcp: Add missing lockdep annotations for TCP-AO hlist traversals Under CONFIG_PROVE_RCU_LIST + CONFIG_RCU_EXPERT hlist_for_each_entry_rcu() provides very helpful splats, which help to find possible issues. I missed CONFIG_RCU_EXPERT=y in my testing config the same as described in a3e4bf7f9675 ("configs/debug: make sure PROVE_RCU_LIST=y takes effect"). The fix itself is trivial: add the very same lockdep annotations as were used to dereference ao_info from the socket. Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20241028152645.35a8be66@kernel.org/ Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://patch.msgid.link/20241030-tcp-ao-hlist-lockdep-annotate-v1-1-bf641a64d7c6@gmail.com Signed-off-by: Jakub Kicinski --- include/net/tcp_ao.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 1d46460d0fef..df655ce6987d 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -183,7 +183,8 @@ int tcp_ao_hash_skb(unsigned short int family, const u8 *tkey, int hash_offset, u32 sne); int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family, sockptr_t optval, int optlen); -struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, +struct tcp_ao_key *tcp_ao_established_key(const struct sock *sk, + struct tcp_ao_info *ao, int sndid, int rcvid); int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk, struct request_sock *req, struct sk_buff *skb, -- cgit v1.2.3 From 2ea25aab938a250bdf3148acd15359b56b91b40e Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 29 Oct 2024 16:18:49 -0500 Subject: pwm: core: export pwm_get_state_hw() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export the pwm_get_state_hw() function. This is useful in cases where we want to know what the hardware is actually doing, rather than what what we requested it should do. Locking had to be rearranged to ensure that the chip is still operational before trying to access ops now that this can be called from outside the pwm core. Signed-off-by: David Lechner Link: https://lore.kernel.org/r/20241029-pwm-export-pwm_get_state_hw-v2-1-03ba063a3230@baylibre.com [ukleinek: Add dummy for !CONFIG_PWM] Signed-off-by: Uwe Kleine-König --- include/linux/pwm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index f1cb1e5b0a36..78827f312407 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -370,6 +370,7 @@ int pwm_get_waveform_might_sleep(struct pwm_device *pwm, struct pwm_waveform *wf int pwm_set_waveform_might_sleep(struct pwm_device *pwm, const struct pwm_waveform *wf, bool exact); int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state); int pwm_apply_atomic(struct pwm_device *pwm, const struct pwm_state *state); +int pwm_get_state_hw(struct pwm_device *pwm, struct pwm_state *state); int pwm_adjust_config(struct pwm_device *pwm); /** @@ -494,6 +495,11 @@ static inline int pwm_apply_atomic(struct pwm_device *pwm, return -EOPNOTSUPP; } +static inline int pwm_get_state_hw(struct pwm_device *pwm, struct pwm_state *state) +{ + return -EOPNOTSUPP; +} + static inline int pwm_adjust_config(struct pwm_device *pwm) { return -EOPNOTSUPP; -- cgit v1.2.3 From 9a5a2483bc60c12d73ac6ca5ac5ab95361a895f4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 1 Nov 2024 12:53:42 +0200 Subject: iio: Mark iio_dev::priv member with __private The member is not supposed to be accessed directly, mark it with __private to catch the misuses up. Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20241101105342.3645018-1-andriy.shevchenko@linux.intel.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 445d6666a291..5c6682bd4cb9 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -624,7 +624,7 @@ struct iio_dev { const struct iio_info *info; const struct iio_buffer_setup_ops *setup_ops; - void *priv; + void *priv __private; }; int iio_device_id(struct iio_dev *indio_dev); @@ -785,7 +785,7 @@ struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv); /* The information at the returned address is guaranteed to be cacheline aligned */ static inline void *iio_priv(const struct iio_dev *indio_dev) { - return indio_dev->priv; + return ACCESS_PRIVATE(indio_dev, priv); } void iio_device_free(struct iio_dev *indio_dev); -- cgit v1.2.3 From 6e6738398def256126185cd25e2e3cb68f1bc0a3 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Fri, 1 Nov 2024 07:46:27 +0000 Subject: iio: hid-sensors: Add proximity and attention IDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HID Usage Table at https://usb.org/sites/default/files/hut1_5.pdf reserves: - 0x4b2 for Human Proximity Range Distance between a human and the computer. Default unit of measure is meters; https://www.usb.org/sites/default/files/hutrr39b_0.pdf - 0x4bd for Human Attention Detected Human-Presence sensors detect the presence of humans in the sensor’s field-of-view using diverse and evolving technologies. Some presence sensors are implemented with low resolution video cameras, which can additionally track a subject’s attention (i.e. if the user is ‘looking’ at the system with the integrated sensor). A Human-Presence sensor, providing a Host with the user’s attention state, allows the Host to optimize its behavior. For example, to brighten/dim the system display, based on the user’s attention to the system (potentially prolonging battery life). Default unit is true/false; https://www.usb.org/sites/default/files/hutrr107-humanpresenceattention_1.pdf Signed-off-by: Ricardo Ribalda Link: https://patch.msgid.link/20241101-hpd-v3-1-e9c80b7c7164@chromium.org Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 6730ee900ee1..8a03d9696b1c 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -30,6 +30,8 @@ #define HID_USAGE_SENSOR_PROX 0x200011 #define HID_USAGE_SENSOR_DATA_PRESENCE 0x2004b0 #define HID_USAGE_SENSOR_HUMAN_PRESENCE 0x2004b1 +#define HID_USAGE_SENSOR_HUMAN_PROXIMITY 0x2004b2 +#define HID_USAGE_SENSOR_HUMAN_ATTENTION 0x2004bd /* Pressure (200031) */ #define HID_USAGE_SENSOR_PRESSURE 0x200031 -- cgit v1.2.3 From 9d2fe9cd02ca5f1e70a7eff0262fb3668a27db0c Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Fri, 1 Nov 2024 07:46:29 +0000 Subject: iio: Add channel type for attention Add a new channel type representing if the user's attention state to the the system. This usually means if the user is looking at the screen or not. Signed-off-by: Ricardo Ribalda Link: https://patch.msgid.link/20241101-hpd-v3-3-e9c80b7c7164@chromium.org Signed-off-by: Jonathan Cameron --- include/uapi/linux/iio/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index f2e0b2d50e6b..12886d4465e4 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -51,6 +51,7 @@ enum iio_chan_type { IIO_DELTA_VELOCITY, IIO_COLORTEMP, IIO_CHROMATICITY, + IIO_ATTENTION, }; enum iio_modifier { -- cgit v1.2.3 From b4b42f28a0df6b9d31f0003f7dea3bddf272eaa4 Mon Sep 17 00:00:00 2001 From: Julien Stephan Date: Thu, 31 Oct 2024 16:27:02 +0100 Subject: iio: fix write_event_config signature write_event_config callback use an int for state, but it is actually a boolean. iio_ev_state_store is actually using kstrtobool to check user input, then gives the converted boolean value to write_event_config. Fix signature and update all iio drivers to use the new signature. This patch has been partially written using coccinelle with the following script: $ cat iio-bool.cocci // Options: --all-includes virtual patch @c1@ identifier iioinfo; identifier wecfunc; @@ static const struct iio_info iioinfo = { ..., .write_event_config = ( wecfunc | &wecfunc ), ..., }; @@ identifier c1.wecfunc; identifier indio_dev, chan, type, dir, state; @@ int wecfunc(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, enum iio_event_type type, enum iio_event_direction dir, -int +bool state) { ... } make coccicheck MODE=patch COCCI=iio-bool.cocci M=drivers/iio Unfortunately, this script didn't match all files: * all write_event_config callbacks using iio_device_claim_direct_scoped were not detected and not patched. * all files that do not assign and declare the write_event_config callback in the same file. iio.h was also manually updated. The patch was build tested using allmodconfig config. cc: Julia Lawall Signed-off-by: Julien Stephan Link: https://patch.msgid.link/20241031-iio-fix-write-event-config-signature-v2-7-2bcacbb517a2@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 5c6682bd4cb9..59c58f455311 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -514,7 +514,7 @@ struct iio_info { const struct iio_chan_spec *chan, enum iio_event_type type, enum iio_event_direction dir, - int state); + bool state); int (*read_event_value)(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, -- cgit v1.2.3 From 5d8173b8493151d32b99ec6732fb29c58256a7c8 Mon Sep 17 00:00:00 2001 From: Julien Stephan Date: Mon, 28 Oct 2024 17:38:11 +0100 Subject: iio: events.h: add event identifier macros for differential channel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, there are 3 helper macros in iio/events.h to create event identifiers: - IIO_EVENT_CODE : create generic event identifier for differential and non differential channels - IIO_MOD_EVENT_CODE : create event identifier for modified (non differential) channels - IIO_UNMOD_EVENT_CODE : create event identifier for unmodified (non differential) channels For differential channels, drivers are expected to use IIO_EVENT_CODE. However, only one driver in drivers/iio currently uses it correctly, leading to inconsistent event identifiers for differential channels that don’t match the intended attributes (such as max1363.c that supports differential channels, but only uses IIO_UNMOD_EVENT_CODE). To prevent such issues in future drivers, a new helper macro, IIO_DIFF_EVENT_CODE, is introduced to specifically create event identifiers for differential channels. Only one helper is needed for differential channels since they cannot have modifiers. Additionally, the descriptions for IIO_MOD_EVENT_CODE and IIO_UNMOD_EVENT_CODE have been updated to clarify that they are intended for non-differential channels, Signed-off-by: Julien Stephan Reviewed-by: David Lechner Link: https://patch.msgid.link/20241028-iio-add-macro-for-even-identifier-for-differential-channels-v1-1-b452c90f7ea6@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/events.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iio/events.h b/include/linux/iio/events.h index a4558c45a548..eeaba5e1525e 100644 --- a/include/linux/iio/events.h +++ b/include/linux/iio/events.h @@ -30,7 +30,8 @@ /** - * IIO_MOD_EVENT_CODE() - create event identifier for modified channels + * IIO_MOD_EVENT_CODE() - create event identifier for modified (non + * differential) channels * @chan_type: Type of the channel. Should be one of enum iio_chan_type. * @number: Channel number. * @modifier: Modifier for the channel. Should be one of enum iio_modifier. @@ -43,7 +44,8 @@ IIO_EVENT_CODE(chan_type, 0, modifier, direction, type, number, 0, 0) /** - * IIO_UNMOD_EVENT_CODE() - create event identifier for unmodified channels + * IIO_UNMOD_EVENT_CODE() - create event identifier for unmodified (non + * differential) channels * @chan_type: Type of the channel. Should be one of enum iio_chan_type. * @number: Channel number. * @type: Type of the event. Should be one of enum iio_event_type. @@ -53,4 +55,16 @@ #define IIO_UNMOD_EVENT_CODE(chan_type, number, type, direction) \ IIO_EVENT_CODE(chan_type, 0, 0, direction, type, number, 0, 0) +/** + * IIO_DIFF_EVENT_CODE() - create event identifier for differential channels + * @chan_type: Type of the channel. Should be one of enum iio_chan_type. + * @chan1: First channel number for differential channels. + * @chan2: Second channel number for differential channels. + * @type: Type of the event. Should be one of enum iio_event_type. + * @direction: Direction of the event. One of enum iio_event_direction. + */ + +#define IIO_DIFF_EVENT_CODE(chan_type, chan1, chan2, type, direction) \ + IIO_EVENT_CODE(chan_type, 1, 0, direction, type, 0, chan1, chan2) + #endif -- cgit v1.2.3 From 01f567d22152dfa8799e9fde5f18bbb5650d8681 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 1 Nov 2024 17:17:10 -0500 Subject: iio: events: make IIO_EVENT_CODE macro private Make IIO_EVENT_CODE "private" by adding a leading underscore. There are no more users of this macro in the kernel so we can make it "private" and encourage developers to use the specialized versions of the macro instead. Signed-off-by: David Lechner Link: https://patch.msgid.link/20241101-iio-fix-event-macro-use-v1-3-0000c5d09f6d@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/events.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/iio/events.h b/include/linux/iio/events.h index eeaba5e1525e..72062a0c7c87 100644 --- a/include/linux/iio/events.h +++ b/include/linux/iio/events.h @@ -10,7 +10,7 @@ #include /** - * IIO_EVENT_CODE() - create event identifier + * _IIO_EVENT_CODE() - create event identifier * @chan_type: Type of the channel. Should be one of enum iio_chan_type. * @diff: Whether the event is for an differential channel or not. * @modifier: Modifier for the channel. Should be one of enum iio_modifier. @@ -19,10 +19,13 @@ * @chan: Channel number for non-differential channels. * @chan1: First channel number for differential channels. * @chan2: Second channel number for differential channels. + * + * Drivers should use the specialized macros below instead of using this one + * directly. */ -#define IIO_EVENT_CODE(chan_type, diff, modifier, direction, \ - type, chan, chan1, chan2) \ +#define _IIO_EVENT_CODE(chan_type, diff, modifier, direction, \ + type, chan, chan1, chan2) \ (((u64)type << 56) | ((u64)diff << 55) | \ ((u64)direction << 48) | ((u64)modifier << 40) | \ ((u64)chan_type << 32) | (((u16)chan2) << 16) | ((u16)chan1) | \ @@ -41,7 +44,7 @@ #define IIO_MOD_EVENT_CODE(chan_type, number, modifier, \ type, direction) \ - IIO_EVENT_CODE(chan_type, 0, modifier, direction, type, number, 0, 0) + _IIO_EVENT_CODE(chan_type, 0, modifier, direction, type, number, 0, 0) /** * IIO_UNMOD_EVENT_CODE() - create event identifier for unmodified (non @@ -53,7 +56,7 @@ */ #define IIO_UNMOD_EVENT_CODE(chan_type, number, type, direction) \ - IIO_EVENT_CODE(chan_type, 0, 0, direction, type, number, 0, 0) + _IIO_EVENT_CODE(chan_type, 0, 0, direction, type, number, 0, 0) /** * IIO_DIFF_EVENT_CODE() - create event identifier for differential channels @@ -65,6 +68,6 @@ */ #define IIO_DIFF_EVENT_CODE(chan_type, chan1, chan2, type, direction) \ - IIO_EVENT_CODE(chan_type, 1, 0, direction, type, 0, chan1, chan2) + _IIO_EVENT_CODE(chan_type, 1, 0, direction, type, 0, chan1, chan2) #endif -- cgit v1.2.3 From a865276872ec4f129f8a582634be82dcc275dc2a Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Wed, 30 Oct 2024 18:23:25 -0600 Subject: dim: make dim_calc_stats() inputs const pointers Make the start and end arguments to dim_calc_stats() const pointers to clarify that the function does not modify their values. Signed-off-by: Caleb Sander Mateos Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Reviewed-by: Arthur Kiyanovski Link: https://patch.msgid.link/20241031002326.3426181-1-csander@purestorage.com Signed-off-by: Jakub Kicinski --- include/linux/dim.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dim.h b/include/linux/dim.h index 1b581ff25a15..84579a50ae7f 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -351,7 +351,8 @@ void dim_park_tired(struct dim *dim); * Takes into consideration counter wrap-around. * Returned boolean indicates whether curr_stats are reliable. */ -bool dim_calc_stats(struct dim_sample *start, struct dim_sample *end, +bool dim_calc_stats(const struct dim_sample *start, + const struct dim_sample *end, struct dim_stats *curr_stats); /** -- cgit v1.2.3 From 61bf0009a7657d394d942c8ee961b9ea5f2168fe Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Wed, 30 Oct 2024 18:23:26 -0600 Subject: dim: pass dim_sample to net_dim() by reference net_dim() is currently passed a struct dim_sample argument by value. struct dim_sample is 24 bytes. Since this is greater 16 bytes, x86-64 passes it on the stack. All callers have already initialized dim_sample on the stack, so passing it by value requires pushing a duplicated copy to the stack. Either witing to the stack and immediately reading it, or perhaps dereferencing addresses relative to the stack pointer in a chain of push instructions, seems to perform quite poorly. In a heavy TCP workload, mlx5e_handle_rx_dim() consumes 3% of CPU time, 94% of which is attributed to the first push instruction to copy dim_sample on the stack for the call to net_dim(): // Call ktime_get() 0.26 |4ead2: call 4ead7 // Pass the address of struct dim in %rdi |4ead7: lea 0x3d0(%rbx),%rdi // Set dim_sample.pkt_ctr |4eade: mov %r13d,0x8(%rsp) // Set dim_sample.byte_ctr |4eae3: mov %r12d,0xc(%rsp) // Set dim_sample.event_ctr 0.15 |4eae8: mov %bp,0x10(%rsp) // Duplicate dim_sample on the stack 94.16 |4eaed: push 0x10(%rsp) 2.79 |4eaf1: push 0x10(%rsp) 0.07 |4eaf5: push %rax // Call net_dim() 0.21 |4eaf6: call 4eafb To allow the caller to reuse the struct dim_sample already on the stack, pass the struct dim_sample by reference to net_dim(). Signed-off-by: Caleb Sander Mateos Reviewed-by: Vladimir Oltean Reviewed-by: Shannon Nelson Reviewed-by: Florian Fainelli Reviewed-by: Arthur Kiyanovski Reviewed-by: Louis Peens Link: https://patch.msgid.link/20241031002326.3426181-2-csander@purestorage.com Signed-off-by: Jakub Kicinski --- include/linux/dim.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dim.h b/include/linux/dim.h index 84579a50ae7f..06543fd40fcc 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -425,7 +425,7 @@ struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode); * This is the main logic of the algorithm, where data is processed in order * to decide on next required action. */ -void net_dim(struct dim *dim, struct dim_sample end_sample); +void net_dim(struct dim *dim, const struct dim_sample *end_sample); /* RDMA DIM */ -- cgit v1.2.3 From 1685f685ff8036c74c3b5e9006ec7ceda5bafefa Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 26 Oct 2024 18:43:33 +0300 Subject: soc: qcom: llcc: add support for SAR2130P and SAR1130P Implement necessary support for the LLCC control on the SAR1130P and SAR2130P platforms. These two platforms use different ATTR1_MAX_CAP shift and also require manual override for num_banks. Signed-off-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20241026-sar2130p-llcc-v3-3-2a58fa1b4d12@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/llcc-qcom.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 9e9f528b1370..a79bfac230c6 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -54,7 +54,19 @@ #define LLCC_CAMEXP4 52 #define LLCC_DISP_WB 53 #define LLCC_DISP_1 54 +#define LLCC_VIEYE 57 +#define LLCC_VIDPTH 58 +#define LLCC_GPUMV 59 +#define LLCC_EVA_LEFT 60 +#define LLCC_EVA_RIGHT 61 +#define LLCC_EVAGAIN 62 +#define LLCC_VIPTH 63 #define LLCC_VIDVSP 64 +#define LLCC_DISP_LEFT 65 +#define LLCC_DISP_RIGHT 66 +#define LLCC_EVCS_LEFT 67 +#define LLCC_EVCS_RIGHT 68 +#define LLCC_SPAD 69 /** * struct llcc_slice_desc - Cache slice descriptor -- cgit v1.2.3 From 8ab3138a9b2dcb0ddf281240cf8cba414eb1224a Mon Sep 17 00:00:00 2001 From: Edward Srouji Date: Tue, 3 Sep 2024 14:37:51 +0300 Subject: net/mlx5: Introduce data placement ordering bits Introduce out-of-order (OOO) data placement (DP) IFC related bits to support OOO DP QP. Signed-off-by: Edward Srouji Reviewed-by: Yishai Hadas Link: https://patch.msgid.link/f30e5cbb5459fd02f27f35909bb545cab346b58b.1725362773.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 96d369112bfa..2a037843b117 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1872,7 +1872,11 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_328[0x2]; u8 relaxed_ordering_read[0x1]; u8 log_max_pd[0x5]; - u8 reserved_at_330[0x5]; + u8 dp_ordering_ooo_all_ud[0x1]; + u8 dp_ordering_ooo_all_uc[0x1]; + u8 dp_ordering_ooo_all_xrc[0x1]; + u8 dp_ordering_ooo_all_dc[0x1]; + u8 dp_ordering_ooo_all_rc[0x1]; u8 pcie_reset_using_hotreset_method[0x1]; u8 pci_sync_for_fw_update_with_driver_unload[0x1]; u8 vnic_env_cnt_steering_fail[0x1]; @@ -2094,7 +2098,9 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_0[0x80]; u8 migratable[0x1]; - u8 reserved_at_81[0x11]; + u8 reserved_at_81[0x7]; + u8 dp_ordering_force[0x1]; + u8 reserved_at_89[0x9]; u8 query_vuid[0x1]; u8 reserved_at_93[0x5]; u8 umr_log_entity_size_5[0x1]; @@ -3524,7 +3530,8 @@ struct mlx5_ifc_qpc_bits { u8 latency_sensitive[0x1]; u8 reserved_at_24[0x1]; u8 drain_sigerr[0x1]; - u8 reserved_at_26[0x2]; + u8 reserved_at_26[0x1]; + u8 dp_ordering_force[0x1]; u8 pd[0x18]; u8 mtu[0x3]; @@ -3597,7 +3604,8 @@ struct mlx5_ifc_qpc_bits { u8 rae[0x1]; u8 reserved_at_493[0x1]; u8 page_offset[0x6]; - u8 reserved_at_49a[0x3]; + u8 reserved_at_49a[0x2]; + u8 dp_ordering_1[0x1]; u8 cd_slave_receive[0x1]; u8 cd_slave_send[0x1]; u8 cd_master[0x1]; @@ -4507,7 +4515,8 @@ struct mlx5_ifc_dctc_bits { u8 state[0x4]; u8 reserved_at_8[0x18]; - u8 reserved_at_20[0x8]; + u8 reserved_at_20[0x7]; + u8 dp_ordering_force[0x1]; u8 user_index[0x18]; u8 reserved_at_40[0x8]; @@ -4522,7 +4531,9 @@ struct mlx5_ifc_dctc_bits { u8 latency_sensitive[0x1]; u8 rlky[0x1]; u8 free_ar[0x1]; - u8 reserved_at_73[0xd]; + u8 reserved_at_73[0x1]; + u8 dp_ordering_1[0x1]; + u8 reserved_at_75[0xb]; u8 reserved_at_80[0x8]; u8 cs_res[0x8]; -- cgit v1.2.3 From fe5ba6bf91b3e30118c59fe51048cda101de6542 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 30 Oct 2024 17:39:15 +0800 Subject: net: enetc: add initial netc-blk-ctrl driver support The netc-blk-ctrl driver is used to configure Integrated Endpoint Register Block (IERB) and Privileged Register Block (PRB) of NETC. For i.MX platforms, it is also used to configure the NETCMIX block. The IERB contains registers that are used for pre-boot initialization, debug, and non-customer configuration. The PRB controls global reset and global error handling for NETC. The NETCMIX block is mainly used to set MII protocol and PCS protocol of the links, it also contains settings for some other functions. Note the IERB configuration registers can only be written after being unlocked by PRB, otherwise, all write operations are inhibited. A warm reset is performed when the IERB is unlocked, and it results in an FLR to all NETC devices. Therefore, all NETC device drivers must be probed or initialized after the warm reset is finished. Signed-off-by: Wei Fang Reviewed-by: Frank Li Signed-off-by: David S. Miller --- include/linux/fsl/netc_global.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/linux/fsl/netc_global.h (limited to 'include') diff --git a/include/linux/fsl/netc_global.h b/include/linux/fsl/netc_global.h new file mode 100644 index 000000000000..fdecca8c90f0 --- /dev/null +++ b/include/linux/fsl/netc_global.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* Copyright 2024 NXP + */ +#ifndef __NETC_GLOBAL_H +#define __NETC_GLOBAL_H + +#include + +static inline u32 netc_read(void __iomem *reg) +{ + return ioread32(reg); +} + +static inline void netc_write(void __iomem *reg, u32 val) +{ + iowrite32(val, reg); +} + +#endif -- cgit v1.2.3 From 8b36f7c3c6618dc97697a6a20a13b29651f68ab6 Mon Sep 17 00:00:00 2001 From: Edward Srouji Date: Tue, 3 Sep 2024 14:37:52 +0300 Subject: RDMA/mlx5: Support OOO RX WQE consumption Support QP with out-of-order (OOO) capabilities enabled. This allows WRs on the receiver side of the QP to be consumed OOO, permitting the sender side to transmit messages without guaranteeing arrival order on the receiver side. When enabled, the completion ordering of WRs remains in-order, regardless of the Receive WRs consumption order. RDMA Read and RDMA Atomic operations on the responder side continue to be executed in-order, while the ordering of data placement for RDMA Write and Send operations is not guaranteed. Atomic operations larger than 8 bytes are currently not supported. Therefore, when this feature is enabled, the created QP restricts its atomic support to 8 bytes at most. In addition, when querying the device, a new flag is returned in response to indicate that the Kernel supports OOO QP. Signed-off-by: Edward Srouji Reviewed-by: Yishai Hadas Link: https://patch.msgid.link/06ac609a5f358c8fb0a090d22c61a2f9329d82e6.1725362773.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/uapi/rdma/mlx5-abi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index d4f6a36dffb0..8a6ad6c6841c 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -252,6 +252,7 @@ enum mlx5_ib_query_dev_resp_flags { MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2, MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT = 1 << 3, + MLX5_IB_QUERY_DEV_RESP_FLAGS_OOO_DP = 1 << 4, }; enum mlx5_ib_tunnel_offloads { @@ -439,6 +440,10 @@ struct mlx5_ib_burst_info { __u16 reserved; }; +enum mlx5_ib_modify_qp_mask { + MLX5_IB_MODIFY_QP_OOO_DP = 1 << 0, +}; + struct mlx5_ib_modify_qp { __u32 comp_mask; struct mlx5_ib_burst_info burst_info; -- cgit v1.2.3 From af7a35bf6c36a77624d3abe46b3830b7c2a5f20c Mon Sep 17 00:00:00 2001 From: Chiara Meiohas Date: Thu, 31 Oct 2024 15:36:51 +0200 Subject: RDMA/core: Implement RoCE GID port rescan and export delete function rdma_roce_rescan_port() scans all network devices in the system and adds the gids if relevant to the RoCE device port. When not in bonding mode it adds the GIDs of the netdevice in this port. When in bonding mode it adds the GIDs of both the port's netdevice and the bond master netdevice. Export roce_del_all_netdev_gids(), which removes all GIDs associated with a specific netdevice for a given port. Signed-off-by: Chiara Meiohas Link: https://patch.msgid.link/674d498da4637a1503ff1367e28bd09ff942fd5e.1730381292.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9cb8b5fe7eee..67551133b522 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4734,6 +4734,9 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector) * @device: the rdma device */ void rdma_roce_rescan_device(struct ib_device *ibdev); +void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port); +void roce_del_all_netdev_gids(struct ib_device *ib_dev, + u32 port, struct net_device *ndev); struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile); -- cgit v1.2.3 From dc6be4418a1144cce422093cde0245c76cdcaff0 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 31 Oct 2024 13:22:51 +0200 Subject: RDMA/core: Add device ufile cleanup operation Add a driver operation to allow preemptive cleanup of ufile HW resources before the standard ufile cleanup flow begins. Thus, expediting the final cleanup phase which leads to fast teardown overall. This allows the use of driver specific clean up procedures to make the cleanup process more efficient. Signed-off-by: Patrisious Haddad Link: https://patch.msgid.link/cabe00d75132b5732cb515944e3c500a01fb0b4a.1730373303.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 67551133b522..3417636da960 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2675,6 +2675,12 @@ struct ib_device_ops { */ void (*del_sub_dev)(struct ib_device *sub_dev); + /** + * ufile_cleanup - Attempt to cleanup ubojects HW resources inside + * the ufile. + */ + void (*ufile_hw_cleanup)(struct ib_uverbs_file *ufile); + DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_counters); DECLARE_RDMA_OBJ_SIZE(ib_cq); -- cgit v1.2.3 From 27ed2f00807c2328c99751f9500ce6478f16cf7b Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 31 Oct 2024 13:22:52 +0200 Subject: RDMA/core: Move ib_uverbs_file struct to uverbs_types.h In light of the previous commit, make the ib_uverbs_file accessible to drivers by moving its definition to uverbs_types.h, to allow drivers to freely access the struct argument and create a personalized cleanup flow. For the same reason expose uverbs_try_lock_object function to allow driver to safely access the uverbs objects. Signed-off-by: Patrisious Haddad Link: https://patch.msgid.link/29b718e0dca35daa5f496320a39284fc1f5a1722.1730373303.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/uverbs_types.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include') diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index ccd11631c167..26ba919ac245 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -134,6 +134,8 @@ static inline void uverbs_uobject_get(struct ib_uobject *uobject) } void uverbs_uobject_put(struct ib_uobject *uobject); +int uverbs_try_lock_object(struct ib_uobject *uobj, enum rdma_lookup_mode mode); + struct uverbs_obj_fd_type { /* * In fd based objects, uverbs_obj_type_ops points to generic @@ -150,6 +152,37 @@ struct uverbs_obj_fd_type { int flags; }; +struct ib_uverbs_file { + struct kref ref; + struct ib_uverbs_device *device; + struct mutex ucontext_lock; + /* + * ucontext must be accessed via ib_uverbs_get_ucontext() or with + * ucontext_lock held + */ + struct ib_ucontext *ucontext; + struct ib_uverbs_async_event_file *default_async_file; + struct list_head list; + + /* + * To access the uobjects list hw_destroy_rwsem must be held for write + * OR hw_destroy_rwsem held for read AND uobjects_lock held. + * hw_destroy_rwsem should be called across any destruction of the HW + * object of an associated uobject. + */ + struct rw_semaphore hw_destroy_rwsem; + spinlock_t uobjects_lock; + struct list_head uobjects; + + struct mutex umap_lock; + struct list_head umaps; + struct page *disassociate_page; + + struct xarray idr; + + struct mutex disassociation_lock; +}; + extern const struct uverbs_obj_type_class uverbs_idr_class; extern const struct uverbs_obj_type_class uverbs_fd_class; int uverbs_uobject_fd_release(struct inode *inode, struct file *filp); -- cgit v1.2.3 From 7566752e4d7d7fc0186531aa800068a7243f95c1 Mon Sep 17 00:00:00 2001 From: Chiara Meiohas Date: Thu, 31 Oct 2024 11:31:14 +0200 Subject: RDMA/nldev: Add IB device and net device rename events Implement event sending for IB device rename and IB device port associated netdevice rename. In iproute2, rdma monitor displays the IB device name, port and the netdevice name when displaying event info. Since users can modiy these names, we track and notify on renaming events. Note: In order to receive netdevice rename events, drivers must use the ib_device_set_netdev() API when attaching net devices to IB devices. $ rdma monitor $ rmmod mlx5_ib [UNREGISTER] dev 1 rocep8s0f1 [UNREGISTER] dev 0 rocep8s0f0 $ modprobe mlx5_ib [REGISTER] dev 2 mlx5_0 [NETDEV_ATTACH] dev 2 mlx5_0 port 1 netdev 4 eth2 [REGISTER] dev 3 mlx5_1 [NETDEV_ATTACH] dev 3 mlx5_1 port 1 netdev 5 eth3 [RENAME] dev 2 rocep8s0f0 [RENAME] dev 3 rocep8s0f1 $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev [UNREGISTER] dev 2 rocep8s0f0 [REGISTER] dev 4 mlx5_0 [NETDEV_ATTACH] dev 4 mlx5_0 port 30 netdev 4 eth2 [RENAME] dev 4 rdmap8s0f0 $ echo 4 > /sys/class/net/eth2/device/sriov_numvfs [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 2 netdev 7 eth4 [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 3 netdev 8 eth5 [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 4 netdev 9 eth6 [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 5 netdev 10 eth7 [REGISTER] dev 5 mlx5_0 [NETDEV_ATTACH] dev 5 mlx5_0 port 1 netdev 11 eth8 [REGISTER] dev 6 mlx5_1 [NETDEV_ATTACH] dev 6 mlx5_1 port 1 netdev 12 eth9 [RENAME] dev 5 rocep8s0f0v0 [RENAME] dev 6 rocep8s0f0v1 [REGISTER] dev 7 mlx5_0 [NETDEV_ATTACH] dev 7 mlx5_0 port 1 netdev 13 eth10 [RENAME] dev 7 rocep8s0f0v2 [REGISTER] dev 8 mlx5_0 [NETDEV_ATTACH] dev 8 mlx5_0 port 1 netdev 14 eth11 [RENAME] dev 8 rocep8s0f0v3 $ ip link set eth2 name myeth2 [NETDEV_RENAME] netdev 4 myeth2 $ ip link set eth1 name myeth1 ** no events received, because eth1 is not attached to an IB device ** Signed-off-by: Chiara Meiohas Link: https://patch.msgid.link/093c978ef2766fd3ab4ff8798eeb68f2f11582f6.1730367038.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/uapi/rdma/rdma_netlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 39be09c0ffbb..9f9cf20c1cd8 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -638,6 +638,8 @@ enum rdma_nl_notify_event_type { RDMA_UNREGISTER_EVENT, RDMA_NETDEV_ATTACH_EVENT, RDMA_NETDEV_DETACH_EVENT, + RDMA_RENAME_EVENT, + RDMA_NETDEV_RENAME_EVENT, }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 2a8f6153e1c2db06a537a5c9d61102eb591776f1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Nov 2024 08:39:32 +0100 Subject: block: pre-calculate max_zone_append_sectors max_zone_append_sectors differs from all other queue limits in that the final value used is not stored in the queue_limits but needs to be obtained using queue_limits_max_zone_append_sectors helper. This not only adds (tiny) extra overhead to the I/O path, but also can be easily forgotten in file system code. Add a new max_hw_zone_append_sectors value to queue_limits which is set by the driver, and calculate max_zone_append_sectors from that and the other inputs in blk_validate_zoned_limits, similar to how max_sectors is calculated to fix this. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20241104073955.112324-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7bfc877e159e..6d1413bd69a5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -375,6 +375,7 @@ struct queue_limits { unsigned int max_user_discard_sectors; unsigned int max_secure_erase_sectors; unsigned int max_write_zeroes_sectors; + unsigned int max_hw_zone_append_sectors; unsigned int max_zone_append_sectors; unsigned int discard_granularity; unsigned int discard_alignment; @@ -1204,25 +1205,9 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q) return q->limits.max_segment_size; } -static inline unsigned int -queue_limits_max_zone_append_sectors(const struct queue_limits *l) -{ - unsigned int max_sectors = min(l->chunk_sectors, l->max_hw_sectors); - - return min_not_zero(l->max_zone_append_sectors, max_sectors); -} - -static inline unsigned int queue_max_zone_append_sectors(struct request_queue *q) -{ - if (!blk_queue_is_zoned(q)) - return 0; - - return queue_limits_max_zone_append_sectors(&q->limits); -} - static inline bool queue_emulates_zone_append(struct request_queue *q) { - return blk_queue_is_zoned(q) && !q->limits.max_zone_append_sectors; + return blk_queue_is_zoned(q) && !q->limits.max_hw_zone_append_sectors; } static inline bool bdev_emulates_zone_append(struct block_device *bdev) @@ -1233,7 +1218,7 @@ static inline bool bdev_emulates_zone_append(struct block_device *bdev) static inline unsigned int bdev_max_zone_append_sectors(struct block_device *bdev) { - return queue_max_zone_append_sectors(bdev_get_queue(bdev)); + return bdev_limits(bdev)->max_zone_append_sectors; } static inline unsigned int bdev_max_segments(struct block_device *bdev) -- cgit v1.2.3 From 9a783139614fb837da4ccb2f8ec6f0ddc802b3d3 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Mon, 4 Nov 2024 16:03:00 +1000 Subject: bpf: Move btf_type_is_struct_ptr() under CONFIG_BPF_SYSCALL The static inline btf_type_is_struct_ptr() function calls btf_type_skip_modifiers() which is guarded by CONFIG_BPF_SYSCALL. btf_type_is_struct_ptr() is also only called by CONFIG_BPF_SYSCALL ifdef code, so let's only expose btf_type_is_struct_ptr() if CONFIG_BPF_SYSCALL is defined. Signed-off-by: Alistair Francis Link: https://lore.kernel.org/r/20241104060300.421403-1-alistair.francis@wdc.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index 631060e3ad14..4214e76c9168 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -582,6 +582,16 @@ int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_ty bool btf_types_are_same(const struct btf *btf1, u32 id1, const struct btf *btf2, u32 id2); int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx); + +static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t) +{ + if (!btf_type_is_ptr(t)) + return false; + + t = btf_type_skip_modifiers(btf, t->type, NULL); + + return btf_type_is_struct(t); +} #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) @@ -661,15 +671,4 @@ static inline int btf_check_iter_arg(struct btf *btf, const struct btf_type *fun return -EOPNOTSUPP; } #endif - -static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t) -{ - if (!btf_type_is_ptr(t)) - return false; - - t = btf_type_skip_modifiers(btf, t->type, NULL); - - return btf_type_is_struct(t); -} - #endif -- cgit v1.2.3 From cb4158ce8ec8a5bb528cc1693356a5eb8058094d Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 4 Nov 2024 09:19:57 -0800 Subject: bpf: Mark raw_tp arguments with PTR_MAYBE_NULL Arguments to a raw tracepoint are tagged as trusted, which carries the semantics that the pointer will be non-NULL. However, in certain cases, a raw tracepoint argument may end up being NULL. More context about this issue is available in [0]. Thus, there is a discrepancy between the reality, that raw_tp arguments can actually be NULL, and the verifier's knowledge, that they are never NULL, causing explicit NULL checks to be deleted, and accesses to such pointers potentially crashing the kernel. To fix this, mark raw_tp arguments as PTR_MAYBE_NULL, and then special case the dereference and pointer arithmetic to permit it, and allow passing them into helpers/kfuncs; these exceptions are made for raw_tp programs only. Ensure that we don't do this when ref_obj_id > 0, as in that case this is an acquired object and doesn't need such adjustment. The reason we do mask_raw_tp_trusted_reg logic is because other will recheck in places whether the register is a trusted_reg, and then consider our register as untrusted when detecting the presence of the PTR_MAYBE_NULL flag. To allow safe dereference, we enable PROBE_MEM marking when we see loads into trusted pointers with PTR_MAYBE_NULL. While trusted raw_tp arguments can also be passed into helpers or kfuncs where such broken assumption may cause issues, a future patch set will tackle their case separately, as PTR_TO_BTF_ID (without PTR_TRUSTED) can already be passed into helpers and causes similar problems. Thus, they are left alone for now. It is possible that these checks also permit passing non-raw_tp args that are trusted PTR_TO_BTF_ID with null marking. In such a case, allowing dereference when pointer is NULL expands allowed behavior, so won't regress existing programs, and the case of passing these into helpers is the same as above and will be dealt with later. Also update the failure case in tp_btf_nullable selftest to capture the new behavior, as the verifier will no longer cause an error when directly dereference a raw tracepoint argument marked as __nullable. [0]: https://lore.kernel.org/bpf/ZrCZS6nisraEqehw@jlelli-thinkpadt14gen4.remote.csb Reviewed-by: Jiri Olsa Reported-by: Juri Lelli Tested-by: Juri Lelli Fixes: 3f00c5239344 ("bpf: Allow trusted pointers to be passed to KF_TRUSTED_ARGS kfuncs") Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241104171959.2938862-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c3ba4d475174..1b84613b10ac 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3495,4 +3495,10 @@ static inline bool bpf_is_subprog(const struct bpf_prog *prog) return prog->aux->func_idx != 0; } +static inline bool bpf_prog_is_raw_tp(const struct bpf_prog *prog) +{ + return prog->type == BPF_PROG_TYPE_TRACING && + prog->expected_attach_type == BPF_TRACE_RAW_TP; +} + #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From ad37bcd965fda43f34cf5cc051f5d310880bd1e7 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 30 Oct 2024 16:04:25 +0000 Subject: rust: add tracepoint support Make it possible to have Rust code call into tracepoints defined by C code. It is still required that the tracepoint is declared in a C header, and that this header is included in the input to bindgen. Instead of calling __DO_TRACE directly, the exported rust_do_trace_ function calls an inline helper function. This is because the `cond` argument does not exist at the callsite of DEFINE_RUST_DO_TRACE. __DECLARE_TRACE always emits an inline static and an extern declaration that is only used when CREATE_RUST_TRACE_POINTS is set. These should not end up in the final binary so it is not a problem that they sometimes are emitted without a user. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Jason Baron Cc: Ard Biesheuvel Cc: Miguel Ojeda Cc: Alex Gaynor Cc: Wedson Almeida Filho Cc: " =?utf-8?q?Bj=C3=B6rn_Roy_Baron?= " Cc: Benno Lossin Cc: Andreas Hindborg Cc: Arnd Bergmann Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Sean Christopherson Cc: Uros Bizjak Cc: Catalin Marinas Cc: Will Deacon Cc: Marc Zyngier Cc: Oliver Upton Cc: Mark Rutland Cc: Ryan Roberts Cc: Fuad Tabba Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Anup Patel Cc: Andrew Jones Cc: Alexandre Ghiti Cc: Conor Dooley Cc: Samuel Holland Cc: Huacai Chen Cc: WANG Xuerui Cc: Bibo Mao Cc: Tiezhu Yang Cc: Andrew Morton Cc: Tianrui Zhao Link: https://lore.kernel.org/20241030-tracepoint-v12-2-eec7f0f8ad22@google.com Reviewed-by: Carlos Llamas Reviewed-by: Gary Guo Reviewed-by: Boqun Feng Signed-off-by: Alice Ryhl Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 28 +++++++++++++++++++++++++++- include/trace/define_trace.h | 12 ++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 0dc67fad706c..84c4924e499f 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -225,6 +225,18 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) preempt_enable_notrace(); \ } while (0) +/* + * Declare an exported function that Rust code can call to trigger this + * tracepoint. This function does not include the static branch; that is done + * in Rust to avoid a function call when the tracepoint is disabled. + */ +#define DEFINE_RUST_DO_TRACE(name, proto, args) +#define __DEFINE_RUST_DO_TRACE(name, proto, args) \ + notrace void rust_do_trace_##name(proto) \ + { \ + __rust_do_trace_##name(args); \ + } + /* * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the @@ -240,6 +252,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) extern int __traceiter_##name(data_proto); \ DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name); \ extern struct tracepoint __tracepoint_##name; \ + extern void rust_do_trace_##name(proto); \ static inline int \ register_trace_##name(void (*probe)(data_proto), void *data) \ { \ @@ -271,6 +284,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), cond, PARAMS(data_proto)) \ + static inline void __rust_do_trace_##name(proto) \ + { \ + __DO_TRACE(name, \ + TP_ARGS(args), \ + TP_CONDITION(cond), 0); \ + } \ static inline void trace_##name(proto) \ { \ if (static_branch_unlikely(&__tracepoint_##name.key)) \ @@ -285,6 +304,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __DECLARE_TRACE_SYSCALL(name, proto, args, cond, data_proto) \ __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), cond, PARAMS(data_proto)) \ + static inline void __rust_do_trace_##name(proto) \ + { \ + __DO_TRACE(name, \ + TP_ARGS(args), \ + TP_CONDITION(cond), 1); \ + } \ static inline void trace_##name(proto) \ { \ if (static_branch_unlikely(&__tracepoint_##name.key)) \ @@ -339,7 +364,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) void __probestub_##_name(void *__data, proto) \ { \ } \ - DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name); + DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name); \ + DEFINE_RUST_DO_TRACE(_name, TP_PROTO(proto), TP_ARGS(args)) #define DEFINE_TRACE(name, proto, args) \ DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args)); diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index ff5fa17a6259..0557626b6f6a 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -76,6 +76,13 @@ #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) +/* If requested, create helpers for calling these tracepoints from Rust. */ +#ifdef CREATE_RUST_TRACE_POINTS +#undef DEFINE_RUST_DO_TRACE +#define DEFINE_RUST_DO_TRACE(name, proto, args) \ + __DEFINE_RUST_DO_TRACE(name, PARAMS(proto), PARAMS(args)) +#endif + #undef TRACE_INCLUDE #undef __TRACE_INCLUDE @@ -134,6 +141,11 @@ # undef UNDEF_TRACE_INCLUDE_PATH #endif +#ifdef CREATE_RUST_TRACE_POINTS +# undef DEFINE_RUST_DO_TRACE +# define DEFINE_RUST_DO_TRACE(name, proto, args) +#endif + /* We may be processing more files */ #define CREATE_TRACE_POINTS -- cgit v1.2.3 From 91d39024e1b02914cc5e2dbc137908e29b269ce4 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 30 Oct 2024 16:04:26 +0000 Subject: rust: samples: add tracepoint to Rust sample This updates the Rust printing sample to invoke a tracepoint. This ensures that we have a user in-tree from the get-go even though the patch is being merged before its real user. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Jason Baron Cc: Ard Biesheuvel Cc: Miguel Ojeda Cc: Alex Gaynor Cc: Wedson Almeida Filho Cc: Gary Guo Cc: " =?utf-8?q?Bj=C3=B6rn_Roy_Baron?= " Cc: Benno Lossin Cc: Andreas Hindborg Cc: Arnd Bergmann Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Sean Christopherson Cc: Uros Bizjak Cc: Catalin Marinas Cc: Will Deacon Cc: Marc Zyngier Cc: Oliver Upton Cc: Mark Rutland Cc: Ryan Roberts Cc: Fuad Tabba Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Anup Patel Cc: Andrew Jones Cc: Alexandre Ghiti Cc: Conor Dooley Cc: Samuel Holland Cc: Huacai Chen Cc: WANG Xuerui Cc: Bibo Mao Cc: Tiezhu Yang Cc: Andrew Morton Cc: Tianrui Zhao Link: https://lore.kernel.org/20241030-tracepoint-v12-3-eec7f0f8ad22@google.com Reviewed-by: Boqun Feng Signed-off-by: Alice Ryhl Signed-off-by: Steven Rostedt (Google) --- include/trace/events/rust_sample.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/trace/events/rust_sample.h (limited to 'include') diff --git a/include/trace/events/rust_sample.h b/include/trace/events/rust_sample.h new file mode 100644 index 000000000000..dbc80ca2e465 --- /dev/null +++ b/include/trace/events/rust_sample.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tracepoints for `samples/rust/rust_print.rs`. + * + * Copyright (C) 2024 Google, Inc. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rust_sample + +#if !defined(_RUST_SAMPLE_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _RUST_SAMPLE_TRACE_H + +#include + +TRACE_EVENT(rust_sample_loaded, + TP_PROTO(int magic_number), + TP_ARGS(magic_number), + TP_STRUCT__entry( + __field(int, magic_number) + ), + TP_fast_assign( + __entry->magic_number = magic_number; + ), + TP_printk("magic=%d", __entry->magic_number) +); + +#endif /* _RUST_SAMPLE_TRACE_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 5609296750afd6462a4d994b6803ccc5e8bf1d4e Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Wed, 30 Oct 2024 16:39:51 +0000 Subject: PM: EM: Add min/max available performance state limits On some devices there are HW dependencies for shared frequency and voltage between devices. It will impact Energy Aware Scheduler (EAS) decision, where CPUs share the voltage & frequency domain with other CPUs or devices e.g. - Mid CPUs + Big CPU - Little CPU + L3 cache in DSU - some other device + Little CPUs Detailed explanation of one example: When the L3 cache frequency is increased, the affected Little CPUs might run at higher voltage and frequency. That higher voltage causes higher CPU power and thus more energy is used for running the tasks. This is important for background running tasks, which try to run on energy efficient CPUs. Therefore, add performance state limits which are applied for the device (in this case CPU). This is important on SoCs with HW dependencies mentioned above so that the Energy Aware Scheduler (EAS) does not use performance states outside the valid min-max range for energy calculation. Signed-off-by: Lukasz Luba Link: https://patch.msgid.link/20241030164126.1263793-2-lukasz.luba@arm.com Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 1ff52020cf75..752e0b297582 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -55,6 +55,8 @@ struct em_perf_table { * struct em_perf_domain - Performance domain * @em_table: Pointer to the runtime modifiable em_perf_table * @nr_perf_states: Number of performance states + * @min_perf_state: Minimum allowed Performance State index + * @max_perf_state: Maximum allowed Performance State index * @flags: See "em_perf_domain flags" * @cpus: Cpumask covering the CPUs of the domain. It's here * for performance reasons to avoid potential cache @@ -70,6 +72,8 @@ struct em_perf_table { struct em_perf_domain { struct em_perf_table __rcu *em_table; int nr_perf_states; + int min_perf_state; + int max_perf_state; unsigned long flags; unsigned long cpus[]; }; @@ -173,13 +177,14 @@ void em_table_free(struct em_perf_table __rcu *table); int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, int nr_states); int em_dev_update_chip_binning(struct device *dev); +int em_update_performance_limits(struct em_perf_domain *pd, + unsigned long freq_min_khz, unsigned long freq_max_khz); /** * em_pd_get_efficient_state() - Get an efficient performance state from the EM * @table: List of performance states, in ascending order - * @nr_perf_states: Number of performance states + * @pd: performance domain for which this must be done * @max_util: Max utilization to map with the EM - * @pd_flags: Performance Domain flags * * It is called from the scheduler code quite frequently and as a consequence * doesn't implement any check. @@ -188,13 +193,16 @@ int em_dev_update_chip_binning(struct device *dev); * requirement. */ static inline int -em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states, - unsigned long max_util, unsigned long pd_flags) +em_pd_get_efficient_state(struct em_perf_state *table, + struct em_perf_domain *pd, unsigned long max_util) { + unsigned long pd_flags = pd->flags; + int min_ps = pd->min_perf_state; + int max_ps = pd->max_perf_state; struct em_perf_state *ps; int i; - for (i = 0; i < nr_perf_states; i++) { + for (i = min_ps; i <= max_ps; i++) { ps = &table[i]; if (ps->performance >= max_util) { if (pd_flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES && @@ -204,7 +212,7 @@ em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states, } } - return nr_perf_states - 1; + return max_ps; } /** @@ -253,8 +261,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * requested performance. */ em_table = rcu_dereference(pd->em_table); - i = em_pd_get_efficient_state(em_table->state, pd->nr_perf_states, - max_util, pd->flags); + i = em_pd_get_efficient_state(em_table->state, pd, max_util); ps = &em_table->state[i]; /* @@ -391,6 +398,12 @@ static inline int em_dev_update_chip_binning(struct device *dev) { return -EINVAL; } +static inline +int em_update_performance_limits(struct em_perf_domain *pd, + unsigned long freq_min_khz, unsigned long freq_max_khz) +{ + return -EINVAL; +} #endif #endif -- cgit v1.2.3 From 9e0933c21c128d6d8ac4d8aae0babaf9a43100b8 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 4 Nov 2024 16:14:02 -0800 Subject: fs: iomap: Atomic write support Support direct I/O atomic writes by producing a single bio with REQ_ATOMIC flag set. Initially FSes (XFS) should only support writing a single FS block atomically. As with any atomic write, we should produce a single bio which covers the complete write length. Reviewed-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" Signed-off-by: John Garry Reviewed-by: Ritesh Harjani (IBM) [djwong: clarify a couple of things in the docs] Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 4ad12a3c8bae..c7644bdcfca3 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -178,6 +178,7 @@ struct iomap_folio_ops { #else #define IOMAP_DAX 0 #endif /* CONFIG_FS_DAX */ +#define IOMAP_ATOMIC (1 << 9) struct iomap_ops { /* -- cgit v1.2.3 From 0a2cdeeae9ddc14d752173be6af98bc9fb45c6ad Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 4 Nov 2024 15:00:41 +0800 Subject: net: tcp: replace the document for "lsndtime" in tcp_sock Commit d5fed5addb2b ("tcp: reorganize tcp_sock fast path variables") moved the fields around and misplaced the documentation for "lsndtime". So, let's replace it in the proper place. Signed-off-by: Menglong Dong Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241104070041.64302-1-dongml2@chinatelecom.cn Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 6a5e08b937b3..f88daaa76d83 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -200,7 +200,6 @@ struct tcp_sock { /* TX read-mostly hotpath cache lines */ __cacheline_group_begin(tcp_sock_read_tx); - /* timestamp of last sent data packet (for restart window) */ u32 max_window; /* Maximal window ever seen from peer */ u32 rcv_ssthresh; /* Current window clamp */ u32 reordering; /* Packet reordering metric. */ @@ -263,7 +262,7 @@ struct tcp_sock { u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ u32 pushed_seq; /* Last pushed seq, required to talk to windows */ - u32 lsndtime; + u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ u32 mdev_us; /* medium deviation */ u32 rtt_seq; /* sequence number to update rttvar */ u64 tcp_wstamp_ns; /* departure time for next sent data packet */ -- cgit v1.2.3 From 690e50dd69ee48e43e0f7c42396487da1b81be14 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 3 Nov 2024 08:53:14 -0800 Subject: tools: ynl-gen: de-kdocify enums with no doc for entries Sometimes the names of the enum entries are self-explanatory or come from standards. Forcing authors to write trivial kdoc for each of such entries seems unreasonable, but kdoc would complain about undocumented entries. Detect enums which only have documentation for the entire type and no documentation for entries. Render their doc as a plain comment. Link: https://patch.msgid.link/20241103165314.1631237-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/dpll.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h index 2b7ec2da4bcc..bf97d4b6d51f 100644 --- a/include/uapi/linux/dpll.h +++ b/include/uapi/linux/dpll.h @@ -79,13 +79,13 @@ enum dpll_lock_status_error { DPLL_LOCK_STATUS_ERROR_MAX = (__DPLL_LOCK_STATUS_ERROR_MAX - 1) }; -/** - * enum dpll_clock_quality_level - level of quality of a clock device. This - * mainly applies when the dpll lock-status is DPLL_LOCK_STATUS_HOLDOVER. The - * current list is defined according to the table 11-7 contained in ITU-T - * G.8264/Y.1364 document. One may extend this list freely by other ITU-T - * defined clock qualities, or different ones defined by another - * standardization body (for those, please use different prefix). +/* + * level of quality of a clock device. This mainly applies when the dpll + * lock-status is DPLL_LOCK_STATUS_HOLDOVER. The current list is defined + * according to the table 11-7 contained in ITU-T G.8264/Y.1364 document. One + * may extend this list freely by other ITU-T defined clock qualities, or + * different ones defined by another standardization body (for those, please + * use different prefix). */ enum dpll_clock_quality_level { DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_PRC = 1, -- cgit v1.2.3 From a12fcef429e17cb3db47cde0692a185d3ca712a3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 29 Oct 2024 18:43:15 +0200 Subject: soc: fsl_qbman: use be16_to_cpu() in qm_sg_entry_get_off() struct qm_sg_entry :: offset is a 13-bit field, declared as __be16. When using be32_to_cpu(), a wrong value will be calculated on little endian systems (Arm), because type promotion from 16-bit to 32-bit, which is done before the byte swap and always in the CPU native endianness, changes the value of the scatter/gather list entry offset in big-endian interpretation (adds two zero bytes in the LSB interpretation). The result of the byte swap is ANDed with GENMASK(12, 0), so the result is always zero, because only those bytes added by type promotion remain after the application of the bit mask. The impact of the bug is that scatter/gather frames with a non-zero offset into the buffer are treated by the driver as if they had a zero offset. This is all in theory, because in practice, qm_sg_entry_get_off() has a single caller, where the bug is inconsequential, because at that call site the buffer offset will always be zero, as will be explained in the subsequent change. Flagged by sparse: warning: cast to restricted __be32 warning: cast from restricted __be16 Signed-off-by: Vladimir Oltean Reviewed-by: Breno Leitao Acked-by: Christophe Leroy Acked-by: Madalin Bucur Link: https://patch.msgid.link/20241029164317.50182-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- include/soc/fsl/qman.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/soc/fsl/qman.h b/include/soc/fsl/qman.h index 0d3d6beb7fdb..7f7a4932d7f1 100644 --- a/include/soc/fsl/qman.h +++ b/include/soc/fsl/qman.h @@ -242,7 +242,7 @@ static inline void qm_sg_entry_set_f(struct qm_sg_entry *sg, int len) static inline int qm_sg_entry_get_off(const struct qm_sg_entry *sg) { - return be32_to_cpu(sg->offset) & QM_SG_OFF_MASK; + return be16_to_cpu(sg->offset) & QM_SG_OFF_MASK; } /* "Frame Dequeue Response" */ -- cgit v1.2.3 From f248ff14b7589306c8af922465aefedf9b10fa9e Mon Sep 17 00:00:00 2001 From: Desnes Nunes Date: Thu, 31 Oct 2024 11:28:00 -0300 Subject: misc: rtsx: Cleanup on DRV_NAME cardreader variables The rtsx_pci_ms memstick driver has been dropped, thus there is no more need for DRV_NAME_RTSX_PCI_MS variable. Additionally, this also stand- arizes DRV_NAME variables on alcor_pci and rtsx_usb drivers. Fixes: d0f459259c13 ("memstick: rtsx_pci_ms: Remove Realtek PCI memstick driver") Signed-off-by: Desnes Nunes Link: https://lore.kernel.org/r/20241031142801.1141680-1-desnesn@redhat.com Signed-off-by: Greg Kroah-Hartman --- include/linux/alcor_pci.h | 1 + include/linux/rtsx_common.h | 1 - include/linux/rtsx_usb.h | 4 ++++ 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/alcor_pci.h b/include/linux/alcor_pci.h index c4a0b23846d8..dcb1d37dabc2 100644 --- a/include/linux/alcor_pci.h +++ b/include/linux/alcor_pci.h @@ -11,6 +11,7 @@ #define ALCOR_SD_CARD 0 #define ALCOR_MS_CARD 1 +#define DRV_NAME_ALCOR_PCI "alcor_pci" #define DRV_NAME_ALCOR_PCI_SDMMC "alcor_sdmmc" #define DRV_NAME_ALCOR_PCI_MS "alcor_ms" diff --git a/include/linux/rtsx_common.h b/include/linux/rtsx_common.h index bf290ad14c57..da9c8c6b5d50 100644 --- a/include/linux/rtsx_common.h +++ b/include/linux/rtsx_common.h @@ -12,7 +12,6 @@ #define DRV_NAME_RTSX_PCI "rtsx_pci" #define DRV_NAME_RTSX_PCI_SDMMC "rtsx_pci_sdmmc" -#define DRV_NAME_RTSX_PCI_MS "rtsx_pci_ms" #define RTSX_REG_PAIR(addr, val) (((u32)(addr) << 16) | (u8)(val)) diff --git a/include/linux/rtsx_usb.h b/include/linux/rtsx_usb.h index 3247ed8e9ff0..f267a06c6b1e 100644 --- a/include/linux/rtsx_usb.h +++ b/include/linux/rtsx_usb.h @@ -12,6 +12,10 @@ #include +#define DRV_NAME_RTSX_USB "rtsx_usb" +#define DRV_NAME_RTSX_USB_SDMMC "rtsx_usb_sdmmc" +#define DRV_NAME_RTSX_USB_MS "rtsx_usb_ms" + /* related module names */ #define RTSX_USB_SD_CARD 0 #define RTSX_USB_MS_CARD 1 -- cgit v1.2.3 From 7fce207af5ec074a9a50e90eb866b17ca4a90f06 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Thu, 24 Oct 2024 10:18:07 -0700 Subject: mm/writeback: add folio_mark_dirty_lock() Add a new convenience helper folio_mark_dirty_lock() that grabs the folio lock before calling folio_mark_dirty(). Refactor set_page_dirty_lock() to directly use folio_mark_dirty_lock(). Signed-off-by: Joanne Koong Signed-off-by: Miklos Szeredi --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ecf63d2b0582..446d7096c48f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2539,6 +2539,7 @@ struct kvec; struct page *get_dump_page(unsigned long addr); bool folio_mark_dirty(struct folio *folio); +bool folio_mark_dirty_lock(struct folio *folio); bool set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); -- cgit v1.2.3 From 35890f85573c2ebbbf3491dc66f7ee2ad63055af Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 30 Oct 2024 21:20:45 -0300 Subject: vfio: Remove VFIO_TYPE1_NESTING_IOMMU This control causes the ARM SMMU drivers to choose a stage 2 implementation for the IO pagetable (vs the stage 1 usual default), however this choice has no significant visible impact to the VFIO user. Further qemu never implemented this and no other userspace user is known. The original description in commit f5c9ecebaf2a ("vfio/iommu_type1: add new VFIO_TYPE1_NESTING_IOMMU IOMMU type") suggested this was to "provide SMMU translation services to the guest operating system" however the rest of the API to set the guest table pointer for the stage 1 and manage invalidation was never completed, or at least never upstreamed, rendering this part useless dead code. Upstream has now settled on iommufd as the uAPI for controlling nested translation. Choosing the stage 2 implementation should be done by through the IOMMU_HWPT_ALLOC_NEST_PARENT flag during domain allocation. Remove VFIO_TYPE1_NESTING_IOMMU and everything under it including the enable_nesting iommu_domain_op. Just in-case there is some userspace using this continue to treat requesting it as a NOP, but do not advertise support any more. Acked-by: Alex Williamson Reviewed-by: Mostafa Saleh Reviewed-by: Kevin Tian Reviewed-by: Jerry Snitselaar Reviewed-by: Donald Dutile Tested-by: Nicolin Chen Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Signed-off-by: Will Deacon --- include/linux/iommu.h | 3 --- include/uapi/linux/vfio.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bd722f473635..c88d18d2c928 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -635,7 +635,6 @@ struct iommu_ops { * @enforce_cache_coherency: Prevent any kind of DMA from bypassing IOMMU_CACHE, * including no-snoop TLPs on PCIe or other platform * specific mechanisms. - * @enable_nesting: Enable nesting * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*) * @free: Release the domain after use. */ @@ -663,7 +662,6 @@ struct iommu_domain_ops { dma_addr_t iova); bool (*enforce_cache_coherency)(struct iommu_domain *domain); - int (*enable_nesting)(struct iommu_domain *domain); int (*set_pgtable_quirks)(struct iommu_domain *domain, unsigned long quirks); @@ -844,7 +842,6 @@ extern void iommu_group_put(struct iommu_group *group); extern int iommu_group_id(struct iommu_group *group); extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *); -int iommu_enable_nesting(struct iommu_domain *domain); int iommu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirks); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 2b68e6cdf190..c8dbf8219c4f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -35,7 +35,7 @@ #define VFIO_EEH 5 /* Two-stage IOMMU */ -#define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ +#define __VFIO_RESERVED_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ #define VFIO_SPAPR_TCE_v2_IOMMU 7 -- cgit v1.2.3 From 1b8655bb8d977ca110436c1cd0ca957c19670c1e Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 30 Oct 2024 21:20:46 -0300 Subject: ACPICA: IORT: Update for revision E.f ACPICA commit c4f5c083d24df9ddd71d5782c0988408cf0fc1ab The IORT spec, Issue E.f (April 2024), adds a new CANWBS bit to the Memory Access Flag field in the Memory Access Properties table, mainly for a PCI Root Complex. This CANWBS defines the coherency of memory accesses to be not marked IOWB cacheable/shareable. Its value further implies the coherency impact from a pair of mismatched memory attributes (e.g. in a nested translation case): 0x0: Use of mismatched memory attributes for accesses made by this device may lead to a loss of coherency. 0x1: Coherency of accesses made by this device to locations in Conventional memory are ensured as follows, even if the memory attributes for the accesses presented by the device or provided by the SMMU are different from Inner and Outer Write-back cacheable, Shareable. Link: https://github.com/acpica/acpica/commit/c4f5c083 Acked-by: Rafael J. Wysocki Signed-off-by: Nicolin Chen Acked-by: Hanjun Guo Tested-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Reviewed-by: Donald Dutile Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Signed-off-by: Will Deacon --- include/acpi/actbl2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index d3858eebc255..2e917a8f8bca 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -453,7 +453,7 @@ struct acpi_table_ccel { * IORT - IO Remapping Table * * Conforms to "IO Remapping Table System Software on ARM Platforms", - * Document number: ARM DEN 0049E.e, Sep 2022 + * Document number: ARM DEN 0049E.f, Apr 2024 * ******************************************************************************/ @@ -524,6 +524,7 @@ struct acpi_iort_memory_access { #define ACPI_IORT_MF_COHERENCY (1) #define ACPI_IORT_MF_ATTRIBUTES (1<<1) +#define ACPI_IORT_MF_CANWBS (1<<2) /* * IORT node specific subtables -- cgit v1.2.3 From 807404d66fcf898d4bcc6a3e3edb07ffd5b88400 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 30 Oct 2024 21:20:47 -0300 Subject: ACPI/IORT: Support CANWBS memory access flag The IORT spec, Issue E.f (April 2024), adds a new CANWBS bit to the Memory Access Flag field in the Memory Access Properties table, mainly for a PCI Root Complex. This CANWBS defines the coherency of memory accesses to be not marked IOWB cacheable/shareable. Its value further implies the coherency impact from a pair of mismatched memory attributes (e.g. in a nested translation case): 0x0: Use of mismatched memory attributes for accesses made by this device may lead to a loss of coherency. 0x1: Coherency of accesses made by this device to locations in Conventional memory are ensured as follows, even if the memory attributes for the accesses presented by the device or provided by the SMMU are different from Inner and Outer Write-back cacheable, Shareable. Note that the loss of coherency on a CANWBS-unsupported HW typically could occur to an SMMU that doesn't implement the S2FWB feature where additional cache flush operations would be required to prevent that from happening. Add a new ACPI_IORT_MF_CANWBS flag and set IOMMU_FWSPEC_PCI_RC_CANWBS upon the presence of this new flag. CANWBS and S2FWB are similar features, in that they both guarantee the VM can not violate coherency, however S2FWB can be bypassed by PCI No Snoop TLPs, while CANWBS cannot. Thus CANWBS meets the requirements to set IOMMU_CAP_ENFORCE_CACHE_COHERENCY. Architecturally ARM has expected that VFIO would disable No Snoop through PCI Config space, if this is done then the two would have the same protections. Tested-by: Nicolin Chen Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Acked-by: Hanjun Guo Reviewed-by: Jerry Snitselaar Reviewed-by: Donald Dutile Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Signed-off-by: Will Deacon --- include/linux/iommu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c88d18d2c928..4ad9b9ec6c9b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -991,6 +991,8 @@ struct iommu_fwspec { /* ATS is supported */ #define IOMMU_FWSPEC_PCI_RC_ATS (1 << 0) +/* CANWBS is supported */ +#define IOMMU_FWSPEC_PCI_RC_CANWBS (1 << 1) /* * An iommu attach handle represents a relationship between an iommu domain -- cgit v1.2.3 From 6912ec91828b8d7f21b393befad1c36dadbcd751 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 30 Oct 2024 21:20:49 -0300 Subject: iommu/arm-smmu-v3: Support IOMMU_GET_HW_INFO via struct arm_smmu_hw_info For virtualization cases the IDR/IIDR/AIDR values of the actual SMMU instance need to be available to the VMM so it can construct an appropriate vSMMUv3 that reflects the correct HW capabilities. For userspace page tables these values are required to constrain the valid values within the CD table and the IOPTEs. The kernel does not sanitize these values. If building a VMM then userspace is required to only forward bits into a VM that it knows it can implement. Some bits will also require a VMM to detect if appropriate kernel support is available such as for ATS and BTM. Start a new file and kconfig for the advanced iommufd support. This lets it be compiled out for kernels that are not intended to support virtualization, and allows distros to leave it disabled until they are shipping a matching qemu too. Tested-by: Nicolin Chen Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Reviewed-by: Jerry Snitselaar Reviewed-by: Donald Dutile Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/5-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Signed-off-by: Will Deacon --- include/uapi/linux/iommufd.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 72010f71c5e4..b5c94fecb94c 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -484,15 +484,50 @@ struct iommu_hw_info_vtd { __aligned_u64 ecap_reg; }; +/** + * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information + * (IOMMU_HW_INFO_TYPE_ARM_SMMUV3) + * + * @flags: Must be set to 0 + * @__reserved: Must be 0 + * @idr: Implemented features for ARM SMMU Non-secure programming interface + * @iidr: Information about the implementation and implementer of ARM SMMU, + * and architecture version supported + * @aidr: ARM SMMU architecture version + * + * For the details of @idr, @iidr and @aidr, please refer to the chapters + * from 6.3.1 to 6.3.6 in the SMMUv3 Spec. + * + * User space should read the underlying ARM SMMUv3 hardware information for + * the list of supported features. + * + * Note that these values reflect the raw HW capability, without any insight if + * any required kernel driver support is present. Bits may be set indicating the + * HW has functionality that is lacking kernel software support, such as BTM. If + * a VMM is using this information to construct emulated copies of these + * registers it should only forward bits that it knows it can support. + * + * In future, presence of required kernel support will be indicated in flags. + */ +struct iommu_hw_info_arm_smmuv3 { + __u32 flags; + __u32 __reserved; + __u32 idr[6]; + __u32 iidr; + __u32 aidr; +}; + /** * enum iommu_hw_info_type - IOMMU Hardware Info Types * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware * info * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type + * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type */ enum iommu_hw_info_type { IOMMU_HW_INFO_TYPE_NONE = 0, IOMMU_HW_INFO_TYPE_INTEL_VTD = 1, + IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2, }; /** -- cgit v1.2.3 From 5205b63099507a84458075c3ca7e648407e6c8cc Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 4 Nov 2024 11:27:57 +0800 Subject: media: uapi: Add MEDIA_BUS_FMT_RGB101010_1X7X5_{SPWG, JEIDA} Add two media bus formats that identify 30-bit RGB pixels transmitted by a LVDS link with five differential data pairs, serialized into 7 time slots, using standard SPWG/VESA or JEIDA data mapping. Signed-off-by: Liu Ying Acked-by: Sakari Ailus Link: https://patchwork.freedesktop.org/patch/msgid/20241104032806.611890-5-victor.liu@nxp.com Signed-off-by: Dmitry Baryshkov --- include/uapi/linux/media-bus-format.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h index d4c1d991014b..ff62056feed5 100644 --- a/include/uapi/linux/media-bus-format.h +++ b/include/uapi/linux/media-bus-format.h @@ -34,7 +34,7 @@ #define MEDIA_BUS_FMT_FIXED 0x0001 -/* RGB - next is 0x1026 */ +/* RGB - next is 0x1028 */ #define MEDIA_BUS_FMT_RGB444_1X12 0x1016 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE 0x1001 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE 0x1002 @@ -68,6 +68,8 @@ #define MEDIA_BUS_FMT_ARGB8888_1X32 0x100d #define MEDIA_BUS_FMT_RGB888_1X32_PADHI 0x100f #define MEDIA_BUS_FMT_RGB101010_1X30 0x1018 +#define MEDIA_BUS_FMT_RGB101010_1X7X5_SPWG 0x1026 +#define MEDIA_BUS_FMT_RGB101010_1X7X5_JEIDA 0x1027 #define MEDIA_BUS_FMT_RGB666_1X36_CPADLO 0x1020 #define MEDIA_BUS_FMT_RGB888_1X36_CPADLO 0x1021 #define MEDIA_BUS_FMT_RGB121212_1X36 0x1019 -- cgit v1.2.3 From 606410292f54ef08632bdfd5c58974cf4ebc3cc9 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 4 Nov 2024 11:27:59 +0800 Subject: drm: of: Add drm_of_lvds_get_dual_link_pixel_order_sink() drm_of_lvds_get_dual_link_pixel_order() gets LVDS dual-link source pixel order. Similar to it, add it's counterpart function drm_of_lvds_get_dual_link_pixel_order_sink() to get LVDS dual-link sink pixel order. Suggested-by: Dmitry Baryshkov Signed-off-by: Liu Ying Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20241104032806.611890-7-victor.liu@nxp.com Signed-off-by: Dmitry Baryshkov --- include/drm/drm_of.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/drm/drm_of.h b/include/drm/drm_of.h index 02d1cdd7f798..7f0256dae3f1 100644 --- a/include/drm/drm_of.h +++ b/include/drm/drm_of.h @@ -52,6 +52,8 @@ int drm_of_find_panel_or_bridge(const struct device_node *np, struct drm_bridge **bridge); int drm_of_lvds_get_dual_link_pixel_order(const struct device_node *port1, const struct device_node *port2); +int drm_of_lvds_get_dual_link_pixel_order_sink(struct device_node *port1, + struct device_node *port2); int drm_of_lvds_get_data_mapping(const struct device_node *port); int drm_of_get_data_lanes_count(const struct device_node *endpoint, const unsigned int min, const unsigned int max); @@ -109,6 +111,13 @@ drm_of_lvds_get_dual_link_pixel_order(const struct device_node *port1, return -EINVAL; } +static inline int +drm_of_lvds_get_dual_link_pixel_order_sink(struct device_node *port1, + struct device_node *port2) +{ + return -EINVAL; +} + static inline int drm_of_lvds_get_data_mapping(const struct device_node *port) { -- cgit v1.2.3 From 5c1806c41ce0a0110db5dd4c483cf2dc28b3ddf0 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 4 Nov 2024 16:43:07 +0100 Subject: kcsan, seqlock: Support seqcount_latch_t While fuzzing an arm64 kernel, Alexander Potapenko reported: | BUG: KCSAN: data-race in ktime_get_mono_fast_ns / timekeeping_update | | write to 0xffffffc082e74248 of 56 bytes by interrupt on cpu 0: | update_fast_timekeeper kernel/time/timekeeping.c:430 [inline] | timekeeping_update+0x1d8/0x2d8 kernel/time/timekeeping.c:768 | timekeeping_advance+0x9e8/0xb78 kernel/time/timekeeping.c:2344 | update_wall_time+0x18/0x38 kernel/time/timekeeping.c:2360 | [...] | | read to 0xffffffc082e74258 of 8 bytes by task 5260 on cpu 1: | __ktime_get_fast_ns kernel/time/timekeeping.c:372 [inline] | ktime_get_mono_fast_ns+0x88/0x174 kernel/time/timekeeping.c:489 | init_srcu_struct_fields+0x40c/0x530 kernel/rcu/srcutree.c:263 | init_srcu_struct+0x14/0x20 kernel/rcu/srcutree.c:311 | [...] | | value changed: 0x000002f875d33266 -> 0x000002f877416866 | | Reported by Kernel Concurrency Sanitizer on: | CPU: 1 UID: 0 PID: 5260 Comm: syz.2.7483 Not tainted 6.12.0-rc3-dirty #78 This is a false positive data race between a seqcount latch writer and a reader accessing stale data. Since its introduction, KCSAN has never understood the seqcount_latch interface (due to being unannotated). Unlike the regular seqlock interface, the seqcount_latch interface for latch writers never has had a well-defined critical section, making it difficult to teach tooling where the critical section starts and ends. Introduce an instrumentable (non-raw) seqcount_latch interface, with which we can clearly denote writer critical sections. This both helps readability and tooling like KCSAN to understand when the writer is done updating all latch copies. Fixes: 88ecd153be95 ("seqlock, kcsan: Add annotations for KCSAN") Reported-by: Alexander Potapenko Co-developed-by: "Peter Zijlstra (Intel)" Signed-off-by: "Peter Zijlstra (Intel)" Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20241104161910.780003-4-elver@google.com --- include/linux/seqlock.h | 86 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 71 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index fffeb754880f..45eee0e5dca0 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -621,6 +621,23 @@ static __always_inline unsigned raw_read_seqcount_latch(const seqcount_latch_t * return READ_ONCE(s->seqcount.sequence); } +/** + * read_seqcount_latch() - pick even/odd latch data copy + * @s: Pointer to seqcount_latch_t + * + * See write_seqcount_latch() for details and a full reader/writer usage + * example. + * + * Return: sequence counter raw value. Use the lowest bit as an index for + * picking which data copy to read. The full counter must then be checked + * with read_seqcount_latch_retry(). + */ +static __always_inline unsigned read_seqcount_latch(const seqcount_latch_t *s) +{ + kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); + return raw_read_seqcount_latch(s); +} + /** * raw_read_seqcount_latch_retry() - end a seqcount_latch_t read section * @s: Pointer to seqcount_latch_t @@ -635,9 +652,34 @@ raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) return unlikely(READ_ONCE(s->seqcount.sequence) != start); } +/** + * read_seqcount_latch_retry() - end a seqcount_latch_t read section + * @s: Pointer to seqcount_latch_t + * @start: count, from read_seqcount_latch() + * + * Return: true if a read section retry is required, else false + */ +static __always_inline int +read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) +{ + kcsan_atomic_next(0); + return raw_read_seqcount_latch_retry(s, start); +} + /** * raw_write_seqcount_latch() - redirect latch readers to even/odd copy * @s: Pointer to seqcount_latch_t + */ +static __always_inline void raw_write_seqcount_latch(seqcount_latch_t *s) +{ + smp_wmb(); /* prior stores before incrementing "sequence" */ + s->seqcount.sequence++; + smp_wmb(); /* increment "sequence" before following stores */ +} + +/** + * write_seqcount_latch_begin() - redirect latch readers to odd copy + * @s: Pointer to seqcount_latch_t * * The latch technique is a multiversion concurrency control method that allows * queries during non-atomic modifications. If you can guarantee queries never @@ -665,17 +707,11 @@ raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) * * void latch_modify(struct latch_struct *latch, ...) * { - * smp_wmb(); // Ensure that the last data[1] update is visible - * latch->seq.sequence++; - * smp_wmb(); // Ensure that the seqcount update is visible - * + * write_seqcount_latch_begin(&latch->seq); * modify(latch->data[0], ...); - * - * smp_wmb(); // Ensure that the data[0] update is visible - * latch->seq.sequence++; - * smp_wmb(); // Ensure that the seqcount update is visible - * + * write_seqcount_latch(&latch->seq); * modify(latch->data[1], ...); + * write_seqcount_latch_end(&latch->seq); * } * * The query will have a form like:: @@ -686,13 +722,13 @@ raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) * unsigned seq, idx; * * do { - * seq = raw_read_seqcount_latch(&latch->seq); + * seq = read_seqcount_latch(&latch->seq); * * idx = seq & 0x01; * entry = data_query(latch->data[idx], ...); * * // This includes needed smp_rmb() - * } while (raw_read_seqcount_latch_retry(&latch->seq, seq)); + * } while (read_seqcount_latch_retry(&latch->seq, seq)); * * return entry; * } @@ -716,11 +752,31 @@ raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) * When data is a dynamic data structure; one should use regular RCU * patterns to manage the lifetimes of the objects within. */ -static inline void raw_write_seqcount_latch(seqcount_latch_t *s) +static __always_inline void write_seqcount_latch_begin(seqcount_latch_t *s) { - smp_wmb(); /* prior stores before incrementing "sequence" */ - s->seqcount.sequence++; - smp_wmb(); /* increment "sequence" before following stores */ + kcsan_nestable_atomic_begin(); + raw_write_seqcount_latch(s); +} + +/** + * write_seqcount_latch() - redirect latch readers to even copy + * @s: Pointer to seqcount_latch_t + */ +static __always_inline void write_seqcount_latch(seqcount_latch_t *s) +{ + raw_write_seqcount_latch(s); +} + +/** + * write_seqcount_latch_end() - end a seqcount_latch_t write section + * @s: Pointer to seqcount_latch_t + * + * Marks the end of a seqcount_latch_t writer section, after all copies of the + * latch-protected data have been updated. + */ +static __always_inline void write_seqcount_latch_end(seqcount_latch_t *s) +{ + kcsan_nestable_atomic_end(); } #define __SEQLOCK_UNLOCKED(lockname) \ -- cgit v1.2.3 From 93190bc35d6d4364a4d8c38ac8961dabecbff4ed Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 4 Nov 2024 16:43:08 +0100 Subject: seqlock, treewide: Switch to non-raw seqcount_latch interface Switch all instrumentable users of the seqcount_latch interface over to the non-raw interface. Co-developed-by: "Peter Zijlstra (Intel)" Signed-off-by: "Peter Zijlstra (Intel)" Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20241104161910.780003-5-elver@google.com --- include/linux/rbtree_latch.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h index 6a0999c26c7c..2f630eb8307e 100644 --- a/include/linux/rbtree_latch.h +++ b/include/linux/rbtree_latch.h @@ -14,7 +14,7 @@ * * If we need to allow unconditional lookups (say as required for NMI context * usage) we need a more complex setup; this data structure provides this by - * employing the latch technique -- see @raw_write_seqcount_latch -- to + * employing the latch technique -- see @write_seqcount_latch_begin -- to * implement a latched RB-tree which does allow for unconditional lookups by * virtue of always having (at least) one stable copy of the tree. * @@ -132,7 +132,7 @@ __lt_find(void *key, struct latch_tree_root *ltr, int idx, * @ops: operators defining the node order * * It inserts @node into @root in an ordered fashion such that we can always - * observe one complete tree. See the comment for raw_write_seqcount_latch(). + * observe one complete tree. See the comment for write_seqcount_latch_begin(). * * The inserts use rcu_assign_pointer() to publish the element such that the * tree structure is stored before we can observe the new @node. @@ -145,10 +145,11 @@ latch_tree_insert(struct latch_tree_node *node, struct latch_tree_root *root, const struct latch_tree_ops *ops) { - raw_write_seqcount_latch(&root->seq); + write_seqcount_latch_begin(&root->seq); __lt_insert(node, root, 0, ops->less); - raw_write_seqcount_latch(&root->seq); + write_seqcount_latch(&root->seq); __lt_insert(node, root, 1, ops->less); + write_seqcount_latch_end(&root->seq); } /** @@ -159,7 +160,7 @@ latch_tree_insert(struct latch_tree_node *node, * * Removes @node from the trees @root in an ordered fashion such that we can * always observe one complete tree. See the comment for - * raw_write_seqcount_latch(). + * write_seqcount_latch_begin(). * * It is assumed that @node will observe one RCU quiescent state before being * reused of freed. @@ -172,10 +173,11 @@ latch_tree_erase(struct latch_tree_node *node, struct latch_tree_root *root, const struct latch_tree_ops *ops) { - raw_write_seqcount_latch(&root->seq); + write_seqcount_latch_begin(&root->seq); __lt_erase(node, root, 0); - raw_write_seqcount_latch(&root->seq); + write_seqcount_latch(&root->seq); __lt_erase(node, root, 1); + write_seqcount_latch_end(&root->seq); } /** @@ -204,9 +206,9 @@ latch_tree_find(void *key, struct latch_tree_root *root, unsigned int seq; do { - seq = raw_read_seqcount_latch(&root->seq); + seq = read_seqcount_latch(&root->seq); node = __lt_find(key, root, seq & 1, ops->comp); - } while (raw_read_seqcount_latch_retry(&root->seq, seq)); + } while (read_seqcount_latch_retry(&root->seq, seq)); return node; } -- cgit v1.2.3 From 183ec5f26b2fc97a4a9871865bfe9b33c41fddb2 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 4 Nov 2024 16:43:09 +0100 Subject: kcsan, seqlock: Fix incorrect assumption in read_seqbegin() During testing of the preceding changes, I noticed that in some cases, current->kcsan_ctx.in_flat_atomic remained true until task exit. This is obviously wrong, because _all_ accesses for the given task will be treated as atomic, resulting in false negatives i.e. missed data races. Debugging led to fs/dcache.c, where we can see this usage of seqlock: struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name) { struct dentry *dentry; unsigned seq; do { seq = read_seqbegin(&rename_lock); dentry = __d_lookup(parent, name); if (dentry) break; } while (read_seqretry(&rename_lock, seq)); [...] As can be seen, read_seqretry() is never called if dentry != NULL; consequently, current->kcsan_ctx.in_flat_atomic will never be reset to false by read_seqretry(). Give up on the wrong assumption of "assume closing read_seqretry()", and rely on the already-present annotations in read_seqcount_begin/retry(). Fixes: 88ecd153be95 ("seqlock, kcsan: Add annotations for KCSAN") Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20241104161910.780003-6-elver@google.com --- include/linux/seqlock.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 45eee0e5dca0..5298765d6ca4 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -810,11 +810,7 @@ static __always_inline void write_seqcount_latch_end(seqcount_latch_t *s) */ static inline unsigned read_seqbegin(const seqlock_t *sl) { - unsigned ret = read_seqcount_begin(&sl->seqcount); - - kcsan_atomic_next(0); /* non-raw usage, assume closing read_seqretry() */ - kcsan_flat_atomic_begin(); - return ret; + return read_seqcount_begin(&sl->seqcount); } /** @@ -830,12 +826,6 @@ static inline unsigned read_seqbegin(const seqlock_t *sl) */ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) { - /* - * Assume not nested: read_seqretry() may be called multiple times when - * completing read critical section. - */ - kcsan_flat_atomic_end(); - return read_seqcount_retry(&sl->seqcount, start); } -- cgit v1.2.3 From 0f0d1b8e5010bfe1feeb4d78d137e41946a5370d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Oct 2024 14:20:35 +0100 Subject: sched/ext: Remove sched_fork() hack Instead of solving the underlying problem of the double invocation of __sched_fork() for idle tasks, sched-ext decided to hack around the issue by partially clearing out the entity struct to preserve the already enqueued node. A provided analysis and solution has been ignored for four months. Now that someone else has taken care of cleaning it up, remove the disgusting hack and clear out the full structure. Remove the comment in the structure declaration as well, as there is no requirement for @node being the last element anymore. Fixes: f0e1a0643a59 ("sched_ext: Implement BPF extensible scheduler class") Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo Link: https://lore.kernel.org/r/87ldy82wkc.ffs@tglx --- include/linux/sched/ext.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 1ddbde64a31b..2799e7284fff 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -199,7 +199,6 @@ struct sched_ext_entity { #ifdef CONFIG_EXT_GROUP_SCHED struct cgroup *cgrp_moving_from; #endif - /* must be the last field, see init_scx_entity() */ struct list_head tasks_node; }; -- cgit v1.2.3 From 26baa1f1c4bdc34b8d698c1900b407d863ad0e69 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 4 Oct 2024 14:47:02 +0200 Subject: sched: Add TIF_NEED_RESCHED_LAZY infrastructure Add the basic infrastructure to split the TIF_NEED_RESCHED bit in two. Either bit will cause a resched on return-to-user, but only TIF_NEED_RESCHED will drive IRQ preemption. No behavioural change intended. Suggested-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Sebastian Andrzej Siewior Link: https://lkml.kernel.org/r/20241007075055.219540785@infradead.org --- include/linux/entry-common.h | 3 ++- include/linux/entry-kvm.h | 5 +++-- include/linux/sched.h | 3 ++- include/linux/thread_info.h | 21 +++++++++++++++++---- 4 files changed, 24 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 1e50cdb83ae5..fc61d0205c97 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -64,7 +64,8 @@ #define EXIT_TO_USER_MODE_WORK \ (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ + _TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ + _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ ARCH_EXIT_TO_USER_MODE_WORK) /** diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h index 6813171afccb..16149f6625e4 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-kvm.h @@ -17,8 +17,9 @@ #endif #define XFER_TO_GUEST_MODE_WORK \ - (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \ - _TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK) + (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_SIGPENDING | \ + _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \ + ARCH_XFER_TO_GUEST_MODE_WORK) struct kvm_vcpu; diff --git a/include/linux/sched.h b/include/linux/sched.h index a76e3d074a2a..1d5cc3e50884 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2002,7 +2002,8 @@ static inline void set_tsk_need_resched(struct task_struct *tsk) static inline void clear_tsk_need_resched(struct task_struct *tsk) { - clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED); + atomic_long_andnot(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY, + (atomic_long_t *)&task_thread_info(tsk)->flags); } static inline int test_tsk_need_resched(struct task_struct *tsk) diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 9ea0b28068f4..cf2446c9c30d 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -59,6 +59,14 @@ enum syscall_work_bit { #include +#ifndef TIF_NEED_RESCHED_LAZY +#ifdef CONFIG_ARCH_HAS_PREEMPT_LAZY +#error Inconsistent PREEMPT_LAZY +#endif +#define TIF_NEED_RESCHED_LAZY TIF_NEED_RESCHED +#define _TIF_NEED_RESCHED_LAZY _TIF_NEED_RESCHED +#endif + #ifdef __KERNEL__ #ifndef arch_set_restart_data @@ -179,22 +187,27 @@ static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti #ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H -static __always_inline bool tif_need_resched(void) +static __always_inline bool tif_test_bit(int bit) { - return arch_test_bit(TIF_NEED_RESCHED, + return arch_test_bit(bit, (unsigned long *)(¤t_thread_info()->flags)); } #else -static __always_inline bool tif_need_resched(void) +static __always_inline bool tif_test_bit(int bit) { - return test_bit(TIF_NEED_RESCHED, + return test_bit(bit, (unsigned long *)(¤t_thread_info()->flags)); } #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ +static __always_inline bool tif_need_resched(void) +{ + return tif_test_bit(TIF_NEED_RESCHED); +} + #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES static inline int arch_within_stack_frames(const void * const stack, const void * const stackend, -- cgit v1.2.3 From 7c70cb94d29cd325fabe4a818c18613e3b9919a1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 4 Oct 2024 14:46:58 +0200 Subject: sched: Add Lazy preemption model Change fair to use resched_curr_lazy(), which, when the lazy preemption model is selected, will set TIF_NEED_RESCHED_LAZY. This LAZY bit will be promoted to the full NEED_RESCHED bit on tick. As such, the average delay between setting LAZY and actually rescheduling will be TICK_NSEC/2. In short, Lazy preemption will delay preemption for fair class but will function as Full preemption for all the other classes, most notably the realtime (RR/FIFO/DEADLINE) classes. The goal is to bridge the performance gap with Voluntary, such that we might eventually remove that option entirely. Suggested-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Sebastian Andrzej Siewior Link: https://lkml.kernel.org/r/20241007075055.331243614@infradead.org --- include/linux/preempt.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index ce76f1a45722..ca86235ac15c 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -486,6 +486,7 @@ DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) extern bool preempt_model_none(void); extern bool preempt_model_voluntary(void); extern bool preempt_model_full(void); +extern bool preempt_model_lazy(void); #else @@ -502,6 +503,11 @@ static inline bool preempt_model_full(void) return IS_ENABLED(CONFIG_PREEMPT); } +static inline bool preempt_model_lazy(void) +{ + return IS_ENABLED(CONFIG_PREEMPT_LAZY); +} + #endif static inline bool preempt_model_rt(void) @@ -519,7 +525,7 @@ static inline bool preempt_model_rt(void) */ static inline bool preempt_model_preemptible(void) { - return preempt_model_full() || preempt_model_rt(); + return preempt_model_full() || preempt_model_lazy() || preempt_model_rt(); } #endif /* __LINUX_PREEMPT_H */ -- cgit v1.2.3 From 18d92bb57c39504d9da11c6ef604f58eb1d5a117 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 22 Oct 2024 18:59:08 +0300 Subject: perf/core: Add aux_pause, aux_resume, aux_start_paused Hardware traces, such as instruction traces, can produce a vast amount of trace data, so being able to reduce tracing to more specific circumstances can be useful. The ability to pause or resume tracing when another event happens, can do that. Add ability for an event to "pause" or "resume" AUX area tracing. Add aux_pause bit to perf_event_attr to indicate that, if the event happens, the associated AUX area tracing should be paused. Ditto aux_resume. Do not allow aux_pause and aux_resume to be set together. Add aux_start_paused bit to perf_event_attr to indicate to an AUX area event that it should start in a "paused" state. Add aux_paused to struct hw_perf_event for AUX area events to keep track of the "paused" state. aux_paused is initialized to aux_start_paused. Add PERF_EF_PAUSE and PERF_EF_RESUME modes for ->stop() and ->start() callbacks. Call as needed, during __perf_event_output(). Add aux_in_pause_resume to struct perf_buffer to prevent races with the NMI handler. Pause/resume in NMI context will miss out if it coincides with another pause/resume. To use aux_pause or aux_resume, an event must be in a group with the AUX area event as the group leader. Example (requires Intel PT and tools patches also): $ perf record --kcore -e intel_pt/aux-action=start-paused/k,syscalls:sys_enter_newuname/aux-action=resume/,syscalls:sys_exit_newuname/aux-action=pause/ uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.043 MB perf.data ] $ perf script --call-trace uname 30805 [000] 24001.058782799: name: 0x7ffc9c1865b0 uname 30805 [000] 24001.058784424: psb offs: 0 uname 30805 [000] 24001.058784424: cbr: 39 freq: 3904 MHz (139%) uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) debug_smp_processor_id uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) __x64_sys_newuname uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) down_read uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) __cond_resched uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) preempt_count_add uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) in_lock_functions uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) preempt_count_sub uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) up_read uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) preempt_count_add uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) in_lock_functions uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) preempt_count_sub uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) _copy_to_user uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) syscall_exit_to_user_mode uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) syscall_exit_work uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) perf_syscall_exit uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) debug_smp_processor_id uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_trace_buf_alloc uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_swevent_get_recursion_context uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) debug_smp_processor_id uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) debug_smp_processor_id uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_tp_event uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_trace_buf_update uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) tracing_gen_ctx_irq_test uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_swevent_event uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) __perf_event_account_interrupt uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) __this_cpu_preempt_check uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_event_output_forward uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_event_aux_pause uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) ring_buffer_get uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) __rcu_read_lock uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) __rcu_read_unlock uname 30805 [000] 24001.058785254: ([kernel.kallsyms]) pt_event_stop uname 30805 [000] 24001.058785254: ([kernel.kallsyms]) debug_smp_processor_id uname 30805 [000] 24001.058785254: ([kernel.kallsyms]) debug_smp_processor_id uname 30805 [000] 24001.058785254: ([kernel.kallsyms]) native_write_msr uname 30805 [000] 24001.058785463: ([kernel.kallsyms]) native_write_msr uname 30805 [000] 24001.058785639: 0x0 Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Acked-by: James Clark Link: https://lkml.kernel.org/r/20241022155920.17511-3-adrian.hunter@intel.com --- include/linux/perf_event.h | 28 ++++++++++++++++++++++++++++ include/uapi/linux/perf_event.h | 11 ++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fb908843f209..91b310052a7c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -170,6 +170,12 @@ struct hw_perf_event { }; struct { /* aux / Intel-PT */ u64 aux_config; + /* + * For AUX area events, aux_paused cannot be a state + * flag because it can be updated asynchronously to + * state. + */ + unsigned int aux_paused; }; struct { /* software */ struct hrtimer hrtimer; @@ -294,6 +300,7 @@ struct perf_event_pmu_context; #define PERF_PMU_CAP_NO_EXCLUDE 0x0040 #define PERF_PMU_CAP_AUX_OUTPUT 0x0080 #define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 +#define PERF_PMU_CAP_AUX_PAUSE 0x0200 /** * pmu::scope @@ -384,6 +391,8 @@ struct pmu { #define PERF_EF_START 0x01 /* start the counter when adding */ #define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ #define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ +#define PERF_EF_PAUSE 0x08 /* AUX area event, pause tracing */ +#define PERF_EF_RESUME 0x10 /* AUX area event, resume tracing */ /* * Adds/Removes a counter to/from the PMU, can be done inside a @@ -423,6 +432,18 @@ struct pmu { * * ->start() with PERF_EF_RELOAD will reprogram the counter * value, must be preceded by a ->stop() with PERF_EF_UPDATE. + * + * ->stop() with PERF_EF_PAUSE will stop as simply as possible. Will not + * overlap another ->stop() with PERF_EF_PAUSE nor ->start() with + * PERF_EF_RESUME. + * + * ->start() with PERF_EF_RESUME will start as simply as possible but + * only if the counter is not otherwise stopped. Will not overlap + * another ->start() with PERF_EF_RESUME nor ->stop() with + * PERF_EF_PAUSE. + * + * Notably, PERF_EF_PAUSE/PERF_EF_RESUME *can* be concurrent with other + * ->stop()/->start() invocations, just not itself. */ void (*start) (struct perf_event *event, int flags); void (*stop) (struct perf_event *event, int flags); @@ -1679,6 +1700,13 @@ static inline bool has_aux(struct perf_event *event) return event->pmu->setup_aux; } +static inline bool has_aux_action(struct perf_event *event) +{ + return event->attr.aux_sample_size || + event->attr.aux_pause || + event->attr.aux_resume; +} + static inline bool is_write_backward(struct perf_event *event) { return !!event->attr.write_backward; diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 4842c36fdf80..0524d541d4e3 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -511,7 +511,16 @@ struct perf_event_attr { __u16 sample_max_stack; __u16 __reserved_2; __u32 aux_sample_size; - __u32 __reserved_3; + + union { + __u32 aux_action; + struct { + __u32 aux_start_paused : 1, /* start AUX area tracing paused */ + aux_pause : 1, /* on overflow, pause AUX area tracing */ + aux_resume : 1, /* on overflow, resume AUX area tracing */ + __reserved_3 : 29; + }; + }; /* * User provided data if sigtrap=1, passed back to user via -- cgit v1.2.3 From 95e2eaf5b91aae6c3a433cd7882733bd806fa3c8 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 4 Nov 2024 09:40:30 +0800 Subject: iommu/vt-d: Remove unused dmar_msi_read dmar_msi_read() has been unused since 2022 in commit cf8e8658100d ("arch: Remove Itanium (IA-64) architecture") Remove it. (dmar_msi_write still exists and is used once). Signed-off-by: Dr. David Alan Gilbert Link: https://lore.kernel.org/r/20241022002702.302728-1-linux@treblig.org Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- include/linux/dmar.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 499bb2c63483..692b2b445761 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -292,7 +292,6 @@ static inline void dmar_copy_shared_irte(struct irte *dst, struct irte *src) struct irq_data; extern void dmar_msi_unmask(struct irq_data *data); extern void dmar_msi_mask(struct irq_data *data); -extern void dmar_msi_read(int irq, struct msi_msg *msg); extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); extern irqreturn_t dmar_fault(int irq, void *dev_id); -- cgit v1.2.3 From bebf29b18f34620e25f7e2bd9e4e4d8e34a8977d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 3 Nov 2024 17:03:31 +0000 Subject: sysfs: introduce callback attribute_group::bin_size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several drivers need to dynamically calculate the size of an binary attribute. Currently this is done by assigning attr->size from the is_bin_visible() callback. This has drawbacks: * It is not documented. * A single attribute can be instantiated multiple times, overwriting the shared size field. * It prevents the structure to be moved to read-only memory. Introduce a new dedicated callback to calculate the size of the attribute. Signed-off-by: Thomas Weißschuh Acked-by: Krzysztof Wilczyński Link: https://lore.kernel.org/r/20241103-sysfs-const-bin_attr-v2-2-71110628844c@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index c4e64dc11206..4746cccb9589 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -87,6 +87,11 @@ do { \ * SYSFS_GROUP_VISIBLE() when assigning this callback to * specify separate _group_visible() and _attr_visible() * handlers. + * @bin_size: + * Optional: Function to return the size of a binary attribute + * of the group. Will be called repeatedly for each binary + * attribute in the group. Overwrites the size field embedded + * inside the attribute itself. * @attrs: Pointer to NULL terminated list of attributes. * @bin_attrs: Pointer to NULL terminated list of binary attributes. * Either attrs or bin_attrs or both must be provided. @@ -97,6 +102,9 @@ struct attribute_group { struct attribute *, int); umode_t (*is_bin_visible)(struct kobject *, struct bin_attribute *, int); + size_t (*bin_size)(struct kobject *, + const struct bin_attribute *, + int); struct attribute **attrs; struct bin_attribute **bin_attrs; }; -- cgit v1.2.3 From b626816fdd7f9beb841856ba049396cff46e99aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 3 Nov 2024 17:03:34 +0000 Subject: sysfs: treewide: constify attribute callback of bin_is_visible() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The is_bin_visible() callbacks should not modify the struct bin_attribute passed as argument. Enforce this by marking the argument as const. As there are not many callback implementers perform this change throughout the tree at once. Signed-off-by: Thomas Weißschuh Acked-by: Martin K. Petersen Acked-by: Jason Gunthorpe Acked-by: Ira Weiny Acked-by: Krzysztof Wilczyński Link: https://lore.kernel.org/r/20241103-sysfs-const-bin_attr-v2-5-71110628844c@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 4746cccb9589..d1b22d56198b 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -101,7 +101,7 @@ struct attribute_group { umode_t (*is_visible)(struct kobject *, struct attribute *, int); umode_t (*is_bin_visible)(struct kobject *, - struct bin_attribute *, int); + const struct bin_attribute *, int); size_t (*bin_size)(struct kobject *, const struct bin_attribute *, int); @@ -199,22 +199,22 @@ struct attribute_group { * attributes, the group visibility is determined by the function * specified to is_visible() not is_bin_visible() */ -#define DEFINE_SYSFS_BIN_GROUP_VISIBLE(name) \ - static inline umode_t sysfs_group_visible_##name( \ - struct kobject *kobj, struct bin_attribute *attr, int n) \ - { \ - if (n == 0 && !name##_group_visible(kobj)) \ - return SYSFS_GROUP_INVISIBLE; \ - return name##_attr_visible(kobj, attr, n); \ +#define DEFINE_SYSFS_BIN_GROUP_VISIBLE(name) \ + static inline umode_t sysfs_group_visible_##name( \ + struct kobject *kobj, const struct bin_attribute *attr, int n) \ + { \ + if (n == 0 && !name##_group_visible(kobj)) \ + return SYSFS_GROUP_INVISIBLE; \ + return name##_attr_visible(kobj, attr, n); \ } -#define DEFINE_SIMPLE_SYSFS_BIN_GROUP_VISIBLE(name) \ - static inline umode_t sysfs_group_visible_##name( \ - struct kobject *kobj, struct bin_attribute *a, int n) \ - { \ - if (n == 0 && !name##_group_visible(kobj)) \ - return SYSFS_GROUP_INVISIBLE; \ - return a->mode; \ +#define DEFINE_SIMPLE_SYSFS_BIN_GROUP_VISIBLE(name) \ + static inline umode_t sysfs_group_visible_##name( \ + struct kobject *kobj, const struct bin_attribute *a, int n) \ + { \ + if (n == 0 && !name##_group_visible(kobj)) \ + return SYSFS_GROUP_INVISIBLE; \ + return a->mode; \ } #define SYSFS_GROUP_VISIBLE(fn) sysfs_group_visible_##fn -- cgit v1.2.3 From 94a20fb9af16417ab5fd17bcde3d906926f15ef6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 3 Nov 2024 17:03:35 +0000 Subject: sysfs: treewide: constify attribute callback of bin_attribute::mmap() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mmap() callbacks should not modify the struct bin_attribute passed as argument. Enforce this by marking the argument as const. As there are not many callback implementers perform this change throughout the tree at once. Signed-off-by: Thomas Weißschuh Acked-by: Andrew Donnellan # ocxl Acked-by: Krzysztof Wilczyński Link: https://lore.kernel.org/r/20241103-sysfs-const-bin_attr-v2-6-71110628844c@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index d1b22d56198b..9fcdc8cd3118 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -309,7 +309,7 @@ struct bin_attribute { char *, loff_t, size_t); loff_t (*llseek)(struct file *, struct kobject *, struct bin_attribute *, loff_t, int); - int (*mmap)(struct file *, struct kobject *, struct bin_attribute *attr, + int (*mmap)(struct file *, struct kobject *, const struct bin_attribute *attr, struct vm_area_struct *vma); }; -- cgit v1.2.3 From 699e7b85afb5d94b99b0a3edca7e9e93ea320c8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 3 Nov 2024 17:03:36 +0000 Subject: sysfs: treewide: constify attribute callback of bin_attribute::llseek() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The llseek() callbacks should not modify the struct bin_attribute passed as argument. Enforce this by marking the argument as const. As there are not many callback implementers perform this change throughout the tree at once. Signed-off-by: Thomas Weißschuh Acked-by: Krzysztof Wilczyński Link: https://lore.kernel.org/r/20241103-sysfs-const-bin_attr-v2-7-71110628844c@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 9fcdc8cd3118..cb2a5e277c23 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -307,7 +307,7 @@ struct bin_attribute { char *, loff_t, size_t); ssize_t (*write)(struct file *, struct kobject *, struct bin_attribute *, char *, loff_t, size_t); - loff_t (*llseek)(struct file *, struct kobject *, struct bin_attribute *, + loff_t (*llseek)(struct file *, struct kobject *, const struct bin_attribute *, loff_t, int); int (*mmap)(struct file *, struct kobject *, const struct bin_attribute *attr, struct vm_area_struct *vma); -- cgit v1.2.3 From ae587a509903cca138e910445d8c21fe73b45c80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 3 Nov 2024 17:03:37 +0000 Subject: sysfs: implement all BIN_ATTR_* macros in terms of __BIN_ATTR() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The preparations for the upcoming constification of struct bin_attribute requires some logic in the structure definition macros. To avoid duplication of that logic in multiple macros, reimplement all other macros in terms of __BIN_ATTR(). Signed-off-by: Thomas Weißschuh Acked-by: Krzysztof Wilczyński Link: https://lore.kernel.org/r/20241103-sysfs-const-bin_attr-v2-8-71110628844c@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index cb2a5e277c23..d17c473c1ef2 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -333,17 +333,11 @@ struct bin_attribute { .size = _size, \ } -#define __BIN_ATTR_RO(_name, _size) { \ - .attr = { .name = __stringify(_name), .mode = 0444 }, \ - .read = _name##_read, \ - .size = _size, \ -} +#define __BIN_ATTR_RO(_name, _size) \ + __BIN_ATTR(_name, 0444, _name##_read, NULL, _size) -#define __BIN_ATTR_WO(_name, _size) { \ - .attr = { .name = __stringify(_name), .mode = 0200 }, \ - .write = _name##_write, \ - .size = _size, \ -} +#define __BIN_ATTR_WO(_name, _size) \ + __BIN_ATTR(_name, 0200, NULL, _name##_write, _size) #define __BIN_ATTR_RW(_name, _size) \ __BIN_ATTR(_name, 0644, _name##_read, _name##_write, _size) @@ -364,11 +358,8 @@ struct bin_attribute bin_attr_##_name = __BIN_ATTR_WO(_name, _size) struct bin_attribute bin_attr_##_name = __BIN_ATTR_RW(_name, _size) -#define __BIN_ATTR_ADMIN_RO(_name, _size) { \ - .attr = { .name = __stringify(_name), .mode = 0400 }, \ - .read = _name##_read, \ - .size = _size, \ -} +#define __BIN_ATTR_ADMIN_RO(_name, _size) \ + __BIN_ATTR(_name, 0400, _name##_read, NULL, _size) #define __BIN_ATTR_ADMIN_RW(_name, _size) \ __BIN_ATTR(_name, 0600, _name##_read, _name##_write, _size) @@ -379,10 +370,8 @@ struct bin_attribute bin_attr_##_name = __BIN_ATTR_ADMIN_RO(_name, _size) #define BIN_ATTR_ADMIN_RW(_name, _size) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR_ADMIN_RW(_name, _size) -#define __BIN_ATTR_SIMPLE_RO(_name, _mode) { \ - .attr = { .name = __stringify(_name), .mode = _mode }, \ - .read = sysfs_bin_attr_simple_read, \ -} +#define __BIN_ATTR_SIMPLE_RO(_name, _mode) \ + __BIN_ATTR(_name, _mode, sysfs_bin_attr_simple_read, NULL, 0) #define BIN_ATTR_SIMPLE_RO(_name) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR_SIMPLE_RO(_name, 0444) -- cgit v1.2.3 From eb2e6c3a8d66ff37b2ee26cd32334ae0e05fd596 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 3 Nov 2024 17:03:38 +0000 Subject: sysfs: bin_attribute: add const read/write callback variants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make it possible to put struct bin_attribute into read-only memory, the sysfs core has to stop passing mutable pointers to the read() and write() callbacks. As there are numerous implementors of these callbacks throughout the tree it's not possible to change all of them at once. To enable a step-by-step transition, add new variants of the read() and write() callbacks which differ only in the constness of the struct bin_attribute argument. As most binary attributes are defined through macros, extend these macros to transparently handle both variants of callbacks to minimize the churn during the transition. As soon as all handlers are switch to the const variant, the non-const one can be removed together with the transition machinery. Signed-off-by: Thomas Weißschuh Acked-by: Krzysztof Wilczyński Link: https://lore.kernel.org/r/20241103-sysfs-const-bin_attr-v2-9-71110628844c@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index d17c473c1ef2..d713a6445a62 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -305,8 +305,12 @@ struct bin_attribute { struct address_space *(*f_mapping)(void); ssize_t (*read)(struct file *, struct kobject *, struct bin_attribute *, char *, loff_t, size_t); + ssize_t (*read_new)(struct file *, struct kobject *, const struct bin_attribute *, + char *, loff_t, size_t); ssize_t (*write)(struct file *, struct kobject *, struct bin_attribute *, char *, loff_t, size_t); + ssize_t (*write_new)(struct file *, struct kobject *, + const struct bin_attribute *, char *, loff_t, size_t); loff_t (*llseek)(struct file *, struct kobject *, const struct bin_attribute *, loff_t, int); int (*mmap)(struct file *, struct kobject *, const struct bin_attribute *attr, @@ -325,11 +329,28 @@ struct bin_attribute { */ #define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&(bin_attr)->attr) +typedef ssize_t __sysfs_bin_rw_handler_new(struct file *, struct kobject *, + const struct bin_attribute *, char *, loff_t, size_t); + /* macros to create static binary attributes easier */ #define __BIN_ATTR(_name, _mode, _read, _write, _size) { \ .attr = { .name = __stringify(_name), .mode = _mode }, \ - .read = _read, \ - .write = _write, \ + .read = _Generic(_read, \ + __sysfs_bin_rw_handler_new * : NULL, \ + default : _read \ + ), \ + .read_new = _Generic(_read, \ + __sysfs_bin_rw_handler_new * : _read, \ + default : NULL \ + ), \ + .write = _Generic(_write, \ + __sysfs_bin_rw_handler_new * : NULL, \ + default : _write \ + ), \ + .write_new = _Generic(_write, \ + __sysfs_bin_rw_handler_new * : _write, \ + default : NULL \ + ), \ .size = _size, \ } -- cgit v1.2.3 From b3e8f29d6b45e865bab9a9964709ff7413e33e85 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 Nov 2024 10:41:15 +0100 Subject: netfilter: nf_tables: avoid false-positive lockdep splats with flowtables The transaction mutex prevents concurrent add/delete, its ok to iterate those lists outside of rcu read side critical sections. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 91ae20cb7648..c1513bd14568 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1463,7 +1463,8 @@ struct nft_flowtable { struct nf_flowtable data; }; -struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table, +struct nft_flowtable *nft_flowtable_lookup(const struct net *net, + const struct nft_table *table, const struct nlattr *nla, u8 genmask); -- cgit v1.2.3 From 64c58d7c99343a910edf995e15d8037e19ec5777 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Nov 2024 20:19:16 -0800 Subject: iomap: add a merge boundary flag File systems might have boundaries over which merges aren't possible. In fact these are very common, although most of the time some kind of header at the beginning of this region (e.g. XFS alloation groups, ext4 block groups) automatically create a merge barrier. But if that is not present, say for a device purely used for data we need to manually communicate that to iomap. Add a IOMAP_F_BOUNDARY flag to never merge I/O into a previous mapping. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index f61407e3b121..9ecb8ea7714c 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -53,6 +53,9 @@ struct vm_fault; * * IOMAP_F_XATTR indicates that the iomap is for an extended attribute extent * rather than a file data extent. + * + * IOMAP_F_BOUNDARY indicates that I/O and I/O completions for this iomap must + * never be merged with the mapping before it. */ #define IOMAP_F_NEW (1U << 0) #define IOMAP_F_DIRTY (1U << 1) @@ -64,6 +67,7 @@ struct vm_fault; #define IOMAP_F_BUFFER_HEAD 0 #endif /* CONFIG_BUFFER_HEAD */ #define IOMAP_F_XATTR (1U << 5) +#define IOMAP_F_BOUNDARY (1U << 6) /* * Flags set by the core iomap code during operations: -- cgit v1.2.3 From 57a063632df8db6cb20d64ee52a06d4e2049235a Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 23 Oct 2024 11:45:24 -0700 Subject: Input: introduce notion of passive observers for input handlers Sometimes it is useful to observe (and maybe modify) data coming from an input device, but only do that if there are other users of such input device. An example is touchpad switching functionality on Lenovo IdeaPad Z570 where it is desirable to suppress events coming from the touchpad if user toggles touchpad on/off button (on this laptop the firmware does not stop the device). Introduce notion of passive observers for input handlers to solve this issue. An input handler marked as passive observer behaves exactly like any other input handler or filter, but with one exception: it does not open/start underlying input device when attaching to it. Link: https://lore.kernel.org/r/ZxlEROX7bMo5cbZP@google.com Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/input.h b/include/linux/input.h index 89a0be6ee0e2..6437c35f0796 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -286,6 +286,10 @@ struct input_handle; * @start: starts handler for given handle. This function is called by * input core right after connect() method and also when a process * that "grabbed" a device releases it + * @passive_observer: set to %true by drivers only interested in observing + * data stream from devices if there are other users present. Such + * drivers will not result in starting underlying hardware device + * when input_open_device() is called for their handles * @legacy_minors: set to %true by drivers using legacy minor ranges * @minor: beginning of range of 32 legacy minors for devices this driver * can provide @@ -321,6 +325,7 @@ struct input_handler { void (*disconnect)(struct input_handle *handle); void (*start)(struct input_handle *handle); + bool passive_observer; bool legacy_minors; int minor; const char *name; -- cgit v1.2.3 From 3ee315537e941fb92cff31b259641d19e798b4e6 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 27 Oct 2024 03:24:41 +0200 Subject: dt-bindings: clock: qcom: document SAR2130P Global Clock Controller Add bindings for the Global Clock Controller (GCC) present on the Qualcomm SAR2130P platform. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20241027-sar2130p-clocks-v5-2-ecad2a1432ba@linaro.org Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sar2130p-gcc.h | 185 ++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sar2130p-gcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sar2130p-gcc.h b/include/dt-bindings/clock/qcom,sar2130p-gcc.h new file mode 100644 index 000000000000..69d2dd2538a6 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sar2130p-gcc.h @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Copyright (c) 2021-2022, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GCC_SAR2130P_H +#define _DT_BINDINGS_CLK_QCOM_GCC_SAR2130P_H + +/* GCC clocks */ +#define GCC_GPLL0 0 +#define GCC_GPLL0_OUT_EVEN 1 +#define GCC_GPLL1 2 +#define GCC_GPLL9 3 +#define GCC_GPLL9_OUT_EVEN 4 +#define GCC_AGGRE_NOC_PCIE_1_AXI_CLK 5 +#define GCC_AGGRE_USB3_PRIM_AXI_CLK 6 +#define GCC_BOOT_ROM_AHB_CLK 7 +#define GCC_CAMERA_AHB_CLK 8 +#define GCC_CAMERA_HF_AXI_CLK 9 +#define GCC_CAMERA_SF_AXI_CLK 10 +#define GCC_CAMERA_XO_CLK 11 +#define GCC_CFG_NOC_PCIE_ANOC_AHB_CLK 12 +#define GCC_CFG_NOC_USB3_PRIM_AXI_CLK 13 +#define GCC_DDRSS_GPU_AXI_CLK 14 +#define GCC_DDRSS_PCIE_SF_CLK 15 +#define GCC_DISP_AHB_CLK 16 +#define GCC_DISP_HF_AXI_CLK 17 +#define GCC_GP1_CLK 18 +#define GCC_GP1_CLK_SRC 19 +#define GCC_GP2_CLK 20 +#define GCC_GP2_CLK_SRC 21 +#define GCC_GP3_CLK 22 +#define GCC_GP3_CLK_SRC 23 +#define GCC_GPU_CFG_AHB_CLK 24 +#define GCC_GPU_GPLL0_CLK_SRC 25 +#define GCC_GPU_GPLL0_DIV_CLK_SRC 26 +#define GCC_GPU_MEMNOC_GFX_CLK 27 +#define GCC_GPU_SNOC_DVM_GFX_CLK 28 +#define GCC_IRIS_SS_HF_AXI1_CLK 29 +#define GCC_IRIS_SS_SPD_AXI1_CLK 30 +#define GCC_PCIE_0_AUX_CLK 31 +#define GCC_PCIE_0_AUX_CLK_SRC 32 +#define GCC_PCIE_0_CFG_AHB_CLK 33 +#define GCC_PCIE_0_MSTR_AXI_CLK 34 +#define GCC_PCIE_0_PHY_RCHNG_CLK 35 +#define GCC_PCIE_0_PHY_RCHNG_CLK_SRC 36 +#define GCC_PCIE_0_PIPE_CLK 37 +#define GCC_PCIE_0_PIPE_CLK_SRC 38 +#define GCC_PCIE_0_SLV_AXI_CLK 39 +#define GCC_PCIE_0_SLV_Q2A_AXI_CLK 40 +#define GCC_PCIE_1_AUX_CLK 41 +#define GCC_PCIE_1_AUX_CLK_SRC 42 +#define GCC_PCIE_1_CFG_AHB_CLK 43 +#define GCC_PCIE_1_MSTR_AXI_CLK 44 +#define GCC_PCIE_1_PHY_RCHNG_CLK 45 +#define GCC_PCIE_1_PHY_RCHNG_CLK_SRC 46 +#define GCC_PCIE_1_PIPE_CLK 47 +#define GCC_PCIE_1_PIPE_CLK_SRC 48 +#define GCC_PCIE_1_SLV_AXI_CLK 49 +#define GCC_PCIE_1_SLV_Q2A_AXI_CLK 50 +#define GCC_PDM2_CLK 51 +#define GCC_PDM2_CLK_SRC 52 +#define GCC_PDM_AHB_CLK 53 +#define GCC_PDM_XO4_CLK 54 +#define GCC_QMIP_CAMERA_NRT_AHB_CLK 55 +#define GCC_QMIP_CAMERA_RT_AHB_CLK 56 +#define GCC_QMIP_GPU_AHB_CLK 57 +#define GCC_QMIP_PCIE_AHB_CLK 58 +#define GCC_QMIP_VIDEO_CV_CPU_AHB_CLK 59 +#define GCC_QMIP_VIDEO_CVP_AHB_CLK 60 +#define GCC_QMIP_VIDEO_LSR_AHB_CLK 61 +#define GCC_QMIP_VIDEO_V_CPU_AHB_CLK 62 +#define GCC_QMIP_VIDEO_VCODEC_AHB_CLK 63 +#define GCC_QUPV3_WRAP0_CORE_2X_CLK 64 +#define GCC_QUPV3_WRAP0_CORE_CLK 65 +#define GCC_QUPV3_WRAP0_S0_CLK 66 +#define GCC_QUPV3_WRAP0_S0_CLK_SRC 67 +#define GCC_QUPV3_WRAP0_S1_CLK 68 +#define GCC_QUPV3_WRAP0_S1_CLK_SRC 69 +#define GCC_QUPV3_WRAP0_S2_CLK 70 +#define GCC_QUPV3_WRAP0_S2_CLK_SRC 71 +#define GCC_QUPV3_WRAP0_S3_CLK 72 +#define GCC_QUPV3_WRAP0_S3_CLK_SRC 73 +#define GCC_QUPV3_WRAP0_S4_CLK 74 +#define GCC_QUPV3_WRAP0_S4_CLK_SRC 75 +#define GCC_QUPV3_WRAP0_S5_CLK 76 +#define GCC_QUPV3_WRAP0_S5_CLK_SRC 77 +#define GCC_QUPV3_WRAP1_CORE_2X_CLK 78 +#define GCC_QUPV3_WRAP1_CORE_CLK 79 +#define GCC_QUPV3_WRAP1_S0_CLK 80 +#define GCC_QUPV3_WRAP1_S0_CLK_SRC 81 +#define GCC_QUPV3_WRAP1_S1_CLK 82 +#define GCC_QUPV3_WRAP1_S1_CLK_SRC 83 +#define GCC_QUPV3_WRAP1_S2_CLK 84 +#define GCC_QUPV3_WRAP1_S2_CLK_SRC 85 +#define GCC_QUPV3_WRAP1_S3_CLK 86 +#define GCC_QUPV3_WRAP1_S3_CLK_SRC 87 +#define GCC_QUPV3_WRAP1_S4_CLK 88 +#define GCC_QUPV3_WRAP1_S4_CLK_SRC 89 +#define GCC_QUPV3_WRAP1_S5_CLK 90 +#define GCC_QUPV3_WRAP1_S5_CLK_SRC 91 +#define GCC_QUPV3_WRAP_0_M_AHB_CLK 92 +#define GCC_QUPV3_WRAP_0_S_AHB_CLK 93 +#define GCC_QUPV3_WRAP_1_M_AHB_CLK 94 +#define GCC_QUPV3_WRAP_1_S_AHB_CLK 95 +#define GCC_SDCC1_AHB_CLK 96 +#define GCC_SDCC1_APPS_CLK 97 +#define GCC_SDCC1_APPS_CLK_SRC 98 +#define GCC_SDCC1_ICE_CORE_CLK 99 +#define GCC_SDCC1_ICE_CORE_CLK_SRC 100 +#define GCC_USB30_PRIM_MASTER_CLK 101 +#define GCC_USB30_PRIM_MASTER_CLK_SRC 102 +#define GCC_USB30_PRIM_MOCK_UTMI_CLK 103 +#define GCC_USB30_PRIM_MOCK_UTMI_CLK_SRC 104 +#define GCC_USB30_PRIM_MOCK_UTMI_POSTDIV_CLK_SRC 105 +#define GCC_USB30_PRIM_SLEEP_CLK 106 +#define GCC_USB3_PRIM_PHY_AUX_CLK 107 +#define GCC_USB3_PRIM_PHY_AUX_CLK_SRC 108 +#define GCC_USB3_PRIM_PHY_COM_AUX_CLK 109 +#define GCC_USB3_PRIM_PHY_PIPE_CLK 110 +#define GCC_USB3_PRIM_PHY_PIPE_CLK_SRC 111 +#define GCC_VIDEO_AHB_CLK 112 +#define GCC_VIDEO_AXI0_CLK 113 +#define GCC_VIDEO_AXI1_CLK 114 +#define GCC_VIDEO_XO_CLK 115 +#define GCC_GPLL4 116 +#define GCC_GPLL5 117 +#define GCC_GPLL7 118 +#define GCC_DDRSS_SPAD_CLK 119 +#define GCC_DDRSS_SPAD_CLK_SRC 120 +#define GCC_VIDEO_AXI0_SREG 121 +#define GCC_VIDEO_AXI1_SREG 122 +#define GCC_IRIS_SS_HF_AXI1_SREG 123 +#define GCC_IRIS_SS_SPD_AXI1_SREG 124 + +/* GCC resets */ +#define GCC_CAMERA_BCR 0 +#define GCC_DISPLAY_BCR 1 +#define GCC_GPU_BCR 2 +#define GCC_PCIE_0_BCR 3 +#define GCC_PCIE_0_LINK_DOWN_BCR 4 +#define GCC_PCIE_0_NOCSR_COM_PHY_BCR 5 +#define GCC_PCIE_0_PHY_BCR 6 +#define GCC_PCIE_0_PHY_NOCSR_COM_PHY_BCR 7 +#define GCC_PCIE_1_BCR 8 +#define GCC_PCIE_1_LINK_DOWN_BCR 9 +#define GCC_PCIE_1_NOCSR_COM_PHY_BCR 10 +#define GCC_PCIE_1_PHY_BCR 11 +#define GCC_PCIE_1_PHY_NOCSR_COM_PHY_BCR 12 +#define GCC_PCIE_PHY_BCR 13 +#define GCC_PCIE_PHY_CFG_AHB_BCR 14 +#define GCC_PCIE_PHY_COM_BCR 15 +#define GCC_PDM_BCR 16 +#define GCC_QUPV3_WRAPPER_0_BCR 17 +#define GCC_QUPV3_WRAPPER_1_BCR 18 +#define GCC_QUSB2PHY_PRIM_BCR 19 +#define GCC_QUSB2PHY_SEC_BCR 20 +#define GCC_SDCC1_BCR 21 +#define GCC_USB30_PRIM_BCR 22 +#define GCC_USB3_DP_PHY_PRIM_BCR 23 +#define GCC_USB3_DP_PHY_SEC_BCR 24 +#define GCC_USB3_PHY_PRIM_BCR 25 +#define GCC_USB3_PHY_SEC_BCR 26 +#define GCC_USB3PHY_PHY_PRIM_BCR 27 +#define GCC_USB3PHY_PHY_SEC_BCR 28 +#define GCC_VIDEO_AXI0_CLK_ARES 29 +#define GCC_VIDEO_AXI1_CLK_ARES 30 +#define GCC_VIDEO_BCR 31 +#define GCC_IRIS_SS_HF_AXI_CLK_ARES 32 +#define GCC_IRIS_SS_SPD_AXI_CLK_ARES 33 +#define GCC_DDRSS_SPAD_CLK_ARES 34 + +/* GCC power domains */ +#define PCIE_0_GDSC 0 +#define PCIE_0_PHY_GDSC 1 +#define PCIE_1_GDSC 2 +#define PCIE_1_PHY_GDSC 3 +#define USB30_PRIM_GDSC 4 +#define USB3_PHY_GDSC 5 +#define HLOS1_VOTE_MM_SNOC_MMU_TBU_HF0_GDSC 6 +#define HLOS1_VOTE_MM_SNOC_MMU_TBU_SF0_GDSC 7 +#define HLOS1_VOTE_TURING_MMU_TBU0_GDSC 8 +#define HLOS1_VOTE_TURING_MMU_TBU1_GDSC 9 + +#endif -- cgit v1.2.3 From 111481020aa599764790c807312465a4c94f4b5c Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sun, 27 Oct 2024 03:24:44 +0200 Subject: dt-bindings: clk: qcom,sm8450-gpucc: add SAR2130P compatibles Expand qcom,sm8450-gpucc bindings to include SAR2130P. Signed-off-by: Konrad Dybcio Reviewed-by: Krzysztof Kozlowski Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20241027-sar2130p-clocks-v5-5-ecad2a1432ba@linaro.org Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sar2130p-gpucc.h | 33 +++++++++++++++++++++++++ include/dt-bindings/reset/qcom,sar2130p-gpucc.h | 14 +++++++++++ 2 files changed, 47 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sar2130p-gpucc.h create mode 100644 include/dt-bindings/reset/qcom,sar2130p-gpucc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sar2130p-gpucc.h b/include/dt-bindings/clock/qcom,sar2130p-gpucc.h new file mode 100644 index 000000000000..a2204369110a --- /dev/null +++ b/include/dt-bindings/clock/qcom,sar2130p-gpucc.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2022, Qualcomm Innovation Center, Inc. All rights reserved + * Copyright (c) 2024, Linaro Limited + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GPU_CC_SAR2130P_H +#define _DT_BINDINGS_CLK_QCOM_GPU_CC_SAR2130P_H + +/* GPU_CC clocks */ +#define GPU_CC_AHB_CLK 0 +#define GPU_CC_CRC_AHB_CLK 1 +#define GPU_CC_CX_FF_CLK 2 +#define GPU_CC_CX_GMU_CLK 3 +#define GPU_CC_CXO_AON_CLK 4 +#define GPU_CC_CXO_CLK 5 +#define GPU_CC_FF_CLK_SRC 6 +#define GPU_CC_GMU_CLK_SRC 7 +#define GPU_CC_GX_GMU_CLK 8 +#define GPU_CC_HLOS1_VOTE_GPU_SMMU_CLK 9 +#define GPU_CC_HUB_AON_CLK 10 +#define GPU_CC_HUB_CLK_SRC 11 +#define GPU_CC_HUB_CX_INT_CLK 12 +#define GPU_CC_MEMNOC_GFX_CLK 13 +#define GPU_CC_PLL0 14 +#define GPU_CC_PLL1 15 +#define GPU_CC_SLEEP_CLK 16 + +/* GDSCs */ +#define GPU_GX_GDSC 0 +#define GPU_CX_GDSC 1 + +#endif diff --git a/include/dt-bindings/reset/qcom,sar2130p-gpucc.h b/include/dt-bindings/reset/qcom,sar2130p-gpucc.h new file mode 100644 index 000000000000..99ba5f092e2a --- /dev/null +++ b/include/dt-bindings/reset/qcom,sar2130p-gpucc.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2019, The Linux Foundation. All rights reserved. + * Copyright (c) 2024, Linaro Limited + */ + +#ifndef _DT_BINDINGS_RESET_QCOM_GPU_CC_SAR2130P_H +#define _DT_BINDINGS_RESET_QCOM_GPU_CC_SAR2130P_H + +#define GPUCC_GPU_CC_GX_BCR 0 +#define GPUCC_GPU_CC_ACD_BCR 1 +#define GPUCC_GPU_CC_GX_ACD_IROOT_BCR 2 + +#endif -- cgit v1.2.3 From 03e525c66de2535dc1afd26be004621c7c5a253e Mon Sep 17 00:00:00 2001 From: Sricharan Ramabadhran Date: Mon, 28 Oct 2024 11:35:02 +0530 Subject: dt-bindings: clock: Add Qualcomm IPQ5424 GCC binding Add binding for the Qualcomm IPQ5424 Global Clock Controller Signed-off-by: Sricharan Ramabadhran Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241028060506.246606-3-quic_srichara@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,ipq5424-gcc.h | 156 ++++++++++++++ include/dt-bindings/reset/qcom,ipq5424-gcc.h | 310 +++++++++++++++++++++++++++ 2 files changed, 466 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,ipq5424-gcc.h create mode 100644 include/dt-bindings/reset/qcom,ipq5424-gcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,ipq5424-gcc.h b/include/dt-bindings/clock/qcom,ipq5424-gcc.h new file mode 100644 index 000000000000..755ce7a71c7c --- /dev/null +++ b/include/dt-bindings/clock/qcom,ipq5424-gcc.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2018,2020 The Linux Foundation. All rights reserved. + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLOCK_IPQ_GCC_IPQ5424_H +#define _DT_BINDINGS_CLOCK_IPQ_GCC_IPQ5424_H + +#define GPLL0 0 +#define GPLL4 1 +#define GPLL2 2 +#define GPLL2_OUT_MAIN 3 +#define GCC_SLEEP_CLK_SRC 4 +#define GCC_APSS_DBG_CLK 5 +#define GCC_USB0_EUD_AT_CLK 6 +#define GCC_PCIE0_AXI_M_CLK_SRC 7 +#define GCC_PCIE0_AXI_M_CLK 8 +#define GCC_PCIE1_AXI_M_CLK_SRC 9 +#define GCC_PCIE1_AXI_M_CLK 10 +#define GCC_PCIE2_AXI_M_CLK_SRC 11 +#define GCC_PCIE2_AXI_M_CLK 12 +#define GCC_PCIE3_AXI_M_CLK_SRC 13 +#define GCC_PCIE3_AXI_M_CLK 14 +#define GCC_PCIE0_AXI_S_CLK_SRC 15 +#define GCC_PCIE0_AXI_S_BRIDGE_CLK 16 +#define GCC_PCIE0_AXI_S_CLK 17 +#define GCC_PCIE1_AXI_S_CLK_SRC 18 +#define GCC_PCIE1_AXI_S_BRIDGE_CLK 19 +#define GCC_PCIE1_AXI_S_CLK 20 +#define GCC_PCIE2_AXI_S_CLK_SRC 21 +#define GCC_PCIE2_AXI_S_BRIDGE_CLK 22 +#define GCC_PCIE2_AXI_S_CLK 23 +#define GCC_PCIE3_AXI_S_CLK_SRC 24 +#define GCC_PCIE3_AXI_S_BRIDGE_CLK 25 +#define GCC_PCIE3_AXI_S_CLK 26 +#define GCC_PCIE0_PIPE_CLK_SRC 27 +#define GCC_PCIE0_PIPE_CLK 28 +#define GCC_PCIE1_PIPE_CLK_SRC 29 +#define GCC_PCIE1_PIPE_CLK 30 +#define GCC_PCIE2_PIPE_CLK_SRC 31 +#define GCC_PCIE2_PIPE_CLK 32 +#define GCC_PCIE3_PIPE_CLK_SRC 33 +#define GCC_PCIE3_PIPE_CLK 34 +#define GCC_PCIE_AUX_CLK_SRC 35 +#define GCC_PCIE0_AUX_CLK 36 +#define GCC_PCIE1_AUX_CLK 37 +#define GCC_PCIE2_AUX_CLK 38 +#define GCC_PCIE3_AUX_CLK 39 +#define GCC_PCIE0_AHB_CLK 40 +#define GCC_PCIE1_AHB_CLK 41 +#define GCC_PCIE2_AHB_CLK 42 +#define GCC_PCIE3_AHB_CLK 43 +#define GCC_USB0_AUX_CLK_SRC 44 +#define GCC_USB0_AUX_CLK 45 +#define GCC_USB0_MASTER_CLK 46 +#define GCC_USB0_MOCK_UTMI_CLK_SRC 47 +#define GCC_USB0_MOCK_UTMI_DIV_CLK_SRC 48 +#define GCC_USB0_MOCK_UTMI_CLK 49 +#define GCC_USB0_PIPE_CLK_SRC 50 +#define GCC_USB0_PIPE_CLK 51 +#define GCC_USB0_PHY_CFG_AHB_CLK 52 +#define GCC_USB0_SLEEP_CLK 53 +#define GCC_SDCC1_APPS_CLK_SRC 54 +#define GCC_SDCC1_APPS_CLK 55 +#define GCC_SDCC1_ICE_CORE_CLK_SRC 56 +#define GCC_SDCC1_ICE_CORE_CLK 57 +#define GCC_SDCC1_AHB_CLK 58 +#define GCC_PCNOC_BFDCD_CLK_SRC 59 +#define GCC_NSSCFG_CLK 60 +#define GCC_NSSNOC_NSSCC_CLK 61 +#define GCC_NSSCC_CLK 62 +#define GCC_NSSNOC_PCNOC_1_CLK 63 +#define GCC_QPIC_AHB_CLK 64 +#define GCC_QPIC_CLK 65 +#define GCC_MDIO_AHB_CLK 66 +#define GCC_PRNG_AHB_CLK 67 +#define GCC_UNIPHY0_AHB_CLK 68 +#define GCC_UNIPHY1_AHB_CLK 69 +#define GCC_UNIPHY2_AHB_CLK 70 +#define GCC_CMN_12GPLL_AHB_CLK 71 +#define GCC_SYSTEM_NOC_BFDCD_CLK_SRC 72 +#define GCC_NSSNOC_SNOC_CLK 73 +#define GCC_NSSNOC_SNOC_1_CLK 74 +#define GCC_WCSS_AHB_CLK_SRC 75 +#define GCC_QDSS_AT_CLK_SRC 76 +#define GCC_NSSNOC_ATB_CLK 77 +#define GCC_QDSS_AT_CLK 78 +#define GCC_QDSS_TSCTR_CLK_SRC 79 +#define GCC_NSS_TS_CLK 80 +#define GCC_QPIC_IO_MACRO_CLK_SRC 81 +#define GCC_QPIC_IO_MACRO_CLK 82 +#define GCC_LPASS_AXIM_CLK_SRC 83 +#define GCC_LPASS_CORE_AXIM_CLK 84 +#define GCC_LPASS_SWAY_CLK_SRC 85 +#define GCC_LPASS_SWAY_CLK 86 +#define GCC_CNOC_LPASS_CFG_CLK 87 +#define GCC_SNOC_LPASS_CLK 88 +#define GCC_ADSS_PWM_CLK_SRC 89 +#define GCC_ADSS_PWM_CLK 90 +#define GCC_XO_CLK_SRC 91 +#define GCC_NSSNOC_XO_DCD_CLK 92 +#define GCC_NSSNOC_QOSGEN_REF_CLK 93 +#define GCC_NSSNOC_TIMEOUT_REF_CLK 94 +#define GCC_UNIPHY0_SYS_CLK 95 +#define GCC_UNIPHY1_SYS_CLK 96 +#define GCC_UNIPHY2_SYS_CLK 97 +#define GCC_CMN_12GPLL_SYS_CLK 98 +#define GCC_UNIPHY_SYS_CLK_SRC 99 +#define GCC_NSS_TS_CLK_SRC 100 +#define GCC_ANOC_PCIE0_1LANE_M_CLK 101 +#define GCC_ANOC_PCIE1_1LANE_M_CLK 102 +#define GCC_ANOC_PCIE2_2LANE_M_CLK 103 +#define GCC_ANOC_PCIE3_2LANE_M_CLK 104 +#define GCC_CNOC_PCIE0_1LANE_S_CLK 105 +#define GCC_CNOC_PCIE1_1LANE_S_CLK 106 +#define GCC_CNOC_PCIE2_2LANE_S_CLK 107 +#define GCC_CNOC_PCIE3_2LANE_S_CLK 108 +#define GCC_CNOC_USB_CLK 109 +#define GCC_CNOC_WCSS_AHB_CLK 110 +#define GCC_QUPV3_AHB_MST_CLK 111 +#define GCC_QUPV3_AHB_SLV_CLK 112 +#define GCC_QUPV3_I2C0_CLK 113 +#define GCC_QUPV3_I2C1_CLK 114 +#define GCC_QUPV3_SPI0_CLK 115 +#define GCC_QUPV3_SPI1_CLK 116 +#define GCC_QUPV3_UART0_CLK 117 +#define GCC_QUPV3_UART1_CLK 118 +#define GCC_QPIC_CLK_SRC 119 +#define GCC_QUPV3_I2C0_CLK_SRC 120 +#define GCC_QUPV3_I2C1_CLK_SRC 121 +#define GCC_QUPV3_I2C0_DIV_CLK_SRC 122 +#define GCC_QUPV3_I2C1_DIV_CLK_SRC 123 +#define GCC_QUPV3_SPI0_CLK_SRC 124 +#define GCC_QUPV3_SPI1_CLK_SRC 125 +#define GCC_QUPV3_UART0_CLK_SRC 126 +#define GCC_QUPV3_UART1_CLK_SRC 127 +#define GCC_USB1_MASTER_CLK 128 +#define GCC_USB1_MOCK_UTMI_CLK_SRC 129 +#define GCC_USB1_MOCK_UTMI_DIV_CLK_SRC 130 +#define GCC_USB1_MOCK_UTMI_CLK 131 +#define GCC_USB1_SLEEP_CLK 132 +#define GCC_USB1_PHY_CFG_AHB_CLK 133 +#define GCC_USB0_MASTER_CLK_SRC 134 +#define GCC_QDSS_DAP_CLK 135 +#define GCC_PCIE0_RCHNG_CLK_SRC 136 +#define GCC_PCIE0_RCHNG_CLK 137 +#define GCC_PCIE1_RCHNG_CLK_SRC 138 +#define GCC_PCIE1_RCHNG_CLK 139 +#define GCC_PCIE2_RCHNG_CLK_SRC 140 +#define GCC_PCIE2_RCHNG_CLK 141 +#define GCC_PCIE3_RCHNG_CLK_SRC 142 +#define GCC_PCIE3_RCHNG_CLK 143 +#define GCC_IM_SLEEP_CLK 144 + +#endif diff --git a/include/dt-bindings/reset/qcom,ipq5424-gcc.h b/include/dt-bindings/reset/qcom,ipq5424-gcc.h new file mode 100644 index 000000000000..16a72771c79a --- /dev/null +++ b/include/dt-bindings/reset/qcom,ipq5424-gcc.h @@ -0,0 +1,310 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2018,2020 The Linux Foundation. All rights reserved. + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_RESET_IPQ_GCC_IPQ5424_H +#define _DT_BINDINGS_RESET_IPQ_GCC_IPQ5424_H + +#define GCC_QUPV3_BCR 0 +#define GCC_QUPV3_I2C0_BCR 1 +#define GCC_QUPV3_UART0_BCR 2 +#define GCC_QUPV3_I2C1_BCR 3 +#define GCC_QUPV3_UART1_BCR 4 +#define GCC_QUPV3_SPI0_BCR 5 +#define GCC_QUPV3_SPI1_BCR 6 +#define GCC_IMEM_BCR 7 +#define GCC_TME_BCR 8 +#define GCC_DDRSS_BCR 9 +#define GCC_PRNG_BCR 10 +#define GCC_BOOT_ROM_BCR 11 +#define GCC_NSS_BCR 12 +#define GCC_MDIO_BCR 13 +#define GCC_UNIPHY0_BCR 14 +#define GCC_UNIPHY1_BCR 15 +#define GCC_UNIPHY2_BCR 16 +#define GCC_WCSS_BCR 17 +#define GCC_SEC_CTRL_BCR 19 +#define GCC_TME_SEC_BUS_BCR 20 +#define GCC_ADSS_BCR 21 +#define GCC_LPASS_BCR 22 +#define GCC_PCIE0_BCR 23 +#define GCC_PCIE0_LINK_DOWN_BCR 24 +#define GCC_PCIE0PHY_PHY_BCR 25 +#define GCC_PCIE0_PHY_BCR 26 +#define GCC_PCIE1_BCR 27 +#define GCC_PCIE1_LINK_DOWN_BCR 28 +#define GCC_PCIE1PHY_PHY_BCR 29 +#define GCC_PCIE1_PHY_BCR 30 +#define GCC_PCIE2_BCR 31 +#define GCC_PCIE2_LINK_DOWN_BCR 32 +#define GCC_PCIE2PHY_PHY_BCR 33 +#define GCC_PCIE2_PHY_BCR 34 +#define GCC_PCIE3_BCR 35 +#define GCC_PCIE3_LINK_DOWN_BCR 36 +#define GCC_PCIE3PHY_PHY_BCR 37 +#define GCC_PCIE3_PHY_BCR 38 +#define GCC_USB_BCR 39 +#define GCC_QUSB2_0_PHY_BCR 40 +#define GCC_USB0_PHY_BCR 41 +#define GCC_USB3PHY_0_PHY_BCR 42 +#define GCC_QDSS_BCR 43 +#define GCC_SNOC_BCR 44 +#define GCC_ANOC_BCR 45 +#define GCC_PCNOC_BCR 46 +#define GCC_PCNOC_BUS_TIMEOUT0_BCR 47 +#define GCC_PCNOC_BUS_TIMEOUT1_BCR 48 +#define GCC_PCNOC_BUS_TIMEOUT2_BCR 49 +#define GCC_PCNOC_BUS_TIMEOUT3_BCR 50 +#define GCC_PCNOC_BUS_TIMEOUT4_BCR 51 +#define GCC_PCNOC_BUS_TIMEOUT5_BCR 52 +#define GCC_PCNOC_BUS_TIMEOUT6_BCR 53 +#define GCC_PCNOC_BUS_TIMEOUT7_BCR 54 +#define GCC_PCNOC_BUS_TIMEOUT8_BCR 55 +#define GCC_PCNOC_BUS_TIMEOUT9_BCR 56 +#define GCC_QPIC_BCR 57 +#define GCC_SDCC_BCR 58 +#define GCC_DCC_BCR 59 +#define GCC_SPDM_BCR 60 +#define GCC_MPM_BCR 61 +#define GCC_APC0_VOLTAGE_DROOP_DETECTOR_BCR 62 +#define GCC_RBCPR_BCR 63 +#define GCC_CMN_BLK_BCR 64 +#define GCC_TCSR_BCR 65 +#define GCC_TLMM_BCR 66 +#define GCC_QUPV3_AHB_MST_ARES 67 +#define GCC_QUPV3_CORE_ARES 68 +#define GCC_QUPV3_2X_CORE_ARES 69 +#define GCC_QUPV3_SLEEP_ARES 70 +#define GCC_QUPV3_AHB_SLV_ARES 71 +#define GCC_QUPV3_I2C0_ARES 72 +#define GCC_QUPV3_UART0_ARES 73 +#define GCC_QUPV3_I2C1_ARES 74 +#define GCC_QUPV3_UART1_ARES 75 +#define GCC_QUPV3_SPI0_ARES 76 +#define GCC_QUPV3_SPI1_ARES 77 +#define GCC_DEBUG_ARES 78 +#define GCC_GP1_ARES 79 +#define GCC_GP2_ARES 80 +#define GCC_GP3_ARES 81 +#define GCC_IMEM_AXI_ARES 82 +#define GCC_IMEM_CFG_AHB_ARES 83 +#define GCC_TME_ARES 84 +#define GCC_TME_TS_ARES 85 +#define GCC_TME_SLOW_ARES 86 +#define GCC_TME_RTC_TOGGLE_ARES 87 +#define GCC_TIC_ARES 88 +#define GCC_PRNG_AHB_ARES 89 +#define GCC_BOOT_ROM_AHB_ARES 90 +#define GCC_NSSNOC_ATB_ARES 91 +#define GCC_NSS_TS_ARES 92 +#define GCC_NSSNOC_QOSGEN_REF_ARES 93 +#define GCC_NSSNOC_TIMEOUT_REF_ARES 94 +#define GCC_NSSNOC_MEMNOC_ARES 95 +#define GCC_NSSNOC_SNOC_ARES 96 +#define GCC_NSSCFG_ARES 97 +#define GCC_NSSNOC_NSSCC_ARES 98 +#define GCC_NSSCC_ARES 99 +#define GCC_MDIO_AHB_ARES 100 +#define GCC_UNIPHY0_SYS_ARES 101 +#define GCC_UNIPHY0_AHB_ARES 102 +#define GCC_UNIPHY1_SYS_ARES 103 +#define GCC_UNIPHY1_AHB_ARES 104 +#define GCC_UNIPHY2_SYS_ARES 105 +#define GCC_UNIPHY2_AHB_ARES 106 +#define GCC_NSSNOC_XO_DCD_ARES 107 +#define GCC_NSSNOC_SNOC_1_ARES 108 +#define GCC_NSSNOC_PCNOC_1_ARES 109 +#define GCC_NSSNOC_MEMNOC_1_ARES 110 +#define GCC_DDRSS_ATB_ARES 111 +#define GCC_DDRSS_AHB_ARES 112 +#define GCC_GEMNOC_AHB_ARES 113 +#define GCC_GEMNOC_Q6_AXI_ARES 114 +#define GCC_GEMNOC_NSSNOC_ARES 115 +#define GCC_GEMNOC_SNOC_ARES 116 +#define GCC_GEMNOC_APSS_ARES 117 +#define GCC_GEMNOC_QOSGEN_EXTREF_ARES 118 +#define GCC_GEMNOC_TS_ARES 119 +#define GCC_DDRSS_SMS_SLOW_ARES 120 +#define GCC_GEMNOC_CNOC_ARES 121 +#define GCC_GEMNOC_XO_DBG_ARES 122 +#define GCC_GEMNOC_ANOC_ARES 123 +#define GCC_DDRSS_LLCC_ATB_ARES 124 +#define GCC_LLCC_TPDM_CFG_ARES 125 +#define GCC_TME_BUS_ARES 126 +#define GCC_SEC_CTRL_ACC_ARES 127 +#define GCC_SEC_CTRL_ARES 128 +#define GCC_SEC_CTRL_SENSE_ARES 129 +#define GCC_SEC_CTRL_AHB_ARES 130 +#define GCC_SEC_CTRL_BOOT_ROM_PATCH_ARES 131 +#define GCC_ADSS_PWM_ARES 132 +#define GCC_TME_ATB_ARES 133 +#define GCC_TME_DBGAPB_ARES 134 +#define GCC_TME_DEBUG_ARES 135 +#define GCC_TME_AT_ARES 136 +#define GCC_TME_APB_ARES 137 +#define GCC_TME_DMI_DBG_HS_ARES 138 +#define GCC_APSS_AHB_ARES 139 +#define GCC_APSS_AXI_ARES 140 +#define GCC_CPUSS_TRIG_ARES 141 +#define GCC_APSS_DBG_ARES 142 +#define GCC_APSS_TS_ARES 143 +#define GCC_APSS_ATB_ARES 144 +#define GCC_Q6_AXIM_ARES 145 +#define GCC_Q6_AXIS_ARES 146 +#define GCC_Q6_AHB_ARES 147 +#define GCC_Q6_AHB_S_ARES 148 +#define GCC_Q6SS_ATBM_ARES 149 +#define GCC_Q6_TSCTR_1TO2_ARES 150 +#define GCC_Q6SS_PCLKDBG_ARES 151 +#define GCC_Q6SS_TRIG_ARES 152 +#define GCC_Q6SS_BOOT_CBCR_ARES 153 +#define GCC_WCSS_DBG_IFC_APB_ARES 154 +#define GCC_WCSS_DBG_IFC_ATB_ARES 155 +#define GCC_WCSS_DBG_IFC_NTS_ARES 156 +#define GCC_WCSS_DBG_IFC_DAPBUS_ARES 157 +#define GCC_WCSS_DBG_IFC_APB_BDG_ARES 158 +#define GCC_WCSS_DBG_IFC_NTS_BDG_ARES 159 +#define GCC_WCSS_DBG_IFC_DAPBUS_BDG_ARES 160 +#define GCC_WCSS_ECAHB_ARES 161 +#define GCC_WCSS_ACMT_ARES 162 +#define GCC_WCSS_AHB_S_ARES 163 +#define GCC_WCSS_AXI_M_ARES 164 +#define GCC_PCNOC_WAPSS_ARES 165 +#define GCC_SNOC_WAPSS_ARES 166 +#define GCC_LPASS_SWAY_ARES 167 +#define GCC_LPASS_CORE_AXIM_ARES 168 +#define GCC_PCIE0_AHB_ARES 169 +#define GCC_PCIE0_AXI_M_ARES 170 +#define GCC_PCIE0_AXI_S_ARES 171 +#define GCC_PCIE0_AXI_S_BRIDGE_ARES 172 +#define GCC_PCIE0_PIPE_ARES 173 +#define GCC_PCIE0_AUX_ARES 174 +#define GCC_PCIE1_AHB_ARES 175 +#define GCC_PCIE1_AXI_M_ARES 176 +#define GCC_PCIE1_AXI_S_ARES 177 +#define GCC_PCIE1_AXI_S_BRIDGE_ARES 178 +#define GCC_PCIE1_PIPE_ARES 179 +#define GCC_PCIE1_AUX_ARES 180 +#define GCC_PCIE2_AHB_ARES 181 +#define GCC_PCIE2_AXI_M_ARES 182 +#define GCC_PCIE2_AXI_S_ARES 183 +#define GCC_PCIE2_AXI_S_BRIDGE_ARES 184 +#define GCC_PCIE2_PIPE_ARES 185 +#define GCC_PCIE2_AUX_ARES 186 +#define GCC_PCIE3_AHB_ARES 187 +#define GCC_PCIE3_AXI_M_ARES 188 +#define GCC_PCIE3_AXI_S_ARES 189 +#define GCC_PCIE3_AXI_S_BRIDGE_ARES 190 +#define GCC_PCIE3_PIPE_ARES 191 +#define GCC_PCIE3_AUX_ARES 192 +#define GCC_USB0_MASTER_ARES 193 +#define GCC_USB0_AUX_ARES 194 +#define GCC_USB0_MOCK_UTMI_ARES 195 +#define GCC_USB0_PIPE_ARES 196 +#define GCC_USB0_SLEEP_ARES 197 +#define GCC_USB0_PHY_CFG_AHB_ARES 198 +#define GCC_QDSS_AT_ARES 199 +#define GCC_QDSS_STM_ARES 200 +#define GCC_QDSS_TRACECLKIN_ARES 201 +#define GCC_QDSS_TSCTR_DIV2_ARES 202 +#define GCC_QDSS_TSCTR_DIV3_ARES 203 +#define GCC_QDSS_TSCTR_DIV4_ARES 204 +#define GCC_QDSS_TSCTR_DIV8_ARES 205 +#define GCC_QDSS_TSCTR_DIV16_ARES 206 +#define GCC_QDSS_DAP_ARES 207 +#define GCC_QDSS_APB2JTAG_ARES 208 +#define GCC_QDSS_ETR_USB_ARES 209 +#define GCC_QDSS_DAP_AHB_ARES 210 +#define GCC_QDSS_CFG_AHB_ARES 211 +#define GCC_QDSS_EUD_AT_ARES 212 +#define GCC_QDSS_TS_ARES 213 +#define GCC_QDSS_USB_ARES 214 +#define GCC_SYS_NOC_AXI_ARES 215 +#define GCC_SNOC_QOSGEN_EXTREF_ARES 216 +#define GCC_CNOC_LPASS_CFG_ARES 217 +#define GCC_SYS_NOC_AT_ARES 218 +#define GCC_SNOC_PCNOC_AHB_ARES 219 +#define GCC_SNOC_TME_ARES 220 +#define GCC_SNOC_XO_DCD_ARES 221 +#define GCC_SNOC_TS_ARES 222 +#define GCC_ANOC0_AXI_ARES 223 +#define GCC_ANOC_PCIE0_1LANE_M_ARES 224 +#define GCC_ANOC_PCIE2_2LANE_M_ARES 225 +#define GCC_ANOC_PCIE1_1LANE_M_ARES 226 +#define GCC_ANOC_PCIE3_2LANE_M_ARES 227 +#define GCC_ANOC_PCNOC_AHB_ARES 228 +#define GCC_ANOC_QOSGEN_EXTREF_ARES 229 +#define GCC_ANOC_XO_DCD_ARES 230 +#define GCC_SNOC_XO_DBG_ARES 231 +#define GCC_AGGRNOC_ATB_ARES 232 +#define GCC_AGGRNOC_TS_ARES 233 +#define GCC_USB0_EUD_AT_ARES 234 +#define GCC_PCNOC_TIC_ARES 235 +#define GCC_PCNOC_AHB_ARES 236 +#define GCC_PCNOC_XO_DBG_ARES 237 +#define GCC_SNOC_LPASS_ARES 238 +#define GCC_PCNOC_AT_ARES 239 +#define GCC_PCNOC_XO_DCD_ARES 240 +#define GCC_PCNOC_TS_ARES 241 +#define GCC_PCNOC_BUS_TIMEOUT0_AHB_ARES 242 +#define GCC_PCNOC_BUS_TIMEOUT1_AHB_ARES 243 +#define GCC_PCNOC_BUS_TIMEOUT2_AHB_ARES 244 +#define GCC_PCNOC_BUS_TIMEOUT3_AHB_ARES 245 +#define GCC_PCNOC_BUS_TIMEOUT4_AHB_ARES 246 +#define GCC_PCNOC_BUS_TIMEOUT5_AHB_ARES 247 +#define GCC_PCNOC_BUS_TIMEOUT6_AHB_ARES 248 +#define GCC_PCNOC_BUS_TIMEOUT7_AHB_ARES 249 +#define GCC_Q6_AXIM_RESET 250 +#define GCC_Q6_AXIS_RESET 251 +#define GCC_Q6_AHB_S_RESET 252 +#define GCC_Q6_AHB_RESET 253 +#define GCC_Q6SS_DBG_RESET 254 +#define GCC_WCSS_ECAHB_RESET 255 +#define GCC_WCSS_DBG_BDG_RESET 256 +#define GCC_WCSS_DBG_RESET 257 +#define GCC_WCSS_AXI_M_RESET 258 +#define GCC_WCSS_AHB_S_RESET 259 +#define GCC_WCSS_ACMT_RESET 260 +#define GCC_WCSSAON_RESET 261 +#define GCC_PCIE0_PIPE_RESET 262 +#define GCC_PCIE0_CORE_STICKY_RESET 263 +#define GCC_PCIE0_AXI_S_STICKY_RESET 264 +#define GCC_PCIE0_AXI_S_RESET 265 +#define GCC_PCIE0_AXI_M_STICKY_RESET 266 +#define GCC_PCIE0_AXI_M_RESET 267 +#define GCC_PCIE0_AUX_RESET 268 +#define GCC_PCIE0_AHB_RESET 269 +#define GCC_PCIE1_PIPE_RESET 270 +#define GCC_PCIE1_CORE_STICKY_RESET 271 +#define GCC_PCIE1_AXI_S_STICKY_RESET 272 +#define GCC_PCIE1_AXI_S_RESET 273 +#define GCC_PCIE1_AXI_M_STICKY_RESET 274 +#define GCC_PCIE1_AXI_M_RESET 275 +#define GCC_PCIE1_AUX_RESET 276 +#define GCC_PCIE1_AHB_RESET 277 +#define GCC_PCIE2_PIPE_RESET 278 +#define GCC_PCIE2_CORE_STICKY_RESET 279 +#define GCC_PCIE2_AXI_S_STICKY_RESET 280 +#define GCC_PCIE2_AXI_S_RESET 281 +#define GCC_PCIE2_AXI_M_STICKY_RESET 282 +#define GCC_PCIE2_AXI_M_RESET 283 +#define GCC_PCIE2_AUX_RESET 284 +#define GCC_PCIE2_AHB_RESET 285 +#define GCC_PCIE3_PIPE_RESET 286 +#define GCC_PCIE3_CORE_STICKY_RESET 287 +#define GCC_PCIE3_AXI_S_STICKY_RESET 288 +#define GCC_PCIE3_AXI_S_RESET 289 +#define GCC_PCIE3_AXI_M_STICKY_RESET 290 +#define GCC_PCIE3_AXI_M_RESET 291 +#define GCC_PCIE3_AUX_RESET 292 +#define GCC_PCIE3_AHB_RESET 293 +#define GCC_NSS_PARTIAL_RESET 294 +#define GCC_UNIPHY0_XPCS_ARES 295 +#define GCC_UNIPHY1_XPCS_ARES 296 +#define GCC_UNIPHY2_XPCS_ARES 297 +#define GCC_USB1_BCR 298 +#define GCC_QUSB2_1_PHY_BCR 299 + +#endif -- cgit v1.2.3 From c9cfca98998eb1cd14bdeccd607982ae818711e7 Mon Sep 17 00:00:00 2001 From: Manikanta Mylavarapu Date: Wed, 16 Oct 2024 20:45:26 +0530 Subject: dt-bindings: arm: qcom,ids: add SoC ID for IPQ5424/IPQ5404 Add the ID for Qualcomm IPQ5424/IPQ5404 SoC. Signed-off-by: Manikanta Mylavarapu Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241016151528.2893599-2-quic_mmanikan@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/arm/qcom,ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h index b66f8aeb9f47..e850dc3a1ad3 100644 --- a/include/dt-bindings/arm/qcom,ids.h +++ b/include/dt-bindings/arm/qcom,ids.h @@ -279,6 +279,8 @@ #define QCOM_ID_QCM8550 604 #define QCOM_ID_IPQ5300 624 #define QCOM_ID_IPQ5321 650 +#define QCOM_ID_IPQ5424 651 +#define QCOM_ID_IPQ5404 671 #define QCOM_ID_QCS9100 667 #define QCOM_ID_QCS8300 674 #define QCOM_ID_QCS8275 675 -- cgit v1.2.3 From d2d243df445a88c26e91eac02b041213c7a32e9e Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Sun, 22 Sep 2024 12:32:13 +0800 Subject: mm: shmem: fix khugepaged activation policy for shmem Shmem has a separate interface (different from anonymous pages) to control huge page allocation, that means shmem THP can be enabled while anonymous THP is disabled. However, in this case, khugepaged will not start to collapse shmem THP, which is unreasonable. To fix this issue, we should call start_stop_khugepaged() to activate or deactivate the khugepaged thread when setting shmem mTHP interfaces. Moreover, add a new helper shmem_hpage_pmd_enabled() to help to check whether shmem THP is enabled, which will determine if khugepaged should be activated. Link: https://lkml.kernel.org/r/9b9c6cbc4499bf44c6455367fd9e0f6036525680.1726978977.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Reported-by: Ryan Roberts Reviewed-by: Ryan Roberts Cc: David Hildenbrand Cc: Hugh Dickins Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/shmem_fs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 515a9a6a3c6f..ee6635052383 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -114,6 +114,7 @@ int shmem_unuse(unsigned int type); unsigned long shmem_allowable_huge_orders(struct inode *inode, struct vm_area_struct *vma, pgoff_t index, loff_t write_end, bool shmem_huge_force); +bool shmem_hpage_pmd_enabled(void); #else static inline unsigned long shmem_allowable_huge_orders(struct inode *inode, struct vm_area_struct *vma, pgoff_t index, @@ -121,6 +122,11 @@ static inline unsigned long shmem_allowable_huge_orders(struct inode *inode, { return 0; } + +static inline bool shmem_hpage_pmd_enabled(void) +{ + return false; +} #endif #ifdef CONFIG_SHMEM -- cgit v1.2.3 From 9e9e085effe9b7e342138fde3cf8577d22509932 Mon Sep 17 00:00:00 2001 From: Adrian Huang Date: Sat, 27 Jul 2024 00:52:46 +0800 Subject: mm/vmalloc: combine all TLB flush operations of KASAN shadow virtual address into one operation When compiling kernel source 'make -j $(nproc)' with the up-and-running KASAN-enabled kernel on a 256-core machine, the following soft lockup is shown: watchdog: BUG: soft lockup - CPU#28 stuck for 22s! [kworker/28:1:1760] CPU: 28 PID: 1760 Comm: kworker/28:1 Kdump: loaded Not tainted 6.10.0-rc5 #95 Workqueue: events drain_vmap_area_work RIP: 0010:smp_call_function_many_cond+0x1d8/0xbb0 Code: 38 c8 7c 08 84 c9 0f 85 49 08 00 00 8b 45 08 a8 01 74 2e 48 89 f1 49 89 f7 48 c1 e9 03 41 83 e7 07 4c 01 e9 41 83 c7 03 f3 90 <0f> b6 01 41 38 c7 7c 08 84 c0 0f 85 d4 06 00 00 8b 45 08 a8 01 75 RSP: 0018:ffffc9000cb3fb60 EFLAGS: 00000202 RAX: 0000000000000011 RBX: ffff8883bc4469c0 RCX: ffffed10776e9949 RDX: 0000000000000002 RSI: ffff8883bb74ca48 RDI: ffffffff8434dc50 RBP: ffff8883bb74ca40 R08: ffff888103585dc0 R09: ffff8884533a1800 R10: 0000000000000004 R11: ffffffffffffffff R12: ffffed1077888d39 R13: dffffc0000000000 R14: ffffed1077888d38 R15: 0000000000000003 FS: 0000000000000000(0000) GS:ffff8883bc400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005577b5c8d158 CR3: 0000000004850000 CR4: 0000000000350ef0 Call Trace: ? watchdog_timer_fn+0x2cd/0x390 ? __pfx_watchdog_timer_fn+0x10/0x10 ? __hrtimer_run_queues+0x300/0x6d0 ? sched_clock_cpu+0x69/0x4e0 ? __pfx___hrtimer_run_queues+0x10/0x10 ? srso_return_thunk+0x5/0x5f ? ktime_get_update_offsets_now+0x7f/0x2a0 ? srso_return_thunk+0x5/0x5f ? srso_return_thunk+0x5/0x5f ? hrtimer_interrupt+0x2ca/0x760 ? __sysvec_apic_timer_interrupt+0x8c/0x2b0 ? sysvec_apic_timer_interrupt+0x6a/0x90 ? asm_sysvec_apic_timer_interrupt+0x16/0x20 ? smp_call_function_many_cond+0x1d8/0xbb0 ? __pfx_do_kernel_range_flush+0x10/0x10 on_each_cpu_cond_mask+0x20/0x40 flush_tlb_kernel_range+0x19b/0x250 ? srso_return_thunk+0x5/0x5f ? kasan_release_vmalloc+0xa7/0xc0 purge_vmap_node+0x357/0x820 ? __pfx_purge_vmap_node+0x10/0x10 __purge_vmap_area_lazy+0x5b8/0xa10 drain_vmap_area_work+0x21/0x30 process_one_work+0x661/0x10b0 worker_thread+0x844/0x10e0 ? srso_return_thunk+0x5/0x5f ? __kthread_parkme+0x82/0x140 ? __pfx_worker_thread+0x10/0x10 kthread+0x2a5/0x370 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x30/0x70 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Debugging Analysis: 1. The following ftrace log shows that the lockup CPU spends too much time iterating vmap_nodes and flushing TLB when purging vm_area structures. (Some info is trimmed). kworker: funcgraph_entry: | drain_vmap_area_work() { kworker: funcgraph_entry: | mutex_lock() { kworker: funcgraph_entry: 1.092 us | __cond_resched(); kworker: funcgraph_exit: 3.306 us | } ... ... kworker: funcgraph_entry: | flush_tlb_kernel_range() { ... ... kworker: funcgraph_exit: # 7533.649 us | } ... ... kworker: funcgraph_entry: 2.344 us | mutex_unlock(); kworker: funcgraph_exit: $ 23871554 us | } The drain_vmap_area_work() spends over 23 seconds. There are 2805 flush_tlb_kernel_range() calls in the ftrace log. * One is called in __purge_vmap_area_lazy(). * Others are called by purge_vmap_node->kasan_release_vmalloc. purge_vmap_node() iteratively releases kasan vmalloc allocations and flushes TLB for each vmap_area. - [Rough calculation] Each flush_tlb_kernel_range() runs about 7.5ms. -- 2804 * 7.5ms = 21.03 seconds. -- That's why a soft lock is triggered. 2. Extending the soft lockup time can work around the issue (For example, # echo 60 > /proc/sys/kernel/watchdog_thresh). This confirms the above-mentioned speculation: drain_vmap_area_work() spends too much time. If we combine all TLB flush operations of the KASAN shadow virtual address into one operation in the call path 'purge_vmap_node()->kasan_release_vmalloc()', the running time of drain_vmap_area_work() can be saved greatly. The idea is from the flush_tlb_kernel_range() call in __purge_vmap_area_lazy(). And, the soft lockup won't be triggered. Here is the test result based on 6.10: [6.10 wo/ the patch] 1. ftrace latency profiling (record a trace if the latency > 20s). echo 20000000 > /sys/kernel/debug/tracing/tracing_thresh echo drain_vmap_area_work > /sys/kernel/debug/tracing/set_graph_function echo function_graph > /sys/kernel/debug/tracing/current_tracer echo 1 > /sys/kernel/debug/tracing/tracing_on 2. Run `make -j $(nproc)` to compile the kernel source 3. Once the soft lockup is reproduced, check the ftrace log: cat /sys/kernel/debug/tracing/trace # tracer: function_graph # # CPU DURATION FUNCTION CALLS # | | | | | | | 76) $ 50412985 us | } /* __purge_vmap_area_lazy */ 76) $ 50412997 us | } /* drain_vmap_area_work */ 76) $ 29165911 us | } /* __purge_vmap_area_lazy */ 76) $ 29165926 us | } /* drain_vmap_area_work */ 91) $ 53629423 us | } /* __purge_vmap_area_lazy */ 91) $ 53629434 us | } /* drain_vmap_area_work */ 91) $ 28121014 us | } /* __purge_vmap_area_lazy */ 91) $ 28121026 us | } /* drain_vmap_area_work */ [6.10 w/ the patch] 1. Repeat step 1-2 in "[6.10 wo/ the patch]" 2. The soft lockup is not triggered and ftrace log is empty. cat /sys/kernel/debug/tracing/trace # tracer: function_graph # # CPU DURATION FUNCTION CALLS # | | | | | | | 3. Setting 'tracing_thresh' to 10/5 seconds does not get any ftrace log. 4. Setting 'tracing_thresh' to 1 second gets ftrace log. cat /sys/kernel/debug/tracing/trace # tracer: function_graph # # CPU DURATION FUNCTION CALLS # | | | | | | | 23) $ 1074942 us | } /* __purge_vmap_area_lazy */ 23) $ 1074950 us | } /* drain_vmap_area_work */ The worst execution time of drain_vmap_area_work() is about 1 second. Link: https://lore.kernel.org/lkml/ZqFlawuVnOMY2k3E@pc638.lan/ Link: https://lkml.kernel.org/r/20240726165246.31326-1-ahuang12@lenovo.com Fixes: 282631cb2447 ("mm: vmalloc: remove global purge_vmap_area_root rb-tree") Signed-off-by: Adrian Huang Co-developed-by: Uladzislau Rezki (Sony) Signed-off-by: Uladzislau Rezki (Sony) Tested-by: Jiwei Sun Reviewed-by: Baoquan He Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Christoph Hellwig Cc: Dmitry Vyukov Cc: Vincenzo Frascino Cc: Signed-off-by: Andrew Morton --- include/linux/kasan.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 00a3bf7c0d8f..6bbfc8aa42e8 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -29,6 +29,9 @@ typedef unsigned int __bitwise kasan_vmalloc_flags_t; #define KASAN_VMALLOC_VM_ALLOC ((__force kasan_vmalloc_flags_t)0x02u) #define KASAN_VMALLOC_PROT_NORMAL ((__force kasan_vmalloc_flags_t)0x04u) +#define KASAN_VMALLOC_PAGE_RANGE 0x1 /* Apply exsiting page range */ +#define KASAN_VMALLOC_TLB_FLUSH 0x2 /* TLB flush */ + #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) #include @@ -564,7 +567,8 @@ void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); int kasan_populate_vmalloc(unsigned long addr, unsigned long size); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, - unsigned long free_region_end); + unsigned long free_region_end, + unsigned long flags); #else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ @@ -579,7 +583,8 @@ static inline int kasan_populate_vmalloc(unsigned long start, static inline void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, - unsigned long free_region_end) { } + unsigned long free_region_end, + unsigned long flags) { } #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ @@ -614,7 +619,8 @@ static inline int kasan_populate_vmalloc(unsigned long start, static inline void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, - unsigned long free_region_end) { } + unsigned long free_region_end, + unsigned long flags) { } static inline void *kasan_unpoison_vmalloc(const void *start, unsigned long size, -- cgit v1.2.3 From 1cd1a4e71b61eaf8cadd15372b67ccd60a2e1a99 Mon Sep 17 00:00:00 2001 From: Tanya Agarwal Date: Fri, 27 Sep 2024 00:05:16 +0530 Subject: mm/mempolicy: fix comments for better documentation Fix typo in mempolicy.h and Correct the number of allowed memory policy Link: https://lkml.kernel.org/r/20240926183516.4034-2-tanyaagarwal25699@gmail.com Signed-off-by: Tanya Agarwal Reviewed-by: Shuah Khan Cc: Anup Sharma Signed-off-by: Andrew Morton --- include/linux/mempolicy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 1add16f21612..ce9885e0178a 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -47,7 +47,7 @@ struct mempolicy { atomic_t refcnt; unsigned short mode; /* See MPOL_* above */ unsigned short flags; /* See set_mempolicy() MPOL_F_* above */ - nodemask_t nodes; /* interleave/bind/perfer */ + nodemask_t nodes; /* interleave/bind/preferred/etc */ int home_node; /* Home node to use for MPOL_BIND and MPOL_PREFERRED_MANY */ union { -- cgit v1.2.3 From f2f484085ef1a2bb5aea861a06bc6b4dc50d2ab8 Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Thu, 26 Sep 2024 15:49:22 +0800 Subject: mm: move mm flags to mm_types.h The types of mm flags are now far beyond the core dump related features. This patch moves mm flags from linux/sched/coredump.h to linux/mm_types.h. The linux/sched/coredump.h has include the mm_types.h, so the C files related to coredump does not need to change head file inclusion. In addition, the inclusion of sched/coredump.h now can be deleted from the C files that irrelevant to core dump. Link: https://lkml.kernel.org/r/20240926074922.2721274-1-sunnanyong@huawei.com Signed-off-by: Nanyong Sun Cc: Kefeng Wang Cc: Masami Hiramatsu Cc: Matthew Wilcox Cc: Oleg Nesterov Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 1 - include/linux/khugepaged.h | 2 - include/linux/ksm.h | 1 - include/linux/mm_types.h | 84 ++++++++++++++++++++++++++++++++++++++++++ include/linux/oom.h | 1 - include/linux/sched/coredump.h | 82 ----------------------------------------- 6 files changed, 84 insertions(+), 87 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ef5b80e48599..8afe09a2cf03 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -2,7 +2,6 @@ #ifndef _LINUX_HUGE_MM_H #define _LINUX_HUGE_MM_H -#include #include #include /* only for vma_is_dax() */ diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 30baae91b225..1f46046080f5 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -2,8 +2,6 @@ #ifndef _LINUX_KHUGEPAGED_H #define _LINUX_KHUGEPAGED_H -#include /* MMF_VM_HUGEPAGE */ - extern unsigned int khugepaged_max_ptes_none __read_mostly; #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern struct attribute_group khugepaged_attr_group; diff --git a/include/linux/ksm.h b/include/linux/ksm.h index ec9c05044d4f..29022e71a074 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -13,7 +13,6 @@ #include #include #include -#include #ifdef CONFIG_KSM int ksm_madvise(struct vm_area_struct *vma, unsigned long start, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6e3bdf8e38bc..ff8627acbaa7 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1499,4 +1499,88 @@ enum { /* See also internal only FOLL flags in mm/internal.h */ }; +/* mm flags */ + +/* + * The first two bits represent core dump modes for set-user-ID, + * the modes are SUID_DUMP_* defined in linux/sched/coredump.h + */ +#define MMF_DUMPABLE_BITS 2 +#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) +/* coredump filter bits */ +#define MMF_DUMP_ANON_PRIVATE 2 +#define MMF_DUMP_ANON_SHARED 3 +#define MMF_DUMP_MAPPED_PRIVATE 4 +#define MMF_DUMP_MAPPED_SHARED 5 +#define MMF_DUMP_ELF_HEADERS 6 +#define MMF_DUMP_HUGETLB_PRIVATE 7 +#define MMF_DUMP_HUGETLB_SHARED 8 +#define MMF_DUMP_DAX_PRIVATE 9 +#define MMF_DUMP_DAX_SHARED 10 + +#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS +#define MMF_DUMP_FILTER_BITS 9 +#define MMF_DUMP_FILTER_MASK \ + (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) +#define MMF_DUMP_FILTER_DEFAULT \ + ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ + (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) + +#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS +# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) +#else +# define MMF_DUMP_MASK_DEFAULT_ELF 0 +#endif + /* leave room for more dump flags */ +#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ +#define MMF_VM_HUGEPAGE 17 /* set when mm is available for khugepaged */ + +/* + * This one-shot flag is dropped due to necessity of changing exe once again + * on NFS restore + */ +//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ + +#define MMF_HAS_UPROBES 19 /* has uprobes */ +#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ +#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ +#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ +#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ +#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ +#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) +#define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */ +#define MMF_MULTIPROCESS 26 /* mm is shared between processes */ +/* + * MMF_HAS_PINNED: Whether this mm has pinned any pages. This can be either + * replaced in the future by mm.pinned_vm when it becomes stable, or grow into + * a counter on its own. We're aggresive on this bit for now: even if the + * pinned pages were unpinned later on, we'll still keep this bit set for the + * lifecycle of this mm, just for simplicity. + */ +#define MMF_HAS_PINNED 27 /* FOLL_PIN has run, never cleared */ + +#define MMF_HAS_MDWE 28 +#define MMF_HAS_MDWE_MASK (1 << MMF_HAS_MDWE) + + +#define MMF_HAS_MDWE_NO_INHERIT 29 + +#define MMF_VM_MERGE_ANY 30 +#define MMF_VM_MERGE_ANY_MASK (1 << MMF_VM_MERGE_ANY) + +#define MMF_TOPDOWN 31 /* mm searches top down by default */ +#define MMF_TOPDOWN_MASK (1 << MMF_TOPDOWN) + +#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ + MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\ + MMF_VM_MERGE_ANY_MASK | MMF_TOPDOWN_MASK) + +static inline unsigned long mmf_init_flags(unsigned long flags) +{ + if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) + flags &= ~((1UL << MMF_HAS_MDWE) | + (1UL << MMF_HAS_MDWE_NO_INHERIT)); + return flags & MMF_INIT_MASK; +} + #endif /* _LINUX_MM_TYPES_H */ diff --git a/include/linux/oom.h b/include/linux/oom.h index 7d0c9c48a0c5..1e0fc6931ce9 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -7,7 +7,6 @@ #include #include #include -#include /* MMF_* */ #include /* VM_FAULT* */ struct zonelist; diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index e62ff805cfc9..6eb65ceed213 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -8,12 +8,6 @@ #define SUID_DUMP_USER 1 /* Dump as user of process */ #define SUID_DUMP_ROOT 2 /* Dump as root */ -/* mm flags */ - -/* for SUID_DUMP_* above */ -#define MMF_DUMPABLE_BITS 2 -#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) - extern void set_dumpable(struct mm_struct *mm, int value); /* * This returns the actual value of the suid_dumpable flag. For things @@ -31,80 +25,4 @@ static inline int get_dumpable(struct mm_struct *mm) return __get_dumpable(mm->flags); } -/* coredump filter bits */ -#define MMF_DUMP_ANON_PRIVATE 2 -#define MMF_DUMP_ANON_SHARED 3 -#define MMF_DUMP_MAPPED_PRIVATE 4 -#define MMF_DUMP_MAPPED_SHARED 5 -#define MMF_DUMP_ELF_HEADERS 6 -#define MMF_DUMP_HUGETLB_PRIVATE 7 -#define MMF_DUMP_HUGETLB_SHARED 8 -#define MMF_DUMP_DAX_PRIVATE 9 -#define MMF_DUMP_DAX_SHARED 10 - -#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS -#define MMF_DUMP_FILTER_BITS 9 -#define MMF_DUMP_FILTER_MASK \ - (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) -#define MMF_DUMP_FILTER_DEFAULT \ - ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ - (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) - -#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS -# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) -#else -# define MMF_DUMP_MASK_DEFAULT_ELF 0 -#endif - /* leave room for more dump flags */ -#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ -#define MMF_VM_HUGEPAGE 17 /* set when mm is available for - khugepaged */ -/* - * This one-shot flag is dropped due to necessity of changing exe once again - * on NFS restore - */ -//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ - -#define MMF_HAS_UPROBES 19 /* has uprobes */ -#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ -#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ -#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ -#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ -#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ -#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) -#define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */ -#define MMF_MULTIPROCESS 26 /* mm is shared between processes */ -/* - * MMF_HAS_PINNED: Whether this mm has pinned any pages. This can be either - * replaced in the future by mm.pinned_vm when it becomes stable, or grow into - * a counter on its own. We're aggresive on this bit for now: even if the - * pinned pages were unpinned later on, we'll still keep this bit set for the - * lifecycle of this mm, just for simplicity. - */ -#define MMF_HAS_PINNED 27 /* FOLL_PIN has run, never cleared */ - -#define MMF_HAS_MDWE 28 -#define MMF_HAS_MDWE_MASK (1 << MMF_HAS_MDWE) - - -#define MMF_HAS_MDWE_NO_INHERIT 29 - -#define MMF_VM_MERGE_ANY 30 -#define MMF_VM_MERGE_ANY_MASK (1 << MMF_VM_MERGE_ANY) - -#define MMF_TOPDOWN 31 /* mm searches top down by default */ -#define MMF_TOPDOWN_MASK (1 << MMF_TOPDOWN) - -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ - MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\ - MMF_VM_MERGE_ANY_MASK | MMF_TOPDOWN_MASK) - -static inline unsigned long mmf_init_flags(unsigned long flags) -{ - if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) - flags &= ~((1UL << MMF_HAS_MDWE) | - (1UL << MMF_HAS_MDWE_NO_INHERIT)); - return flags & MMF_INIT_MASK; -} - #endif /* _LINUX_SCHED_COREDUMP_H */ -- cgit v1.2.3 From 66efef9b1a7d6cc725efa9395fb390483ad5b555 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Thu, 26 Sep 2024 14:46:14 +0800 Subject: mm: pgtable: introduce pte_offset_map_{ro|rw}_nolock() Patch series "introduce pte_offset_map_{ro|rw}_nolock()", v5. As proposed by David Hildenbrand [1], this series introduces the following two new helper functions to replace pte_offset_map_nolock(). 1. pte_offset_map_ro_nolock() 2. pte_offset_map_rw_nolock() As the name suggests, pte_offset_map_ro_nolock() is used for read-only case. In this case, only read-only operations will be performed on PTE page after the PTL is held. The RCU lock in pte_offset_map_nolock() will ensure that the PTE page will not be freed, and there is no need to worry about whether the pmd entry is modified. Therefore pte_offset_map_ro_nolock() is just a renamed version of pte_offset_map_nolock(). pte_offset_map_rw_nolock() is used for may-write case. In this case, the pte or pmd entry may be modified after the PTL is held, so we need to ensure that the pmd entry has not been modified concurrently. So in addition to the name change, it also outputs the pmdval when successful. The users should make sure the page table is stable like checking pte_same() or checking pmd_same() by using the output pmdval before performing the write operations. This series will convert all pte_offset_map_nolock() into the above two helper functions one by one, and finally completely delete it. This also a preparation for reclaiming the empty user PTE page table pages. This patch (of 13): Currently, the usage of pte_offset_map_nolock() can be divided into the following two cases: 1) After acquiring PTL, only read-only operations are performed on the PTE page. In this case, the RCU lock in pte_offset_map_nolock() will ensure that the PTE page will not be freed, and there is no need to worry about whether the pmd entry is modified. 2) After acquiring PTL, the pte or pmd entries may be modified. At this time, we need to ensure that the pmd entry has not been modified concurrently. To more clearing distinguish between these two cases, this commit introduces two new helper functions to replace pte_offset_map_nolock(). For 1), just rename it to pte_offset_map_ro_nolock(). For 2), in addition to changing the name to pte_offset_map_rw_nolock(), it also outputs the pmdval when successful. It is applicable for may-write cases where any modification operations to the page table may happen after the corresponding spinlock is held afterwards. But the users should make sure the page table is stable like checking pte_same() or checking pmd_same() by using the output pmdval before performing the write operations. Note: "RO" / "RW" expresses the intended semantics, not that the *kmap* will be read-only/read-write protected. Subsequent commits will convert pte_offset_map_nolock() into the above two functions one by one, and finally completely delete it. Link: https://lkml.kernel.org/r/cover.1727332572.git.zhengqi.arch@bytedance.com Link: https://lkml.kernel.org/r/5aeecfa131600a454b1f3a038a1a54282ca3b856.1727332572.git.zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Acked-by: David Hildenbrand Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Mike Rapoport (Microsoft) Cc: Peter Xu Cc: Ryan Roberts Cc: Vishal Moola (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 61fff5d34ed5..0cf45d4b7286 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3017,6 +3017,11 @@ static inline pte_t *pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd, pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, spinlock_t **ptlp); +pte_t *pte_offset_map_ro_nolock(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, spinlock_t **ptlp); +pte_t *pte_offset_map_rw_nolock(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, pmd_t *pmdvalp, + spinlock_t **ptlp); #define pte_unmap_unlock(pte, ptl) do { \ spin_unlock(ptl); \ -- cgit v1.2.3 From 583e66debd1d5aa8c401aebe924c7406e15579a7 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Thu, 26 Sep 2024 14:46:26 +0800 Subject: mm: pgtable: remove pte_offset_map_nolock() Now no users are using the pte_offset_map_nolock(), remove it. Link: https://lkml.kernel.org/r/d04f9bbbcde048fb6ffa6f2bdbc6f9b22d5286f9.1727332572.git.zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Acked-by: David Hildenbrand Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Mike Rapoport (Microsoft) Cc: Peter Xu Cc: Ryan Roberts Cc: Vishal Moola (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0cf45d4b7286..8f5394d75ce2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3015,8 +3015,6 @@ static inline pte_t *pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd, return pte; } -pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd, - unsigned long addr, spinlock_t **ptlp); pte_t *pte_offset_map_ro_nolock(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, spinlock_t **ptlp); pte_t *pte_offset_map_rw_nolock(struct mm_struct *mm, pmd_t *pmd, -- cgit v1.2.3 From 473c371254d2c9906c286c939eaa99d0fac13e38 Mon Sep 17 00:00:00 2001 From: Zhaoyang Huang Date: Thu, 26 Sep 2024 13:06:47 +0800 Subject: mm: migrate LRU_REFS_MASK bits in folio_migrate_flags Bits of LRU_REFS_MASK are not inherited during migration which lead to new folio start from tier0 when MGLRU enabled. Try to bring as much bits of folio->flags as possible since compaction and alloc_contig_range which introduce migration do happen at times. Link: https://lkml.kernel.org/r/20240926050647.5653-1-zhaoyang.huang@unisoc.com Signed-off-by: Zhaoyang Huang Suggested-by: Yu Zhao Acked-by: David Hildenbrand Cc: Matthew Wilcox Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/linux/mm_inline.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index f4fe593c1400..6f801c7b36e2 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -291,6 +291,12 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, return true; } +static inline void folio_migrate_refs(struct folio *new, struct folio *old) +{ + unsigned long refs = READ_ONCE(old->flags) & LRU_REFS_MASK; + + set_mask_bits(&new->flags, LRU_REFS_MASK, refs); +} #else /* !CONFIG_LRU_GEN */ static inline bool lru_gen_enabled(void) @@ -313,6 +319,10 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, return false; } +static inline void folio_migrate_refs(struct folio *new, struct folio *old) +{ + +} #endif /* CONFIG_LRU_GEN */ static __always_inline -- cgit v1.2.3 From 43b53bca61b032270395152ba93d6e9ead47f0de Mon Sep 17 00:00:00 2001 From: Imran Shaik Date: Thu, 22 Aug 2024 16:57:18 +0530 Subject: dt-bindings: clock: qcom: Add GCC clocks for QCS8300 Add support for qcom global clock controller bindings for QCS8300 platform. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Imran Shaik Link: https://lore.kernel.org/r/20240822-qcs8300-gcc-v2-1-b310dfa70ad8@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,qcs8300-gcc.h | 234 +++++++++++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,qcs8300-gcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,qcs8300-gcc.h b/include/dt-bindings/clock/qcom,qcs8300-gcc.h new file mode 100644 index 000000000000..a0083b1d2126 --- /dev/null +++ b/include/dt-bindings/clock/qcom,qcs8300-gcc.h @@ -0,0 +1,234 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GCC_QCS8300_H +#define _DT_BINDINGS_CLK_QCOM_GCC_QCS8300_H + +/* GCC clocks */ +#define GCC_GPLL0 0 +#define GCC_GPLL0_OUT_EVEN 1 +#define GCC_GPLL1 2 +#define GCC_GPLL4 3 +#define GCC_GPLL7 4 +#define GCC_GPLL9 5 +#define GCC_AGGRE_NOC_QUPV3_AXI_CLK 6 +#define GCC_AGGRE_UFS_PHY_AXI_CLK 7 +#define GCC_AGGRE_USB2_PRIM_AXI_CLK 8 +#define GCC_AGGRE_USB3_PRIM_AXI_CLK 9 +#define GCC_AHB2PHY0_CLK 10 +#define GCC_AHB2PHY2_CLK 11 +#define GCC_AHB2PHY3_CLK 12 +#define GCC_BOOT_ROM_AHB_CLK 13 +#define GCC_CAMERA_AHB_CLK 14 +#define GCC_CAMERA_HF_AXI_CLK 15 +#define GCC_CAMERA_SF_AXI_CLK 16 +#define GCC_CAMERA_THROTTLE_XO_CLK 17 +#define GCC_CAMERA_XO_CLK 18 +#define GCC_CFG_NOC_USB2_PRIM_AXI_CLK 19 +#define GCC_CFG_NOC_USB3_PRIM_AXI_CLK 20 +#define GCC_DDRSS_GPU_AXI_CLK 21 +#define GCC_DISP_AHB_CLK 22 +#define GCC_DISP_HF_AXI_CLK 23 +#define GCC_DISP_XO_CLK 24 +#define GCC_EDP_REF_CLKREF_EN 25 +#define GCC_EMAC0_AXI_CLK 26 +#define GCC_EMAC0_PHY_AUX_CLK 27 +#define GCC_EMAC0_PHY_AUX_CLK_SRC 28 +#define GCC_EMAC0_PTP_CLK 29 +#define GCC_EMAC0_PTP_CLK_SRC 30 +#define GCC_EMAC0_RGMII_CLK 31 +#define GCC_EMAC0_RGMII_CLK_SRC 32 +#define GCC_EMAC0_SLV_AHB_CLK 33 +#define GCC_GP1_CLK 34 +#define GCC_GP1_CLK_SRC 35 +#define GCC_GP2_CLK 36 +#define GCC_GP2_CLK_SRC 37 +#define GCC_GP3_CLK 38 +#define GCC_GP3_CLK_SRC 39 +#define GCC_GP4_CLK 40 +#define GCC_GP4_CLK_SRC 41 +#define GCC_GP5_CLK 42 +#define GCC_GP5_CLK_SRC 43 +#define GCC_GPU_CFG_AHB_CLK 44 +#define GCC_GPU_GPLL0_CLK_SRC 45 +#define GCC_GPU_GPLL0_DIV_CLK_SRC 46 +#define GCC_GPU_MEMNOC_GFX_CENTER_PIPELINE_CLK 47 +#define GCC_GPU_MEMNOC_GFX_CLK 48 +#define GCC_GPU_SNOC_DVM_GFX_CLK 49 +#define GCC_GPU_TCU_THROTTLE_AHB_CLK 50 +#define GCC_GPU_TCU_THROTTLE_CLK 51 +#define GCC_PCIE_0_AUX_CLK 52 +#define GCC_PCIE_0_AUX_CLK_SRC 53 +#define GCC_PCIE_0_CFG_AHB_CLK 54 +#define GCC_PCIE_0_MSTR_AXI_CLK 55 +#define GCC_PCIE_0_PHY_AUX_CLK 56 +#define GCC_PCIE_0_PHY_AUX_CLK_SRC 57 +#define GCC_PCIE_0_PHY_RCHNG_CLK 58 +#define GCC_PCIE_0_PHY_RCHNG_CLK_SRC 59 +#define GCC_PCIE_0_PIPE_CLK 60 +#define GCC_PCIE_0_PIPE_CLK_SRC 61 +#define GCC_PCIE_0_PIPE_DIV_CLK_SRC 62 +#define GCC_PCIE_0_PIPEDIV2_CLK 63 +#define GCC_PCIE_0_SLV_AXI_CLK 64 +#define GCC_PCIE_0_SLV_Q2A_AXI_CLK 65 +#define GCC_PCIE_1_AUX_CLK 66 +#define GCC_PCIE_1_AUX_CLK_SRC 67 +#define GCC_PCIE_1_CFG_AHB_CLK 68 +#define GCC_PCIE_1_MSTR_AXI_CLK 69 +#define GCC_PCIE_1_PHY_AUX_CLK 70 +#define GCC_PCIE_1_PHY_AUX_CLK_SRC 71 +#define GCC_PCIE_1_PHY_RCHNG_CLK 72 +#define GCC_PCIE_1_PHY_RCHNG_CLK_SRC 73 +#define GCC_PCIE_1_PIPE_CLK 74 +#define GCC_PCIE_1_PIPE_CLK_SRC 75 +#define GCC_PCIE_1_PIPE_DIV_CLK_SRC 76 +#define GCC_PCIE_1_PIPEDIV2_CLK 77 +#define GCC_PCIE_1_SLV_AXI_CLK 78 +#define GCC_PCIE_1_SLV_Q2A_AXI_CLK 79 +#define GCC_PCIE_CLKREF_EN 80 +#define GCC_PCIE_THROTTLE_CFG_CLK 81 +#define GCC_PDM2_CLK 82 +#define GCC_PDM2_CLK_SRC 83 +#define GCC_PDM_AHB_CLK 84 +#define GCC_PDM_XO4_CLK 85 +#define GCC_QMIP_CAMERA_NRT_AHB_CLK 86 +#define GCC_QMIP_CAMERA_RT_AHB_CLK 87 +#define GCC_QMIP_DISP_AHB_CLK 88 +#define GCC_QMIP_DISP_ROT_AHB_CLK 89 +#define GCC_QMIP_VIDEO_CVP_AHB_CLK 90 +#define GCC_QMIP_VIDEO_VCODEC_AHB_CLK 91 +#define GCC_QMIP_VIDEO_VCPU_AHB_CLK 92 +#define GCC_QUPV3_WRAP0_CORE_2X_CLK 93 +#define GCC_QUPV3_WRAP0_CORE_CLK 94 +#define GCC_QUPV3_WRAP0_S0_CLK 95 +#define GCC_QUPV3_WRAP0_S0_CLK_SRC 96 +#define GCC_QUPV3_WRAP0_S1_CLK 97 +#define GCC_QUPV3_WRAP0_S1_CLK_SRC 98 +#define GCC_QUPV3_WRAP0_S2_CLK 99 +#define GCC_QUPV3_WRAP0_S2_CLK_SRC 100 +#define GCC_QUPV3_WRAP0_S3_CLK 101 +#define GCC_QUPV3_WRAP0_S3_CLK_SRC 102 +#define GCC_QUPV3_WRAP0_S4_CLK 103 +#define GCC_QUPV3_WRAP0_S4_CLK_SRC 104 +#define GCC_QUPV3_WRAP0_S5_CLK 105 +#define GCC_QUPV3_WRAP0_S5_CLK_SRC 106 +#define GCC_QUPV3_WRAP0_S6_CLK 107 +#define GCC_QUPV3_WRAP0_S6_CLK_SRC 108 +#define GCC_QUPV3_WRAP0_S7_CLK 109 +#define GCC_QUPV3_WRAP0_S7_CLK_SRC 110 +#define GCC_QUPV3_WRAP1_CORE_2X_CLK 111 +#define GCC_QUPV3_WRAP1_CORE_CLK 112 +#define GCC_QUPV3_WRAP1_S0_CLK 113 +#define GCC_QUPV3_WRAP1_S0_CLK_SRC 114 +#define GCC_QUPV3_WRAP1_S1_CLK 115 +#define GCC_QUPV3_WRAP1_S1_CLK_SRC 116 +#define GCC_QUPV3_WRAP1_S2_CLK 117 +#define GCC_QUPV3_WRAP1_S2_CLK_SRC 118 +#define GCC_QUPV3_WRAP1_S3_CLK 119 +#define GCC_QUPV3_WRAP1_S3_CLK_SRC 120 +#define GCC_QUPV3_WRAP1_S4_CLK 121 +#define GCC_QUPV3_WRAP1_S4_CLK_SRC 122 +#define GCC_QUPV3_WRAP1_S5_CLK 123 +#define GCC_QUPV3_WRAP1_S5_CLK_SRC 124 +#define GCC_QUPV3_WRAP1_S6_CLK 125 +#define GCC_QUPV3_WRAP1_S6_CLK_SRC 126 +#define GCC_QUPV3_WRAP1_S7_CLK 127 +#define GCC_QUPV3_WRAP1_S7_CLK_SRC 128 +#define GCC_QUPV3_WRAP3_CORE_2X_CLK 129 +#define GCC_QUPV3_WRAP3_CORE_CLK 130 +#define GCC_QUPV3_WRAP3_QSPI_CLK 131 +#define GCC_QUPV3_WRAP3_S0_CLK 132 +#define GCC_QUPV3_WRAP3_S0_CLK_SRC 133 +#define GCC_QUPV3_WRAP3_S0_DIV_CLK_SRC 134 +#define GCC_QUPV3_WRAP_0_M_AHB_CLK 135 +#define GCC_QUPV3_WRAP_0_S_AHB_CLK 136 +#define GCC_QUPV3_WRAP_1_M_AHB_CLK 137 +#define GCC_QUPV3_WRAP_1_S_AHB_CLK 138 +#define GCC_QUPV3_WRAP_3_M_AHB_CLK 139 +#define GCC_QUPV3_WRAP_3_S_AHB_CLK 140 +#define GCC_SDCC1_AHB_CLK 141 +#define GCC_SDCC1_APPS_CLK 142 +#define GCC_SDCC1_APPS_CLK_SRC 143 +#define GCC_SDCC1_ICE_CORE_CLK 144 +#define GCC_SDCC1_ICE_CORE_CLK_SRC 145 +#define GCC_SGMI_CLKREF_EN 146 +#define GCC_UFS_PHY_AHB_CLK 147 +#define GCC_UFS_PHY_AXI_CLK 148 +#define GCC_UFS_PHY_AXI_CLK_SRC 149 +#define GCC_UFS_PHY_ICE_CORE_CLK 150 +#define GCC_UFS_PHY_ICE_CORE_CLK_SRC 151 +#define GCC_UFS_PHY_PHY_AUX_CLK 152 +#define GCC_UFS_PHY_PHY_AUX_CLK_SRC 153 +#define GCC_UFS_PHY_RX_SYMBOL_0_CLK 154 +#define GCC_UFS_PHY_RX_SYMBOL_0_CLK_SRC 155 +#define GCC_UFS_PHY_RX_SYMBOL_1_CLK 156 +#define GCC_UFS_PHY_RX_SYMBOL_1_CLK_SRC 157 +#define GCC_UFS_PHY_TX_SYMBOL_0_CLK 158 +#define GCC_UFS_PHY_TX_SYMBOL_0_CLK_SRC 159 +#define GCC_UFS_PHY_UNIPRO_CORE_CLK 160 +#define GCC_UFS_PHY_UNIPRO_CORE_CLK_SRC 161 +#define GCC_USB20_MASTER_CLK 162 +#define GCC_USB20_MASTER_CLK_SRC 163 +#define GCC_USB20_MOCK_UTMI_CLK 164 +#define GCC_USB20_MOCK_UTMI_CLK_SRC 165 +#define GCC_USB20_MOCK_UTMI_POSTDIV_CLK_SRC 166 +#define GCC_USB20_SLEEP_CLK 167 +#define GCC_USB30_PRIM_MASTER_CLK 168 +#define GCC_USB30_PRIM_MASTER_CLK_SRC 169 +#define GCC_USB30_PRIM_MOCK_UTMI_CLK 170 +#define GCC_USB30_PRIM_MOCK_UTMI_CLK_SRC 171 +#define GCC_USB30_PRIM_MOCK_UTMI_POSTDIV_CLK_SRC 172 +#define GCC_USB30_PRIM_SLEEP_CLK 173 +#define GCC_USB3_PRIM_PHY_AUX_CLK 174 +#define GCC_USB3_PRIM_PHY_AUX_CLK_SRC 175 +#define GCC_USB3_PRIM_PHY_COM_AUX_CLK 176 +#define GCC_USB3_PRIM_PHY_PIPE_CLK 177 +#define GCC_USB3_PRIM_PHY_PIPE_CLK_SRC 178 +#define GCC_USB_CLKREF_EN 179 +#define GCC_VIDEO_AHB_CLK 180 +#define GCC_VIDEO_AXI0_CLK 181 +#define GCC_VIDEO_AXI1_CLK 182 +#define GCC_VIDEO_XO_CLK 183 + +/* GCC power domains */ +#define GCC_EMAC0_GDSC 0 +#define GCC_PCIE_0_GDSC 1 +#define GCC_PCIE_1_GDSC 2 +#define GCC_UFS_PHY_GDSC 3 +#define GCC_USB20_PRIM_GDSC 4 +#define GCC_USB30_PRIM_GDSC 5 + +/* GCC resets */ +#define GCC_EMAC0_BCR 0 +#define GCC_PCIE_0_BCR 1 +#define GCC_PCIE_0_LINK_DOWN_BCR 2 +#define GCC_PCIE_0_NOCSR_COM_PHY_BCR 3 +#define GCC_PCIE_0_PHY_BCR 4 +#define GCC_PCIE_0_PHY_NOCSR_COM_PHY_BCR 5 +#define GCC_PCIE_1_BCR 6 +#define GCC_PCIE_1_LINK_DOWN_BCR 7 +#define GCC_PCIE_1_NOCSR_COM_PHY_BCR 8 +#define GCC_PCIE_1_PHY_BCR 9 +#define GCC_PCIE_1_PHY_NOCSR_COM_PHY_BCR 10 +#define GCC_SDCC1_BCR 11 +#define GCC_UFS_PHY_BCR 12 +#define GCC_USB20_PRIM_BCR 13 +#define GCC_USB2_PHY_PRIM_BCR 14 +#define GCC_USB2_PHY_SEC_BCR 15 +#define GCC_USB30_PRIM_BCR 16 +#define GCC_USB3_DP_PHY_PRIM_BCR 17 +#define GCC_USB3_PHY_PRIM_BCR 18 +#define GCC_USB3_PHY_TERT_BCR 19 +#define GCC_USB3_UNIPHY_MP0_BCR 20 +#define GCC_USB3_UNIPHY_MP1_BCR 21 +#define GCC_USB3PHY_PHY_PRIM_BCR 22 +#define GCC_USB3UNIPHY_PHY_MP0_BCR 23 +#define GCC_USB3UNIPHY_PHY_MP1_BCR 24 +#define GCC_USB_PHY_CFG_AHB2PHY_BCR 25 +#define GCC_VIDEO_BCR 26 +#define GCC_VIDEO_AXI0_CLK_ARES 27 +#define GCC_VIDEO_AXI1_CLK_ARES 28 + +#endif -- cgit v1.2.3 From 4cc0473d7754d387680bdf0728eb29f0ec8834bf Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Mon, 7 Oct 2024 22:49:05 +0800 Subject: get rid of __get_task_comm() Patch series "Improve the copy of task comm", v8. Using {memcpy,strncpy,strcpy,kstrdup} to copy the task comm relies on the length of task comm. Changes in the task comm could result in a destination string that is overflow. Therefore, we should explicitly ensure the destination string is always NUL-terminated, regardless of the task comm. This approach will facilitate future extensions to the task comm. As suggested by Linus [0], we can identify all relevant code with the following git grep command: git grep 'memcpy.*->comm\>' git grep 'kstrdup.*->comm\>' git grep 'strncpy.*->comm\>' git grep 'strcpy.*->comm\>' PATCH #2~#4: memcpy PATCH #5~#6: kstrdup PATCH #7: strcpy Please note that strncpy() is not included in this series as it is being tracked by another effort. [1] This patch (of 7): We want to eliminate the use of __get_task_comm() for the following reasons: - The task_lock() is unnecessary Quoted from Linus [0]: : Since user space can randomly change their names anyway, using locking : was always wrong for readers (for writers it probably does make sense : to have some lock - although practically speaking nobody cares there : either, but at least for a writer some kind of race could have : long-term mixed results Link: https://lkml.kernel.org/r/20241007144911.27693-1-laoar.shao@gmail.com Link: https://lkml.kernel.org/r/20241007144911.27693-2-laoar.shao@gmail.com Link: https://lore.kernel.org/all/CAHk-=wivfrF0_zvf+oj6==Sh=-npJooP8chLPEfaFV0oNYTTBA@mail.gmail.com [0] Link: https://lore.kernel.org/all/CAHk-=whWtUC-AjmGJveAETKOMeMFSTwKwu99v7+b6AyHMmaDFA@mail.gmail.com/ Link: https://lore.kernel.org/all/CAHk-=wjAmmHUg6vho1KjzQi2=psR30+CogFd4aXrThr2gsiS4g@mail.gmail.com/ [0] Link: https://github.com/KSPP/linux/issues/90 [1] Signed-off-by: Yafang Shao Suggested-by: Linus Torvalds Cc: Alexander Viro Cc: Christian Brauner Cc: Jan Kara Cc: Eric Biederman Cc: Kees Cook Cc: Alexei Starovoitov Cc: Matus Jokay Cc: Alejandro Colomar Cc: "Serge E. Hallyn" Cc: Catalin Marinas Cc: Justin Stitt Cc: Steven Rostedt (Google) Cc: Tetsuo Handa Cc: Andy Shevchenko Cc: Daniel Vetter Cc: David Airlie Cc: Eric Paris Cc: James Morris Cc: Maarten Lankhorst Cc: Matthew Wilcox Cc: Maxime Ripard Cc: Ondrej Mosnacek Cc: Paul Moore Cc: Quentin Monnet Cc: Simon Horman Cc: Stephen Smalley Cc: Thomas Zimmermann Signed-off-by: Andrew Morton --- include/linux/sched.h | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index bb343136ddd0..67718d5591dd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1121,9 +1121,12 @@ struct task_struct { /* * executable name, excluding path. * - * - normally initialized setup_new_exec() - * - access it with [gs]et_task_comm() - * - lock it with task_lock() + * - normally initialized begin_new_exec() + * - set it with set_task_comm() + * - strscpy_pad() to ensure it is always NUL-terminated and + * zero-padded + * - task_lock() to ensure the operation is atomic and the name is + * fully updated. */ char comm[TASK_COMM_LEN]; @@ -1938,10 +1941,23 @@ static inline void set_task_comm(struct task_struct *tsk, const char *from) __set_task_comm(tsk, from, false); } -extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk); +/* + * - Why not use task_lock()? + * User space can randomly change their names anyway, so locking for readers + * doesn't make sense. For writers, locking is probably necessary, as a race + * condition could lead to long-term mixed results. + * The strscpy_pad() in __set_task_comm() can ensure that the task comm is + * always NUL-terminated and zero-padded. Therefore the race condition between + * reader and writer is not an issue. + * + * - BUILD_BUG_ON() can help prevent the buf from being truncated. + * Since the callers don't perform any return value checks, this safeguard is + * necessary. + */ #define get_task_comm(buf, tsk) ({ \ - BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN); \ - __get_task_comm(buf, sizeof(buf), tsk); \ + BUILD_BUG_ON(sizeof(buf) < TASK_COMM_LEN); \ + strscpy_pad(buf, (tsk)->comm); \ + buf; \ }) #ifdef CONFIG_SMP -- cgit v1.2.3 From f2fa0fd4e7db8326a77618962714924b64f5f889 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 12 Oct 2024 19:52:53 +0200 Subject: reboot: move reboot_notifier_list to kernel/reboot.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All the functions related to the reboot notifier list are in kernel/reboot.c. Move the list itself, too. As there are no direct users anymore, make the declaration static. Link: https://lkml.kernel.org/r/20241012-reboot_notifier_list-v1-1-6093bb9455ce@weissschuh.net Signed-off-by: Thomas Weißschuh Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton --- include/linux/notifier.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 45702bdcbceb..b42e64734968 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -237,7 +237,5 @@ static inline int notifier_to_errno(int ret) #define KBD_KEYSYM 0x0004 /* Keyboard keysym */ #define KBD_POST_KEYSYM 0x0005 /* Called after keyboard keysym interpretation */ -extern struct blocking_notifier_head reboot_notifier_list; - #endif /* __KERNEL__ */ #endif /* _LINUX_NOTIFIER_H */ -- cgit v1.2.3 From a9d38bcd7337f051912174ebfc500e1cef73982e Mon Sep 17 00:00:00 2001 From: Sui Jingfeng Date: Sat, 12 Oct 2024 18:08:17 +0800 Subject: scatterlist: fix a typo Replace the 'One' with 'On'. Link: https://lkml.kernel.org/r/20241012100817.323007-1-sui.jingfeng@linux.dev Fixes: af2880ec4402 ("scatterlist: add dedicated config for DMA flags") Signed-off-by: Sui Jingfeng Reviewed-by: Petr Tesarik Cc: Catalin Marinas Cc: Michael Kelley Cc: Robin Murphy Signed-off-by: Andrew Morton --- include/linux/scatterlist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index e61d164622db..c5e2239b550e 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -273,7 +273,7 @@ static inline void sg_unmark_end(struct scatterlist *sg) } /* - * One 64-bit architectures there is a 4-byte padding in struct scatterlist + * On 64-bit architectures there is a 4-byte padding in struct scatterlist * (assuming also CONFIG_NEED_SG_DMA_LENGTH is set). Use this padding for DMA * flags bits to indicate when a specific dma address is a bus address or the * buffer may have been bounced via SWIOTLB. -- cgit v1.2.3 From 74ef070e325465a1b364db6a5c6859785537f835 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 21 Oct 2024 10:07:36 +0200 Subject: percpu: merge VERIFY_PERCPU_PTR() into its only user Merge VERIFY_PERCPU_PTR() into non-CONFIG_SMP per_cpu_ptr() to make macro similar to CONFIG_SMP per_cpu_ptr(). This will allow a follow-up patch to refactor common code to a macro. No functional changes, non-CONFIG_SMP per_cpu_ptr() was the only user of VERIFY_PERCPU_PTR(). Link: https://lkml.kernel.org/r/20241021080856.48746-1-ubizjak@gmail.com Signed-off-by: Uros Bizjak Acked-by: Christoph Lameter Cc: Dennis Zhou Cc: Tejun Heo Signed-off-by: Andrew Morton --- include/linux/percpu-defs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 8efce7414fad..7fa88c5f4b26 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -254,13 +254,13 @@ do { \ #else /* CONFIG_SMP */ -#define VERIFY_PERCPU_PTR(__p) \ +#define per_cpu_ptr(ptr, cpu) \ ({ \ - __verify_pcpu_ptr(__p); \ - (typeof(*(__p)) __kernel __force *)(__p); \ + (void)(cpu); \ + __verify_pcpu_ptr(ptr); \ + (typeof(*(ptr)) __kernel __force *)(ptr); \ }) -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR(ptr); }) #define raw_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) #define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) -- cgit v1.2.3 From 001217defda86d0d6a5a9e6cf77a6b813857e7e3 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 21 Oct 2024 10:07:37 +0200 Subject: percpu: introduce PERCPU_PTR() macro Introduce PERCPU_PTR() macro to cast the percpu pointer from the percpu address space to a generic (kernel) address space. Use it in per_cpu_ptr() and related SHIFT_PERCPU_PTR() macros. Also remove common knowledge from SHIFT_PERCPU_PTR() comment, "weird cast" is just a standard way to inform sparse of a cast from the percpu address space to a generic address space. Link: https://lkml.kernel.org/r/20241021080856.48746-2-ubizjak@gmail.com Signed-off-by: Uros Bizjak Acked-by: Christoph Lameter Cc: Dennis Zhou Cc: Tejun Heo Signed-off-by: Andrew Morton --- include/linux/percpu-defs.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 7fa88c5f4b26..e1cf7982424f 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -220,15 +220,17 @@ do { \ (void)__vpp_verify; \ } while (0) +#define PERCPU_PTR(__p) \ + (typeof(*(__p)) __force __kernel *)(__p); + #ifdef CONFIG_SMP /* - * Add an offset to a pointer but keep the pointer as-is. Use RELOC_HIDE() - * to prevent the compiler from making incorrect assumptions about the - * pointer value. The weird cast keeps both GCC and sparse happy. + * Add an offset to a pointer. Use RELOC_HIDE() to prevent the compiler + * from making incorrect assumptions about the pointer value. */ #define SHIFT_PERCPU_PTR(__p, __offset) \ - RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)) + RELOC_HIDE(PERCPU_PTR(__p), (__offset)) #define per_cpu_ptr(ptr, cpu) \ ({ \ @@ -258,7 +260,7 @@ do { \ ({ \ (void)(cpu); \ __verify_pcpu_ptr(ptr); \ - (typeof(*(ptr)) __kernel __force *)(ptr); \ + PERCPU_PTR(ptr); \ }) #define raw_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) -- cgit v1.2.3 From dabddd687c9e1a06241d6b4d1f66b9f2b60b3ad1 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 21 Oct 2024 10:07:38 +0200 Subject: percpu: cast percpu pointer in PERCPU_PTR() via unsigned long Cast pointer from percpu address space to generic (kernel) address space in PERCPU_PTR() macro via unsigned long intermediate cast [1]. This intermediate cast is also required to avoid build failure when GCC's strict named address space checks for x86 targets [2] are enabled. Found by GCC's named address space checks. [1] https://sparse.docs.kernel.org/en/latest/annotations.html#address-space-name [2] https://gcc.gnu.org/onlinedocs/gcc/Named-Address-Spaces.html#x86-Named-Address-Spaces Link: https://lkml.kernel.org/r/20241021080856.48746-3-ubizjak@gmail.com Signed-off-by: Uros Bizjak Acked-by: Christoph Lameter Cc: Dennis Zhou Cc: Tejun Heo Signed-off-by: Andrew Morton --- include/linux/percpu-defs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index e1cf7982424f..35842d1e3879 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -221,7 +221,10 @@ do { \ } while (0) #define PERCPU_PTR(__p) \ - (typeof(*(__p)) __force __kernel *)(__p); +({ \ + unsigned long __pcpu_ptr = (__force unsigned long)(__p); \ + (typeof(*(__p)) __force __kernel *)(__pcpu_ptr); \ +}) #ifdef CONFIG_SMP -- cgit v1.2.3 From 92a8b224b833e82d286d2100432adbac8cf8a2a1 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 20 Oct 2024 12:01:51 +0800 Subject: lib/min_heap: introduce non-inline versions of min heap API functions Patch series "Enhance min heap API with non-inline functions and optimizations", v2. Add non-inline versions of the min heap API functions in lib/min_heap.c and updates all users outside of kernel/events/core.c to use these non-inline versions. To mitigate the performance impact of indirect function calls caused by the non-inline versions of the swap and compare functions, a builtin swap has been introduced that swaps elements based on their size. Additionally, it micro-optimizes the efficiency of the min heap by pre-scaling the counter, following the same approach as in lib/sort.c. Documentation for the min heap API has also been added to the core-api section. This patch (of 10): All current min heap API functions are marked with '__always_inline'. However, as the number of users increases, inlining these functions everywhere leads to a increase in kernel size. In performance-critical paths, such as when perf events are enabled and min heap functions are called on every context switch, it is important to retain the inline versions for optimal performance. To balance this, the original inline functions are kept, and additional non-inline versions of the functions have been added in lib/min_heap.c. Link: https://lkml.kernel.org/r/20241020040200.939973-1-visitorckw@gmail.com Link: https://lore.kernel.org/20240522161048.8d8bbc7b153b4ecd92c50666@linux-foundation.org Link: https://lkml.kernel.org/r/20241020040200.939973-2-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Suggested-by: Andrew Morton Cc: Adrian Hunter Cc: Arnaldo Carvalho de Melo Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Jonathan Corbet Cc: Kent Overstreet Cc: Kuan-Wei Chiu Cc: "Liang, Kan" Cc: Mark Rutland Cc: Matthew Sakai Cc: Matthew Wilcox (Oracle) Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 129 +++++++++++++++++++++++++++++++---------------- 1 file changed, 86 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 43a7b9dcf15e..0abb21173979 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -40,7 +40,7 @@ struct min_heap_callbacks { /* Initialize a min-heap. */ static __always_inline -void __min_heap_init(min_heap_char *heap, void *data, int size) +void __min_heap_init_inline(min_heap_char *heap, void *data, int size) { heap->nr = 0; heap->size = size; @@ -50,33 +50,33 @@ void __min_heap_init(min_heap_char *heap, void *data, int size) heap->data = heap->preallocated; } -#define min_heap_init(_heap, _data, _size) \ - __min_heap_init((min_heap_char *)_heap, _data, _size) +#define min_heap_init_inline(_heap, _data, _size) \ + __min_heap_init_inline((min_heap_char *)_heap, _data, _size) /* Get the minimum element from the heap. */ static __always_inline -void *__min_heap_peek(struct min_heap_char *heap) +void *__min_heap_peek_inline(struct min_heap_char *heap) { return heap->nr ? heap->data : NULL; } -#define min_heap_peek(_heap) \ - (__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap)) +#define min_heap_peek_inline(_heap) \ + (__minheap_cast(_heap) __min_heap_peek_inline((min_heap_char *)_heap)) /* Check if the heap is full. */ static __always_inline -bool __min_heap_full(min_heap_char *heap) +bool __min_heap_full_inline(min_heap_char *heap) { return heap->nr == heap->size; } -#define min_heap_full(_heap) \ - __min_heap_full((min_heap_char *)_heap) +#define min_heap_full_inline(_heap) \ + __min_heap_full_inline((min_heap_char *)_heap) /* Sift the element at pos down the heap. */ static __always_inline -void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, - const struct min_heap_callbacks *func, void *args) +void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { void *left, *right; void *data = heap->data; @@ -108,13 +108,14 @@ void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, } } -#define min_heap_sift_down(_heap, _pos, _func, _args) \ - __min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args) +#define min_heap_sift_down_inline(_heap, _pos, _func, _args) \ + __min_heap_sift_down_inline((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), \ + _func, _args) /* Sift up ith element from the heap, O(log2(nr)). */ static __always_inline -void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, - const struct min_heap_callbacks *func, void *args) +void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; size_t parent; @@ -128,27 +129,28 @@ void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, } } -#define min_heap_sift_up(_heap, _idx, _func, _args) \ - __min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) +#define min_heap_sift_up_inline(_heap, _idx, _func, _args) \ + __min_heap_sift_up_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, \ + _func, _args) /* Floyd's approach to heapification that is O(nr). */ static __always_inline -void __min_heapify_all(min_heap_char *heap, size_t elem_size, - const struct min_heap_callbacks *func, void *args) +void __min_heapify_all_inline(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { int i; for (i = heap->nr / 2 - 1; i >= 0; i--) - __min_heap_sift_down(heap, i, elem_size, func, args); + __min_heap_sift_down_inline(heap, i, elem_size, func, args); } -#define min_heapify_all(_heap, _func, _args) \ - __min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) +#define min_heapify_all_inline(_heap, _func, _args) \ + __min_heapify_all_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) /* Remove minimum element from the heap, O(log2(nr)). */ static __always_inline -bool __min_heap_pop(min_heap_char *heap, size_t elem_size, - const struct min_heap_callbacks *func, void *args) +bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; @@ -158,13 +160,13 @@ bool __min_heap_pop(min_heap_char *heap, size_t elem_size, /* Place last element at the root (position 0) and then sift down. */ heap->nr--; memcpy(data, data + (heap->nr * elem_size), elem_size); - __min_heap_sift_down(heap, 0, elem_size, func, args); + __min_heap_sift_down_inline(heap, 0, elem_size, func, args); return true; } -#define min_heap_pop(_heap, _func, _args) \ - __min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) +#define min_heap_pop_inline(_heap, _func, _args) \ + __min_heap_pop_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) /* * Remove the minimum element and then push the given element. The @@ -172,22 +174,21 @@ bool __min_heap_pop(min_heap_char *heap, size_t elem_size, * efficient than a pop followed by a push that does 2. */ static __always_inline -void __min_heap_pop_push(min_heap_char *heap, - const void *element, size_t elem_size, - const struct min_heap_callbacks *func, - void *args) +void __min_heap_pop_push_inline(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { memcpy(heap->data, element, elem_size); - __min_heap_sift_down(heap, 0, elem_size, func, args); + __min_heap_sift_down_inline(heap, 0, elem_size, func, args); } -#define min_heap_pop_push(_heap, _element, _func, _args) \ - __min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) +#define min_heap_pop_push_inline(_heap, _element, _func, _args) \ + __min_heap_pop_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \ + _func, _args) /* Push an element on to the heap, O(log2(nr)). */ static __always_inline -bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, - const struct min_heap_callbacks *func, void *args) +bool __min_heap_push_inline(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; int pos; @@ -201,18 +202,19 @@ bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, heap->nr++; /* Sift child at pos up. */ - __min_heap_sift_up(heap, elem_size, pos, func, args); + __min_heap_sift_up_inline(heap, elem_size, pos, func, args); return true; } -#define min_heap_push(_heap, _element, _func, _args) \ - __min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) +#define min_heap_push_inline(_heap, _element, _func, _args) \ + __min_heap_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \ + _func, _args) /* Remove ith element from the heap, O(log2(nr)). */ static __always_inline -bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, - const struct min_heap_callbacks *func, void *args) +bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; @@ -224,12 +226,53 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, if (idx == heap->nr) return true; func->swp(data + (idx * elem_size), data + (heap->nr * elem_size), args); - __min_heap_sift_up(heap, elem_size, idx, func, args); - __min_heap_sift_down(heap, idx, elem_size, func, args); + __min_heap_sift_up_inline(heap, elem_size, idx, func, args); + __min_heap_sift_down_inline(heap, idx, elem_size, func, args); return true; } +#define min_heap_del_inline(_heap, _idx, _func, _args) \ + __min_heap_del_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, \ + _func, _args) + +void __min_heap_init(min_heap_char *heap, void *data, int size); +void *__min_heap_peek(struct min_heap_char *heap); +bool __min_heap_full(min_heap_char *heap); +void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args); +void __min_heapify_all(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +bool __min_heap_pop(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args); + +#define min_heap_init(_heap, _data, _size) \ + __min_heap_init((min_heap_char *)_heap, _data, _size) +#define min_heap_peek(_heap) \ + (__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap)) +#define min_heap_full(_heap) \ + __min_heap_full((min_heap_char *)_heap) +#define min_heap_sift_down(_heap, _pos, _func, _args) \ + __min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args) +#define min_heap_sift_up(_heap, _idx, _func, _args) \ + __min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) +#define min_heapify_all(_heap, _func, _args) \ + __min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) +#define min_heap_pop(_heap, _func, _args) \ + __min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) +#define min_heap_pop_push(_heap, _element, _func, _args) \ + __min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \ + _func, _args) +#define min_heap_push(_heap, _element, _func, _args) \ + __min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) #define min_heap_del(_heap, _idx, _func, _args) \ __min_heap_del((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) -- cgit v1.2.3 From aa5888afc2347ebb394c2c4b694fa3026775009e Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 20 Oct 2024 12:01:52 +0800 Subject: lib min_heap: optimize min heap by prescaling counters for better performance Improve the efficiency of the min heap by prescaling counters, eliminating the need to repeatedly compute 'index * element_size' when accessing elements. By doing so, we avoid the overhead associated with recalculating the byte offset for each heap operation. However, with prescaling, the calculation for the parent element's location is no longer as simple as '(i - 1) / 2'. To address this, we copy the parent function from 'lib/sort.c', which calculates the parent offset in a branchless manner without using any division instructions. This optimization should result in a more efficient heap implementation by reducing the computational overhead of finding parent and child offsets. Link: https://lkml.kernel.org/r/20241020040200.939973-3-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Cc: Adrian Hunter Cc: Arnaldo Carvalho de Melo Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Jonathan Corbet Cc: Kent Overstreet Cc: "Liang, Kan" Cc: Mark Rutland Cc: Matthew Sakai Cc: Matthew Wilcox (Oracle) Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 73 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 0abb21173979..41b092a14238 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -38,6 +38,32 @@ struct min_heap_callbacks { void (*swp)(void *lhs, void *rhs, void *args); }; +/** + * parent - given the offset of the child, find the offset of the parent. + * @i: the offset of the heap element whose parent is sought. Non-zero. + * @lsbit: a precomputed 1-bit mask, equal to "size & -size" + * @size: size of each element + * + * In terms of array indexes, the parent of element j = @i/@size is simply + * (j-1)/2. But when working in byte offsets, we can't use implicit + * truncation of integer divides. + * + * Fortunately, we only need one bit of the quotient, not the full divide. + * @size has a least significant bit. That bit will be clear if @i is + * an even multiple of @size, and set if it's an odd multiple. + * + * Logically, we're doing "if (i & lsbit) i -= size;", but since the + * branch is unpredictable, it's done with a bit of clever branch-free + * code instead. + */ +__attribute_const__ __always_inline +static size_t parent(size_t i, unsigned int lsbit, size_t size) +{ + i -= size; + i -= size & -(i & lsbit); + return i / 2; +} + /* Initialize a min-heap. */ static __always_inline void __min_heap_init_inline(min_heap_char *heap, void *data, int size) @@ -78,33 +104,30 @@ static __always_inline void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size, const struct min_heap_callbacks *func, void *args) { - void *left, *right; + const unsigned long lsbit = elem_size & -elem_size; void *data = heap->data; - void *root = data + pos * elem_size; - int i = pos, j; + /* pre-scale counters for performance */ + size_t a = pos * elem_size; + size_t b, c, d; + size_t n = heap->nr * elem_size; /* Find the sift-down path all the way to the leaves. */ - for (;;) { - if (i * 2 + 2 >= heap->nr) - break; - left = data + (i * 2 + 1) * elem_size; - right = data + (i * 2 + 2) * elem_size; - i = func->less(left, right, args) ? i * 2 + 1 : i * 2 + 2; - } + for (b = a; c = 2 * b + elem_size, (d = c + elem_size) < n;) + b = func->less(data + c, data + d, args) ? c : d; /* Special case for the last leaf with no sibling. */ - if (i * 2 + 2 == heap->nr) - i = i * 2 + 1; + if (d == n) + b = c; /* Backtrack to the correct location. */ - while (i != pos && func->less(root, data + i * elem_size, args)) - i = (i - 1) / 2; + while (b != a && func->less(data + a, data + b, args)) + b = parent(b, lsbit, elem_size); /* Shift the element into its correct place. */ - j = i; - while (i != pos) { - i = (i - 1) / 2; - func->swp(data + i * elem_size, data + j * elem_size, args); + c = b; + while (b != a) { + b = parent(b, lsbit, elem_size); + func->swp(data + b, data + c, args); } } @@ -117,15 +140,17 @@ static __always_inline void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx, const struct min_heap_callbacks *func, void *args) { + const unsigned long lsbit = elem_size & -elem_size; void *data = heap->data; - size_t parent; + /* pre-scale counters for performance */ + size_t a = idx * elem_size, b; - while (idx) { - parent = (idx - 1) / 2; - if (func->less(data + parent * elem_size, data + idx * elem_size, args)) + while (a) { + b = parent(a, lsbit, elem_size); + if (func->less(data + b, data + a, args)) break; - func->swp(data + parent * elem_size, data + idx * elem_size, args); - idx = parent; + func->swp(data + a, data + b, args); + a = b; } } -- cgit v1.2.3 From 03ec56d084611b5a4dc06ffa74db0928616e4d7f Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 20 Oct 2024 12:01:53 +0800 Subject: lib min_heap: avoid indirect function call by providing default swap The non-inline min heap API can result in an indirect function call to the custom swap function. This becomes particularly costly when CONFIG_MITIGATION_RETPOLINE is enabled, as indirect function calls are expensive in this case. To address this, copy the code from lib/sort.c and provide a default builtin swap implementation that performs element swaps based on the element size. This change allows most users to avoid the overhead of indirect function calls, improving efficiency. Link: https://lkml.kernel.org/r/20241020040200.939973-4-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Cc: Adrian Hunter Cc: Arnaldo Carvalho de Melo Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Jonathan Corbet Cc: Kent Overstreet Cc: "Liang, Kan" Cc: Mark Rutland Cc: Matthew Sakai Cc: Matthew Wilcox (Oracle) Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 159 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 156 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 41b092a14238..e781727c8916 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -38,6 +38,147 @@ struct min_heap_callbacks { void (*swp)(void *lhs, void *rhs, void *args); }; +/** + * is_aligned - is this pointer & size okay for word-wide copying? + * @base: pointer to data + * @size: size of each element + * @align: required alignment (typically 4 or 8) + * + * Returns true if elements can be copied using word loads and stores. + * The size must be a multiple of the alignment, and the base address must + * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS. + * + * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)" + * to "if ((a | b) & mask)", so we do that by hand. + */ +__attribute_const__ __always_inline +static bool is_aligned(const void *base, size_t size, unsigned char align) +{ + unsigned char lsbits = (unsigned char)size; + + (void)base; +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + lsbits |= (unsigned char)(uintptr_t)base; +#endif + return (lsbits & (align - 1)) == 0; +} + +/** + * swap_words_32 - swap two elements in 32-bit chunks + * @a: pointer to the first element to swap + * @b: pointer to the second element to swap + * @n: element size (must be a multiple of 4) + * + * Exchange the two objects in memory. This exploits base+index addressing, + * which basically all CPUs have, to minimize loop overhead computations. + * + * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the + * bottom of the loop, even though the zero flag is still valid from the + * subtract (since the intervening mov instructions don't alter the flags). + * Gcc 8.1.0 doesn't have that problem. + */ +static __always_inline +void swap_words_32(void *a, void *b, size_t n) +{ + do { + u32 t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; + } while (n); +} + +/** + * swap_words_64 - swap two elements in 64-bit chunks + * @a: pointer to the first element to swap + * @b: pointer to the second element to swap + * @n: element size (must be a multiple of 8) + * + * Exchange the two objects in memory. This exploits base+index + * addressing, which basically all CPUs have, to minimize loop overhead + * computations. + * + * We'd like to use 64-bit loads if possible. If they're not, emulating + * one requires base+index+4 addressing which x86 has but most other + * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads, + * but it's possible to have 64-bit loads without 64-bit pointers (e.g. + * x32 ABI). Are there any cases the kernel needs to worry about? + */ +static __always_inline +void swap_words_64(void *a, void *b, size_t n) +{ + do { +#ifdef CONFIG_64BIT + u64 t = *(u64 *)(a + (n -= 8)); + *(u64 *)(a + n) = *(u64 *)(b + n); + *(u64 *)(b + n) = t; +#else + /* Use two 32-bit transfers to avoid base+index+4 addressing */ + u32 t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; + + t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; +#endif + } while (n); +} + +/** + * swap_bytes - swap two elements a byte at a time + * @a: pointer to the first element to swap + * @b: pointer to the second element to swap + * @n: element size + * + * This is the fallback if alignment doesn't allow using larger chunks. + */ +static __always_inline +void swap_bytes(void *a, void *b, size_t n) +{ + do { + char t = ((char *)a)[--n]; + ((char *)a)[n] = ((char *)b)[n]; + ((char *)b)[n] = t; + } while (n); +} + +/* + * The values are arbitrary as long as they can't be confused with + * a pointer, but small integers make for the smallest compare + * instructions. + */ +#define SWAP_WORDS_64 ((void (*)(void *, void *, void *))0) +#define SWAP_WORDS_32 ((void (*)(void *, void *, void *))1) +#define SWAP_BYTES ((void (*)(void *, void *, void *))2) + +/* + * Selects the appropriate swap function based on the element size. + */ +static __always_inline +void *select_swap_func(const void *base, size_t size) +{ + if (is_aligned(base, size, 8)) + return SWAP_WORDS_64; + else if (is_aligned(base, size, 4)) + return SWAP_WORDS_32; + else + return SWAP_BYTES; +} + +static __always_inline +void do_swap(void *a, void *b, size_t size, void (*swap_func)(void *lhs, void *rhs, void *args), + void *priv) +{ + if (swap_func == SWAP_WORDS_64) + swap_words_64(a, b, size); + else if (swap_func == SWAP_WORDS_32) + swap_words_32(a, b, size); + else if (swap_func == SWAP_BYTES) + swap_bytes(a, b, size); + else + swap_func(a, b, priv); +} + /** * parent - given the offset of the child, find the offset of the parent. * @i: the offset of the heap element whose parent is sought. Non-zero. @@ -106,11 +247,15 @@ void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size, { const unsigned long lsbit = elem_size & -elem_size; void *data = heap->data; + void (*swp)(void *lhs, void *rhs, void *args) = func->swp; /* pre-scale counters for performance */ size_t a = pos * elem_size; size_t b, c, d; size_t n = heap->nr * elem_size; + if (!swp) + swp = select_swap_func(data, elem_size); + /* Find the sift-down path all the way to the leaves. */ for (b = a; c = 2 * b + elem_size, (d = c + elem_size) < n;) b = func->less(data + c, data + d, args) ? c : d; @@ -127,7 +272,7 @@ void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size, c = b; while (b != a) { b = parent(b, lsbit, elem_size); - func->swp(data + b, data + c, args); + do_swap(data + b, data + c, elem_size, swp, args); } } @@ -142,14 +287,18 @@ void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx { const unsigned long lsbit = elem_size & -elem_size; void *data = heap->data; + void (*swp)(void *lhs, void *rhs, void *args) = func->swp; /* pre-scale counters for performance */ size_t a = idx * elem_size, b; + if (!swp) + swp = select_swap_func(data, elem_size); + while (a) { b = parent(a, lsbit, elem_size); if (func->less(data + b, data + a, args)) break; - func->swp(data + a, data + b, args); + do_swap(data + a, data + b, elem_size, swp, args); a = b; } } @@ -242,15 +391,19 @@ bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx, const struct min_heap_callbacks *func, void *args) { void *data = heap->data; + void (*swp)(void *lhs, void *rhs, void *args) = func->swp; if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap")) return false; + if (!swp) + swp = select_swap_func(data, elem_size); + /* Place last element at the root (position 0) and then sift down. */ heap->nr--; if (idx == heap->nr) return true; - func->swp(data + (idx * elem_size), data + (heap->nr * elem_size), args); + do_swap(data + (idx * elem_size), data + (heap->nr * elem_size), elem_size, swp, args); __min_heap_sift_up_inline(heap, elem_size, idx, func, args); __min_heap_sift_down_inline(heap, idx, elem_size, func, args); -- cgit v1.2.3 From 84bfbfbbd32aee136afea4b6bf82581dce79c305 Mon Sep 17 00:00:00 2001 From: Maurice Lambert Date: Sun, 3 Nov 2024 23:39:50 +0100 Subject: netlink: typographical error in nlmsg_type constants definition This commit fix a typographical error in netlink nlmsg_type constants definition in the include/uapi/linux/rtnetlink.h at line 177. The definition is RTM_NEWNVLAN RTM_NEWVLAN instead of RTM_NEWVLAN RTM_NEWVLAN. Signed-off-by: Maurice Lambert Fixes: 8dcea187088b ("net: bridge: vlan: add rtm definitions and dump support") Link: https://patch.msgid.link/20241103223950.230300-1-mauricelambert434@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/rtnetlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 3b687d20c9ed..db7254d52d93 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -174,7 +174,7 @@ enum { #define RTM_GETLINKPROP RTM_GETLINKPROP RTM_NEWVLAN = 112, -#define RTM_NEWNVLAN RTM_NEWVLAN +#define RTM_NEWVLAN RTM_NEWVLAN RTM_DELVLAN, #define RTM_DELVLAN RTM_DELVLAN RTM_GETVLAN, -- cgit v1.2.3 From e1ef630c56d36770e180f0d0bf7b61b5289f5c48 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Fri, 1 Nov 2024 11:57:13 +0200 Subject: clk: Add devm_clk_hw_register_gate_parent_hw() Add devm_clk_hw_register_gate_parent_hw() macro to allow registering devres managed gate clocks providing struct clk_hw object as parent. Reviewed-by: Geert Uytterhoeven Acked-by: Stephen Boyd Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/20241101095720.2247815-3-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/linux/clk-provider.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 7e43caabb54b..28cf7d103e92 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -622,6 +622,24 @@ struct clk *clk_register_gate(struct device *dev, const char *name, __devm_clk_hw_register_gate((dev), NULL, (name), (parent_name), NULL, \ NULL, (flags), (reg), (bit_idx), \ (clk_gate_flags), (lock)) +/** + * devm_clk_hw_register_gate_parent_hw - register a gate clock with the clock + * framework + * @dev: device that is registering this clock + * @name: name of this clock + * @parent_hw: pointer to parent clk + * @flags: framework-specific flags for this clock + * @reg: register address to control gating of this clock + * @bit_idx: which bit in the register controls gating of this clock + * @clk_gate_flags: gate-specific flags for this clock + * @lock: shared register lock for this clock + */ +#define devm_clk_hw_register_gate_parent_hw(dev, name, parent_hw, flags, \ + reg, bit_idx, clk_gate_flags, \ + lock) \ + __devm_clk_hw_register_gate((dev), NULL, (name), NULL, (parent_hw), \ + NULL, (flags), (reg), (bit_idx), \ + (clk_gate_flags), (lock)) /** * devm_clk_hw_register_gate_parent_data - register a gate clock with the * clock framework -- cgit v1.2.3 From 32360bf6a5d4016669c3545e7b0ec939937f5331 Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Wed, 4 Sep 2024 01:39:30 +0300 Subject: leds: Introduce ordered workqueue for LEDs events instead of system_wq This allows to setup ordered workqueue for LEDs events. This may be useful, because default 'system_wq' does not guarantee execution order of each work_struct, thus for several brightness update requests (for multiple LEDs), real brightness switch could be in random order. Yes, for sysfs-based LEDs we have flush_work() call inside brightness_store() operation, but it's blocking call, so userspace caller can be blocked at a long time, which means LEDs animation stream can be broken. Ordered workqueue has the same behaviour as system_wq + flush_work(), but all scheduled works are async and userspace caller is not blocked, which it better for userspace animation scheduling. Signed-off-by: Alexey Romanov Signed-off-by: Dmitry Rokosov Link: https://lore.kernel.org/r/20240903223936.21292-1-ddrokosov@salutedevices.com [Lee: Couple of style fix-ups] Signed-off-by: Lee Jones --- include/linux/leds.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/leds.h b/include/linux/leds.h index e5968c3ed4ae..9f34b9facaad 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -171,6 +171,7 @@ struct led_classdev { int new_blink_brightness; void (*flash_resume)(struct led_classdev *led_cdev); + struct workqueue_struct *wq; /* LED workqueue */ struct work_struct set_brightness_work; int delayed_set_value; unsigned long delayed_delay_on; -- cgit v1.2.3 From 4ca7cd938725a4050dcd62ae9472e931d603118d Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Sun, 3 Nov 2024 21:35:27 +0530 Subject: leds: class: Protect brightness_show() with led_cdev->led_access mutex There is NULL pointer issue observed if from Process A where hid device being added which results in adding a led_cdev addition and later a another call to access of led_cdev attribute from Process B can result in NULL pointer issue. Use mutex led_cdev->led_access to protect access to led->cdev and its attribute inside brightness_show() and max_brightness_show() and also update the comment for mutex that it should be used to protect the led class device fields. Process A Process B kthread+0x114 worker_thread+0x244 process_scheduled_works+0x248 uhid_device_add_worker+0x24 hid_add_device+0x120 device_add+0x268 bus_probe_device+0x94 device_initial_probe+0x14 __device_attach+0xfc bus_for_each_drv+0x10c __device_attach_driver+0x14c driver_probe_device+0x3c __driver_probe_device+0xa0 really_probe+0x190 hid_device_probe+0x130 ps_probe+0x990 ps_led_register+0x94 devm_led_classdev_register_ext+0x58 led_classdev_register_ext+0x1f8 device_create_with_groups+0x48 device_create_groups_vargs+0xc8 device_add+0x244 kobject_uevent+0x14 kobject_uevent_env[jt]+0x224 mutex_unlock[jt]+0xc4 __mutex_unlock_slowpath+0xd4 wake_up_q+0x70 try_to_wake_up[jt]+0x48c preempt_schedule_common+0x28 __schedule+0x628 __switch_to+0x174 el0t_64_sync+0x1a8/0x1ac el0t_64_sync_handler+0x68/0xbc el0_svc+0x38/0x68 do_el0_svc+0x1c/0x28 el0_svc_common+0x80/0xe0 invoke_syscall+0x58/0x114 __arm64_sys_read+0x1c/0x2c ksys_read+0x78/0xe8 vfs_read+0x1e0/0x2c8 kernfs_fop_read_iter+0x68/0x1b4 seq_read_iter+0x158/0x4ec kernfs_seq_show+0x44/0x54 sysfs_kf_seq_show+0xb4/0x130 dev_attr_show+0x38/0x74 brightness_show+0x20/0x4c dualshock4_led_get_brightness+0xc/0x74 [ 3313.874295][ T4013] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000060 [ 3313.874301][ T4013] Mem abort info: [ 3313.874303][ T4013] ESR = 0x0000000096000006 [ 3313.874305][ T4013] EC = 0x25: DABT (current EL), IL = 32 bits [ 3313.874307][ T4013] SET = 0, FnV = 0 [ 3313.874309][ T4013] EA = 0, S1PTW = 0 [ 3313.874311][ T4013] FSC = 0x06: level 2 translation fault [ 3313.874313][ T4013] Data abort info: [ 3313.874314][ T4013] ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000 [ 3313.874316][ T4013] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 3313.874318][ T4013] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 3313.874320][ T4013] user pgtable: 4k pages, 39-bit VAs, pgdp=00000008f2b0a000 .. [ 3313.874332][ T4013] Dumping ftrace buffer: [ 3313.874334][ T4013] (ftrace buffer empty) .. .. [ dd3313.874639][ T4013] CPU: 6 PID: 4013 Comm: InputReader [ 3313.874648][ T4013] pc : dualshock4_led_get_brightness+0xc/0x74 [ 3313.874653][ T4013] lr : led_update_brightness+0x38/0x60 [ 3313.874656][ T4013] sp : ffffffc0b910bbd0 .. .. [ 3313.874685][ T4013] Call trace: [ 3313.874687][ T4013] dualshock4_led_get_brightness+0xc/0x74 [ 3313.874690][ T4013] brightness_show+0x20/0x4c [ 3313.874692][ T4013] dev_attr_show+0x38/0x74 [ 3313.874696][ T4013] sysfs_kf_seq_show+0xb4/0x130 [ 3313.874700][ T4013] kernfs_seq_show+0x44/0x54 [ 3313.874703][ T4013] seq_read_iter+0x158/0x4ec [ 3313.874705][ T4013] kernfs_fop_read_iter+0x68/0x1b4 [ 3313.874708][ T4013] vfs_read+0x1e0/0x2c8 [ 3313.874711][ T4013] ksys_read+0x78/0xe8 [ 3313.874714][ T4013] __arm64_sys_read+0x1c/0x2c [ 3313.874718][ T4013] invoke_syscall+0x58/0x114 [ 3313.874721][ T4013] el0_svc_common+0x80/0xe0 [ 3313.874724][ T4013] do_el0_svc+0x1c/0x28 [ 3313.874727][ T4013] el0_svc+0x38/0x68 [ 3313.874730][ T4013] el0t_64_sync_handler+0x68/0xbc [ 3313.874732][ T4013] el0t_64_sync+0x1a8/0x1ac Signed-off-by: Mukesh Ojha Reviewed-by: Anish Kumar Link: https://lore.kernel.org/r/20241103160527.82487-1-quic_mojha@quicinc.com Signed-off-by: Lee Jones --- include/linux/leds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/leds.h b/include/linux/leds.h index 9f34b9facaad..98f9719c924c 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -239,7 +239,7 @@ struct led_classdev { struct kernfs_node *brightness_hw_changed_kn; #endif - /* Ensures consistent access to the LED Flash Class device */ + /* Ensures consistent access to the LED class device */ struct mutex led_access; }; -- cgit v1.2.3 From 33b091c08ed85e023c21376e6f787355fd46b440 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Fri, 1 Nov 2024 13:42:49 -0300 Subject: libfs: Fix kernel-doc warning in generic_ci_validate_strict_name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the indentation of the return values from generic_ci_validate_strict_name() to properly render the comment and to address a `make htmldocs` warning: Documentation/filesystems/api-summary:14: include/linux/fs.h:3504: WARNING: Bullet list ends without a blank line; unexpected unindent. Fixes: 0e152beb5aa1 ("libfs: Create the helper function generic_ci_validate_strict_name()") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/lkml/20241030162435.05425f60@canb.auug.org.au/ Signed-off-by: André Almeida Link: https://lore.kernel.org/r/20241101164251.327884-2-andrealmeid@igalia.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index b277369672a1..001d580af862 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3477,12 +3477,12 @@ int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, * @name: name of the new file * * Return: - * * True if the filename is suitable for this directory. It can be - * true if a given name is not suitable for a strict encoding - * directory, but the directory being used isn't strict + * * True: if the filename is suitable for this directory. It can be + * true if a given name is not suitable for a strict encoding + * directory, but the directory being used isn't strict * * False if the filename isn't suitable for this directory. This only - * happens when a directory is casefolded and the filesystem is strict - * about its encoding. + * happens when a directory is casefolded and the filesystem is strict + * about its encoding. */ static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name) { -- cgit v1.2.3 From e57dfaa4b0a72f6a231a8eedb95d260045bbd8db Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 31 Oct 2024 16:52:57 +0100 Subject: xfrm: Convert struct xfrm_dst_lookup_params -> tos to dscp_t. Add type annotation to the "tos" field of struct xfrm_dst_lookup_params, to ensure that the ECN bits aren't mistakenly taken into account when doing route lookups. Rename that field (tos -> dscp) to make that change explicit. Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2b87999bd5aa..32c09e85a64c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -354,7 +355,7 @@ void xfrm_if_unregister_cb(void); struct xfrm_dst_lookup_params { struct net *net; - int tos; + dscp_t dscp; int oif; xfrm_address_t *saddr; xfrm_address_t *daddr; -- cgit v1.2.3 From 6140be90ec70c39fa844741ca3cc807dd0866394 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= Date: Fri, 26 Apr 2024 18:20:14 +0200 Subject: fs/xattr: add *at family syscalls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the four syscalls setxattrat(), getxattrat(), listxattrat() and removexattrat(). Those can be used to operate on extended attributes, especially security related ones, either relative to a pinned directory or on a file descriptor without read access, avoiding a /proc//fd/ detour, requiring a mounted procfs. One use case will be setfiles(8) setting SELinux file contexts ("security.selinux") without race conditions and without a file descriptor opened with read access requiring SELinux read permission. Use the do_{name}at() pattern from fs/open.c. Pass the value of the extended attribute, its length, and for setxattrat(2) the command (XATTR_CREATE or XATTR_REPLACE) via an added struct xattr_args to not exceed six syscall arguments and not merging the AT_* and XATTR_* flags. [AV: fixes by Christian Brauner folded in, the entire thing rebased on top of {filename,file}_...xattr() primitives, treatment of empty pathnames regularized. As the result, AT_EMPTY_PATH+NULL handling is cheap, so f...(2) can use it] Signed-off-by: Christian Göttsche Link: https://lore.kernel.org/r/20240426162042.191916-1-cgoettsche@seltendoof.de Reviewed-by: Arnd Bergmann Reviewed-by: Christian Brauner CC: x86@kernel.org CC: linux-alpha@vger.kernel.org CC: linux-kernel@vger.kernel.org CC: linux-arm-kernel@lists.infradead.org CC: linux-ia64@vger.kernel.org CC: linux-m68k@lists.linux-m68k.org CC: linux-mips@vger.kernel.org CC: linux-parisc@vger.kernel.org CC: linuxppc-dev@lists.ozlabs.org CC: linux-s390@vger.kernel.org CC: linux-sh@vger.kernel.org CC: sparclinux@vger.kernel.org CC: linux-fsdevel@vger.kernel.org CC: audit@vger.kernel.org CC: linux-arch@vger.kernel.org CC: linux-api@vger.kernel.org CC: linux-security-module@vger.kernel.org CC: selinux@vger.kernel.org [brauner: slight tweaks] Signed-off-by: Christian Brauner Signed-off-by: Al Viro --- include/asm-generic/audit_change_attr.h | 6 ++++++ include/linux/syscalls.h | 13 +++++++++++++ include/linux/xattr.h | 4 ++++ include/uapi/asm-generic/unistd.h | 11 ++++++++++- include/uapi/linux/xattr.h | 7 +++++++ 5 files changed, 40 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/audit_change_attr.h b/include/asm-generic/audit_change_attr.h index 331670807cf0..cc840537885f 100644 --- a/include/asm-generic/audit_change_attr.h +++ b/include/asm-generic/audit_change_attr.h @@ -11,9 +11,15 @@ __NR_lchown, __NR_fchown, #endif __NR_setxattr, +#ifdef __NR_setxattrat +__NR_setxattrat, +#endif __NR_lsetxattr, __NR_fsetxattr, __NR_removexattr, +#ifdef __NR_removexattrat +__NR_removexattrat, +#endif __NR_lremovexattr, __NR_fremovexattr, #ifdef __NR_fchownat diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 5758104921e6..c6333204d451 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -77,6 +77,7 @@ struct cachestat_range; struct cachestat; struct statmount; struct mnt_id_req; +struct xattr_args; #include #include @@ -338,23 +339,35 @@ asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op, void __user *arg, unsigned int nr_args); asmlinkage long sys_setxattr(const char __user *path, const char __user *name, const void __user *value, size_t size, int flags); +asmlinkage long sys_setxattrat(int dfd, const char __user *path, unsigned int at_flags, + const char __user *name, + const struct xattr_args __user *args, size_t size); asmlinkage long sys_lsetxattr(const char __user *path, const char __user *name, const void __user *value, size_t size, int flags); asmlinkage long sys_fsetxattr(int fd, const char __user *name, const void __user *value, size_t size, int flags); asmlinkage long sys_getxattr(const char __user *path, const char __user *name, void __user *value, size_t size); +asmlinkage long sys_getxattrat(int dfd, const char __user *path, unsigned int at_flags, + const char __user *name, + struct xattr_args __user *args, size_t size); asmlinkage long sys_lgetxattr(const char __user *path, const char __user *name, void __user *value, size_t size); asmlinkage long sys_fgetxattr(int fd, const char __user *name, void __user *value, size_t size); asmlinkage long sys_listxattr(const char __user *path, char __user *list, size_t size); +asmlinkage long sys_listxattrat(int dfd, const char __user *path, + unsigned int at_flags, + char __user *list, size_t size); asmlinkage long sys_llistxattr(const char __user *path, char __user *list, size_t size); asmlinkage long sys_flistxattr(int fd, char __user *list, size_t size); asmlinkage long sys_removexattr(const char __user *path, const char __user *name); +asmlinkage long sys_removexattrat(int dfd, const char __user *path, + unsigned int at_flags, + const char __user *name); asmlinkage long sys_lremovexattr(const char __user *path, const char __user *name); asmlinkage long sys_fremovexattr(int fd, const char __user *name); diff --git a/include/linux/xattr.h b/include/linux/xattr.h index d20051865800..86b0d47984a1 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -19,6 +19,10 @@ #include #include +/* List of all open_how "versions". */ +#define XATTR_ARGS_SIZE_VER0 16 /* sizeof first published struct */ +#define XATTR_ARGS_SIZE_LATEST XATTR_ARGS_SIZE_VER0 + struct inode; struct dentry; diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 5bf6148cac2b..88dc393c2bca 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -841,8 +841,17 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) #define __NR_mseal 462 __SYSCALL(__NR_mseal, sys_mseal) +#define __NR_setxattrat 463 +__SYSCALL(__NR_setxattrat, sys_setxattrat) +#define __NR_getxattrat 464 +__SYSCALL(__NR_getxattrat, sys_getxattrat) +#define __NR_listxattrat 465 +__SYSCALL(__NR_listxattrat, sys_listxattrat) +#define __NR_removexattrat 466 +__SYSCALL(__NR_removexattrat, sys_removexattrat) + #undef __NR_syscalls -#define __NR_syscalls 463 +#define __NR_syscalls 467 /* * 32 bit systems traditionally used different diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h index 9463db2dfa9d..9854f9cff3c6 100644 --- a/include/uapi/linux/xattr.h +++ b/include/uapi/linux/xattr.h @@ -11,6 +11,7 @@ */ #include +#include #ifndef _UAPI_LINUX_XATTR_H #define _UAPI_LINUX_XATTR_H @@ -20,6 +21,12 @@ #define XATTR_CREATE 0x1 /* set value, fail if attr already exists */ #define XATTR_REPLACE 0x2 /* set value, fail if attr does not exist */ + +struct xattr_args { + __aligned_u64 __user value; + __u32 size; + __u32 flags; +}; #endif /* Namespaces */ -- cgit v1.2.3 From 44d0469f79bd3d0b3433732877358df7dc6b17b1 Mon Sep 17 00:00:00 2001 From: Zijian Zhang Date: Wed, 6 Nov 2024 00:37:42 +0000 Subject: bpf: Add sk_is_inet and IS_ICSK check in tls_sw_has_ctx_tx/rx As the introduction of the support for vsock and unix sockets in sockmap, tls_sw_has_ctx_tx/rx cannot presume the socket passed in must be IS_ICSK. vsock and af_unix sockets have vsock_sock and unix_sock instead of inet_connection_sock. For these sockets, tls_get_ctx may return an invalid pointer and cause page fault in function tls_sw_ctx_rx. BUG: unable to handle page fault for address: 0000000000040030 Workqueue: vsock-loopback vsock_loopback_work RIP: 0010:sk_psock_strp_data_ready+0x23/0x60 Call Trace: ? __die+0x81/0xc3 ? no_context+0x194/0x350 ? do_page_fault+0x30/0x110 ? async_page_fault+0x3e/0x50 ? sk_psock_strp_data_ready+0x23/0x60 virtio_transport_recv_pkt+0x750/0x800 ? update_load_avg+0x7e/0x620 vsock_loopback_work+0xd0/0x100 process_one_work+0x1a7/0x360 worker_thread+0x30/0x390 ? create_worker+0x1a0/0x1a0 kthread+0x112/0x130 ? __kthread_cancel_work+0x40/0x40 ret_from_fork+0x1f/0x40 v2: - Add IS_ICSK check v3: - Update the commits in Fixes Fixes: 634f1a7110b4 ("vsock: support sockmap") Fixes: 94531cfcbe79 ("af_unix: Add unix_stream_proto for sockmap") Signed-off-by: Zijian Zhang Acked-by: Stanislav Fomichev Acked-by: Jakub Kicinski Reviewed-by: Cong Wang Acked-by: Stefano Garzarella Link: https://lore.kernel.org/r/20241106003742.399240-1-zijianzhang@bytedance.com Signed-off-by: Martin KaFai Lau --- include/net/tls.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index 3a33924db2bc..61fef2880114 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -390,8 +390,12 @@ tls_offload_ctx_tx(const struct tls_context *tls_ctx) static inline bool tls_sw_has_ctx_tx(const struct sock *sk) { - struct tls_context *ctx = tls_get_ctx(sk); + struct tls_context *ctx; + + if (!sk_is_inet(sk) || !inet_test_bit(IS_ICSK, sk)) + return false; + ctx = tls_get_ctx(sk); if (!ctx) return false; return !!tls_sw_ctx_tx(ctx); @@ -399,8 +403,12 @@ static inline bool tls_sw_has_ctx_tx(const struct sock *sk) static inline bool tls_sw_has_ctx_rx(const struct sock *sk) { - struct tls_context *ctx = tls_get_ctx(sk); + struct tls_context *ctx; + + if (!sk_is_inet(sk) || !inet_test_bit(IS_ICSK, sk)) + return false; + ctx = tls_get_ctx(sk); if (!ctx) return false; return !!tls_sw_ctx_rx(ctx); -- cgit v1.2.3 From 6f94cbc29adacc15007c5a16295052e674099282 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 3 Nov 2024 08:46:07 -0700 Subject: io_uring/rsrc: split io_kiocb node type assignments Currently the io_rsrc_node assignment in io_kiocb is an array of two pointers, as two nodes may be assigned to a request - one file node, and one buffer node. However, the buffer node can co-exist with the provided buffers, as currently it's not supported to use both provided and registered buffers at the same time. This crucially brings struct io_kiocb down to 4 cache lines again, as before it spilled into the 5th cacheline. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index d52fec533c51..01e7fb9fcfe2 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -475,6 +475,7 @@ enum { REQ_F_BL_EMPTY_BIT, REQ_F_BL_NO_RECYCLE_BIT, REQ_F_BUFFERS_COMMIT_BIT, + REQ_F_BUF_NODE_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -553,6 +554,8 @@ enum { REQ_F_BL_NO_RECYCLE = IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT), /* buffer ring head needs incrementing on put */ REQ_F_BUFFERS_COMMIT = IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT), + /* buf node is valid */ + REQ_F_BUF_NODE = IO_REQ_FLAG(REQ_F_BUF_NODE_BIT), }; typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts); @@ -633,6 +636,8 @@ struct io_kiocb { * REQ_F_BUFFER_RING is set. */ struct io_buffer_list *buf_list; + + struct io_rsrc_node *buf_node; }; union { @@ -642,7 +647,7 @@ struct io_kiocb { __poll_t apoll_events; }; - struct io_rsrc_node *rsrc_nodes[2]; + struct io_rsrc_node *file_node; atomic_t refs; bool cancel_seq_set; -- cgit v1.2.3 From b6f58a3f4aa8dba424356c7a69388a81f4459300 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 3 Nov 2024 10:23:38 -0700 Subject: io_uring: move struct io_kiocb from task_struct to io_uring_task Rather than store the task_struct itself in struct io_kiocb, store the io_uring specific task_struct. The life times are the same in terms of io_uring, and this avoids doing some dereferences through the task_struct. For the hot path of putting local task references, we can deref req->tctx instead, which we'll need anyway in that function regardless of whether it's local or remote references. This is mostly straight forward, except the original task PF_EXITING check needs a bit of tweaking. task_work is _always_ run from the originating task, except in the fallback case, where it's run from a kernel thread. Replace the potentially racy (in case of fallback work) checks for req->task->flags with current->flags. It's either the still the original task, in which case PF_EXITING will be sane, or it has PF_KTHREAD set, in which case it's fallback work. Both cases should prevent moving forward with the given request. Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 2 +- include/linux/io_uring_types.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index c189d36ad55e..578a3fdf5c71 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -110,7 +110,7 @@ static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd) { - return cmd_to_io_kiocb(cmd)->task; + return cmd_to_io_kiocb(cmd)->tctx->task; } #endif /* _LINUX_IO_URING_CMD_H */ diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 01e7fb9fcfe2..fba2988accc3 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -84,6 +84,7 @@ struct io_uring_task { /* submission side */ int cached_refs; const struct io_ring_ctx *last; + struct task_struct *task; struct io_wq *io_wq; struct file *registered_rings[IO_RINGFD_REG_MAX]; @@ -625,7 +626,7 @@ struct io_kiocb { struct io_cqe cqe; struct io_ring_ctx *ctx; - struct task_struct *task; + struct io_uring_task *tctx; union { /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */ -- cgit v1.2.3 From 6bf90bd8c58a305994948eb3409d91a7d8f2edae Mon Sep 17 00:00:00 2001 From: Olivier Langlois Date: Sun, 13 Oct 2024 14:29:24 -0400 Subject: io_uring/napi: add static napi tracking strategy Add the static napi tracking strategy. That allows the user to manually manage the napi ids list for busy polling, and eliminate the overhead of dynamically updating the list from the fast path. Signed-off-by: Olivier Langlois Link: https://lore.kernel.org/r/96943de14968c35a5c599352259ad98f3c0770ba.1728828877.git.olivier@trillion01.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 +- include/uapi/linux/io_uring.h | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index fba2988accc3..072e65e93105 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -408,7 +408,7 @@ struct io_ring_ctx { /* napi busy poll default timeout */ ktime_t napi_busy_poll_dt; bool napi_prefer_busy_poll; - bool napi_enabled; + u8 napi_track_mode; DECLARE_HASHTABLE(napi_ht, 4); #endif diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 47977a5c65f5..5d08435b95a8 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -790,12 +790,40 @@ struct io_uring_buf_status { __u32 resv[8]; }; +enum io_uring_napi_op { + /* register/ungister backward compatible opcode */ + IO_URING_NAPI_REGISTER_OP = 0, + + /* opcodes to update napi_list when static tracking is used */ + IO_URING_NAPI_STATIC_ADD_ID = 1, + IO_URING_NAPI_STATIC_DEL_ID = 2 +}; + +enum io_uring_napi_tracking_strategy { + /* value must be 0 for backward compatibility */ + IO_URING_NAPI_TRACKING_DYNAMIC = 0, + IO_URING_NAPI_TRACKING_STATIC = 1, + IO_URING_NAPI_TRACKING_INACTIVE = 255 +}; + /* argument for IORING_(UN)REGISTER_NAPI */ struct io_uring_napi { __u32 busy_poll_to; __u8 prefer_busy_poll; - __u8 pad[3]; - __u64 resv; + + /* a io_uring_napi_op value */ + __u8 opcode; + __u8 pad[2]; + + /* + * for IO_URING_NAPI_REGISTER_OP, it is a + * io_uring_napi_tracking_strategy value. + * + * for IO_URING_NAPI_STATIC_ADD_ID/IO_URING_NAPI_STATIC_DEL_ID + * it is the napi id to add/del from napi_list. + */ + __u32 op_param; + __u32 resv; }; /* -- cgit v1.2.3 From 3b96b895127b7c0aed63d82c974b46340e8466c1 Mon Sep 17 00:00:00 2001 From: Esther Shimanovich Date: Tue, 10 Sep 2024 17:57:45 +0000 Subject: PCI: Detect and trust built-in Thunderbolt chips Some computers with CPUs that lack Thunderbolt features use discrete Thunderbolt chips to add Thunderbolt functionality. These Thunderbolt chips are located within the chassis; between the Root Port labeled ExternalFacingPort and the USB-C port. These Thunderbolt PCIe devices should be labeled as fixed and trusted, as they are built into the computer. Otherwise, security policies that rely on those flags may have unintended results, such as preventing USB-C ports from enumerating. Detect the above scenario through the process of elimination. 1) Integrated Thunderbolt host controllers already have Thunderbolt implemented, so anything outside their external facing Root Port is removable and untrusted. Detect them using the following properties: - Most integrated host controllers have the "usb4-host-interface" ACPI property, as described here: https://learn.microsoft.com/en-us/windows-hardware/drivers/pci/dsd-for-pcie-root-ports#mapping-native-protocols-pcie-displayport-tunneled-through-usb4-to-usb4-host-routers - Integrated Thunderbolt PCIe Root Ports before Alder Lake do not have the "usb4-host-interface" ACPI property. Identify those by their PCI IDs instead. 2) If a Root Port does not have integrated Thunderbolt capabilities, but has the "ExternalFacingPort" ACPI property, that means the manufacturer has opted to use a discrete Thunderbolt host controller that is built into the computer. This host controller can be identified by virtue of being located directly below an external-facing Root Port that lacks integrated Thunderbolt. Label it as trusted and fixed. Everything downstream from it is untrusted and removable. The "ExternalFacingPort" ACPI property is described here: https://learn.microsoft.com/en-us/windows-hardware/drivers/pci/dsd-for-pcie-root-ports#identifying-externally-exposed-pcie-root-ports Link: https://lore.kernel.org/r/20240910-trust-tbt-fix-v5-1-7a7a42a5f496@chromium.org Suggested-by: Mika Westerberg Signed-off-by: Esther Shimanovich Signed-off-by: Bjorn Helgaas Tested-by: Mika Westerberg Tested-by: Mario Limonciello Reviewed-by: Mika Westerberg Reviewed-by: Mario Limonciello --- include/linux/pci.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..4e77c4230c0a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2609,6 +2609,12 @@ pci_host_bridge_acpi_msi_domain(struct pci_bus *bus) { return NULL; } static inline bool pci_pr3_present(struct pci_dev *pdev) { return false; } #endif +#if defined(CONFIG_X86) && defined(CONFIG_ACPI) +bool arch_pci_dev_is_removable(struct pci_dev *pdev); +#else +static inline bool arch_pci_dev_is_removable(struct pci_dev *pdev) { return false; } +#endif + #ifdef CONFIG_EEH static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev) { -- cgit v1.2.3 From 25caea955cc950507d179f3ef456404b475e8c23 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 31 Oct 2024 14:08:58 +0800 Subject: irqchip: Add T-HEAD C900 ACLINT SSWI driver Add a driver for the T-HEAD C900 ACLINT SSWI device. This device allows the system with T-HEAD cpus to send ipi via fast device interface. Signed-off-by: Inochi Amaoto Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241031060859.722258-3-inochiama@gmail.com --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 2361ed4d2b15..799052249c7b 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -147,6 +147,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_EIOINTC_STARTING, CPUHP_AP_IRQ_AVECINTC_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, + CPUHP_AP_IRQ_THEAD_ACLINT_SSWI_STARTING, CPUHP_AP_IRQ_RISCV_IMSIC_STARTING, CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, -- cgit v1.2.3 From 513793bc6ab331b947111e8efaf8fcef33fb83e5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Nov 2024 09:14:31 +0100 Subject: posix-timers: Make signal delivery consistent Signals of timers which are reprogammed, disarmed or deleted can deliver signals related to the past. The POSIX spec is blury about this: - "The effect of disarming or resetting a timer with pending expiration notifications is unspecified." - "The disposition of pending signals for the deleted timer is unspecified." In both cases it is reasonable to expect that pending signals are discarded. Especially in the reprogramming case it does not make sense to account for previous overruns or to deliver a signal for a timer which has been disarmed. This makes the behaviour consistent and understandable. Remove the si_sys_private check from the signal delivery code and invoke posix_timer_deliver_signal() unconditionally for posix timer related signals. Change posix_timer_deliver_signal() so it controls the actual signal delivery via the return value. It now instructs the signal code to drop the signal when: 1) The timer does not longer exist in the hash table 2) The timer signal_seq value is not the same as the si_sys_private value which was set when the signal was queued. This is also a preparatory change to embed the sigqueue into the k_itimer structure, which in turn allows to remove the si_sys_private magic. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/all/20241105064213.040348644@linutronix.de --- include/linux/posix-timers.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 02afbb4da7f7..8c6d97412526 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -137,8 +137,6 @@ static inline void clear_posix_cputimers_work(struct task_struct *p) { } static inline void posix_cputimers_init_work(void) { } #endif -#define REQUEUE_PENDING 1 - /** * struct k_itimer - POSIX.1b interval timer structure. * @list: List head for binding the timer to signals->posix_timers -- cgit v1.2.3 From bf635681c906ad056d1fda325de8d1c12c9f8201 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Nov 2024 09:14:33 +0100 Subject: posix-cpu-timers: Cleanup the firing logic The firing flag of a posix CPU timer is tristate: 0: when the timer is not about to deliver a signal 1: when the timer has expired, but the signal has not been delivered yet -1: when the timer was queued for signal delivery and a rearm operation raced against it and supressed the signal delivery. This is a pointless exercise as this can be simply expressed with a boolean. Only if set, the signal is delivered. This makes delete and rearm consistent with the rest of the posix timers. Convert firing to bool and fixup the usage sites accordingly and add comments why the timer cannot be dequeued right away. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/all/20241105064213.172848618@linutronix.de --- include/linux/posix-timers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 8c6d97412526..b1de21731a08 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -49,7 +49,7 @@ struct cpu_timer { struct timerqueue_head *head; struct pid *pid; struct list_head elist; - int firing; + bool firing; struct task_struct __rcu *handling; }; -- cgit v1.2.3 From 4cf7bf2a2f1a8ace4a49a1138c8123fdb5990093 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Nov 2024 09:14:35 +0100 Subject: posix-cpu-timers: Use dedicated flag for CPU timer nanosleep POSIX CPU timer nanosleep creates a k_itimer on stack and uses the sigq pointer to detect the nanosleep case in the expiry function. Prepare for embedding sigqueue into struct k_itimer by using a dedicated flag for nanosleep. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20241105064213.238550394@linutronix.de --- include/linux/posix-timers.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index b1de21731a08..bcd01208d795 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -42,6 +42,7 @@ static inline int clockid_to_fd(const clockid_t clk) * @pid: Pointer to target task PID * @elist: List head for the expiry list * @firing: Timer is currently firing + * @nanosleep: Timer is used for nanosleep and is not a regular posix-timer * @handling: Pointer to the task which handles expiry */ struct cpu_timer { @@ -50,6 +51,7 @@ struct cpu_timer { struct pid *pid; struct list_head elist; bool firing; + bool nanosleep; struct task_struct __rcu *handling; }; -- cgit v1.2.3 From 5d916a0988eed5217c103932ff4887c9ae83c89c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Nov 2024 09:14:36 +0100 Subject: posix-timers: Add a refcount to struct k_itimer To cure the SIG_IGN handling for posix interval timers, the preallocated sigqueue needs to be embedded into struct k_itimer to prevent life time races of all sorts. To make that work correctly it needs reference counting so that timer deletion does not free the timer prematuraly when there is a signal queued or delivered concurrently. Add a rcuref to the posix timer part. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20241105064213.304756440@linutronix.de --- include/linux/posix-timers.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index bcd01208d795..9740fd0c2933 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -6,11 +6,13 @@ #include #include #include +#include #include #include struct kernel_siginfo; struct task_struct; +struct k_itimer; static inline clockid_t make_process_cpuclock(const unsigned int pid, const clockid_t clock) @@ -105,6 +107,7 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, void posixtimer_rearm_itimer(struct task_struct *p); bool posixtimer_deliver_signal(struct kernel_siginfo *info); +void posixtimer_free_timer(struct k_itimer *timer); /* Init task static initializer */ #define INIT_CPU_TIMERBASE(b) { \ @@ -129,6 +132,7 @@ static inline void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit) { } static inline void posixtimer_rearm_itimer(struct task_struct *p) { } static inline bool posixtimer_deliver_signal(struct kernel_siginfo *info) { return false; } +static inline void posixtimer_free_timer(struct k_itimer *timer) { } #endif #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK @@ -156,6 +160,7 @@ static inline void posix_cputimers_init_work(void) { } * @it_signal: Pointer to the creators signal struct * @it_pid: The pid of the process/task targeted by the signal * @it_process: The task to wakeup on clock_nanosleep (CPU timers) + * @rcuref: Reference count for life time management * @sigq: Pointer to preallocated sigqueue * @it: Union representing the various posix timer type * internals. @@ -180,6 +185,7 @@ struct k_itimer { struct task_struct *it_process; }; struct sigqueue *sigq; + rcuref_t rcuref; union { struct { struct hrtimer timer; @@ -200,4 +206,12 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); +#ifdef CONFIG_POSIX_TIMERS +static inline void posixtimer_putref(struct k_itimer *tmr) +{ + if (rcuref_put(&tmr->rcuref)) + posixtimer_free_timer(tmr); +} +#endif /* !CONFIG_POSIX_TIMERS */ + #endif -- cgit v1.2.3 From 54f1dd642fd088ba969206f09e7afffad7d9db2c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Nov 2024 09:14:39 +0100 Subject: signal: Provide posixtimer_sigqueue_init() To cure the SIG_IGN handling for posix interval timers, the preallocated sigqueue needs to be embedded into struct k_itimer to prevent life time races of all sorts. Provide a new function to initialize the embedded sigqueue to prepare for that. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20241105064213.450427515@linutronix.de --- include/linux/posix-timers.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 9740fd0c2933..200098d27cc0 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -12,6 +12,7 @@ struct kernel_siginfo; struct task_struct; +struct sigqueue; struct k_itimer; static inline clockid_t make_process_cpuclock(const unsigned int pid, @@ -106,6 +107,7 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, } void posixtimer_rearm_itimer(struct task_struct *p); +bool posixtimer_init_sigqueue(struct sigqueue *q); bool posixtimer_deliver_signal(struct kernel_siginfo *info); void posixtimer_free_timer(struct k_itimer *timer); -- cgit v1.2.3 From ef1c5bcd6daa674392bdf89b8ae889aafd73f956 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Nov 2024 09:14:41 +0100 Subject: posix-timers: Store PID type in the timer instead of re-evaluating the signal delivery mode everywhere. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20241105064213.519086500@linutronix.de --- include/linux/posix-timers.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 200098d27cc0..947176582de9 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -180,6 +181,7 @@ struct k_itimer { s64 it_overrun_last; unsigned int it_signal_seq; int it_sigev_notify; + enum pid_type it_pid_type; ktime_t it_interval; struct signal_struct *it_signal; union { -- cgit v1.2.3 From 0360ed14d9826678a50fa2b873e522a24cd3c018 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Nov 2024 09:14:42 +0100 Subject: signal: Refactor send_sigqueue() To handle posix timers which have their signal ignored via SIG_IGN properly it is required to requeue a ignored signal for delivery when SIG_IGN is lifted so the timer gets rearmed. Split the required code out of send_sigqueue() so it can be reused in context of sigaction(). While at it rename send_sigqueue() to posixtimer_send_sigqueue() so its clear what this is about. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20241105064213.586453412@linutronix.de --- include/linux/posix-timers.h | 1 + include/linux/sched/signal.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 947176582de9..52611ea923b2 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -109,6 +109,7 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, void posixtimer_rearm_itimer(struct task_struct *p); bool posixtimer_init_sigqueue(struct sigqueue *q); +int posixtimer_send_sigqueue(struct k_itimer *tmr); bool posixtimer_deliver_signal(struct kernel_siginfo *info); void posixtimer_free_timer(struct k_itimer *timer); diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index bd9f569231d9..36283c1c55e9 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -340,7 +340,6 @@ extern int send_sig(int, struct task_struct *, int); extern int zap_other_threads(struct task_struct *p); extern struct sigqueue *sigqueue_alloc(void); extern void sigqueue_free(struct sigqueue *); -extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type, int si_private); extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); static inline void clear_notify_signal(void) -- cgit v1.2.3 From 11629b9808e5900d675fd469d19932ea48060de3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Nov 2024 09:14:43 +0100 Subject: signal: Replace resched_timer logic In preparation for handling ignored posix timer signals correctly and embedding the sigqueue struct into struct k_itimer, hand down a pointer to the sigqueue struct into posix_timer_deliver_signal() instead of just having a boolean flag. No functional change. Suggested-by: Eric W. Biederman Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: "Eric W. Biederman" Link: https://lore.kernel.org/all/20241105064213.652658158@linutronix.de --- include/linux/posix-timers.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 52611ea923b2..39f1db76833a 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -110,7 +110,7 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, void posixtimer_rearm_itimer(struct task_struct *p); bool posixtimer_init_sigqueue(struct sigqueue *q); int posixtimer_send_sigqueue(struct k_itimer *tmr); -bool posixtimer_deliver_signal(struct kernel_siginfo *info); +bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq); void posixtimer_free_timer(struct k_itimer *timer); /* Init task static initializer */ @@ -135,7 +135,8 @@ static inline void posix_cputimers_init(struct posix_cputimers *pct) { } static inline void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit) { } static inline void posixtimer_rearm_itimer(struct task_struct *p) { } -static inline bool posixtimer_deliver_signal(struct kernel_siginfo *info) { return false; } +static inline bool posixtimer_deliver_signal(struct kernel_siginfo *info, + struct sigqueue *timer_sigq) { return false; } static inline void posixtimer_free_timer(struct k_itimer *timer) { } #endif -- cgit v1.2.3 From 6017a158beb13b412e55a451379798aae5876514 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Nov 2024 09:14:45 +0100 Subject: posix-timers: Embed sigqueue in struct k_itimer To cure the SIG_IGN handling for posix interval timers, the preallocated sigqueue needs to be embedded into struct k_itimer to prevent life time races of all sorts. Now that the prerequisites are in place, embed the sigqueue into struct k_itimer and fixup the relevant usage sites. Aside of preparing for proper SIG_IGN handling, this spares an extra allocation. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20241105064213.719695194@linutronix.de --- include/linux/posix-timers.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 39f1db76833a..28c0a30e0853 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -39,6 +39,8 @@ static inline int clockid_to_fd(const clockid_t clk) #ifdef CONFIG_POSIX_TIMERS +#include + /** * cpu_timer - Posix CPU timer representation for k_itimer * @node: timerqueue node to queue in the task/sig @@ -166,7 +168,7 @@ static inline void posix_cputimers_init_work(void) { } * @it_pid: The pid of the process/task targeted by the signal * @it_process: The task to wakeup on clock_nanosleep (CPU timers) * @rcuref: Reference count for life time management - * @sigq: Pointer to preallocated sigqueue + * @sigq: Embedded sigqueue * @it: Union representing the various posix timer type * internals. * @rcu: RCU head for freeing the timer. @@ -190,7 +192,7 @@ struct k_itimer { struct pid *it_pid; struct task_struct *it_process; }; - struct sigqueue *sigq; + struct sigqueue sigq; rcuref_t rcuref; union { struct { @@ -218,6 +220,23 @@ static inline void posixtimer_putref(struct k_itimer *tmr) if (rcuref_put(&tmr->rcuref)) posixtimer_free_timer(tmr); } + +static inline void posixtimer_sigqueue_getref(struct sigqueue *q) +{ + struct k_itimer *tmr = container_of(q, struct k_itimer, sigq); + + WARN_ON_ONCE(!rcuref_get(&tmr->rcuref)); +} + +static inline void posixtimer_sigqueue_putref(struct sigqueue *q) +{ + struct k_itimer *tmr = container_of(q, struct k_itimer, sigq); + + posixtimer_putref(tmr); +} +#else /* CONFIG_POSIX_TIMERS */ +static inline void posixtimer_sigqueue_getref(struct sigqueue *q) { } +static inline void posixtimer_sigqueue_putref(struct sigqueue *q) { } #endif /* !CONFIG_POSIX_TIMERS */ #endif -- cgit v1.2.3 From c2a4796a154bb952be1106911841aab2c8c17c4d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Nov 2024 09:14:46 +0100 Subject: signal: Cleanup unused posix-timer leftovers Remove the leftovers of sigqueue preallocation as it's not longer used. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20241105064213.786506636@linutronix.de --- include/linux/sched/signal.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 36283c1c55e9..02972fd41931 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -338,8 +338,6 @@ extern void force_fatal_sig(int); extern void force_exit_sig(int); extern int send_sig(int, struct task_struct *, int); extern int zap_other_threads(struct task_struct *p); -extern struct sigqueue *sigqueue_alloc(void); -extern void sigqueue_free(struct sigqueue *); extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); static inline void clear_notify_signal(void) -- cgit v1.2.3 From 647da5f709f112319c0d51e06f330d8afecb1940 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Nov 2024 09:14:48 +0100 Subject: posix-timers: Move sequence logic into struct k_itimer The posix timer signal handling uses siginfo::si_sys_private for handling the sequence counter check. That indirection is not longer required and the sequence count value at signal queueing time can be stored in struct k_itimer itself. This removes the requirement of treating siginfo::si_sys_private special as it's now always zero as the kernel does not touch it anymore. Suggested-by: Eric W. Biederman Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: "Eric W. Biederman" Link: https://lore.kernel.org/all/20241105064213.852619866@linutronix.de --- include/linux/posix-timers.h | 2 ++ include/uapi/asm-generic/siginfo.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 28c0a30e0853..49a89614d900 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -162,6 +162,7 @@ static inline void posix_cputimers_init_work(void) { } * @it_overrun: The overrun counter for pending signals * @it_overrun_last: The overrun at the time of the last delivered signal * @it_signal_seq: Sequence count to control signal delivery + * @it_sigqueue_seq: The sequence count at the point where the signal was queued * @it_sigev_notify: The notify word of sigevent struct for signal delivery * @it_interval: The interval for periodic timers * @it_signal: Pointer to the creators signal struct @@ -184,6 +185,7 @@ struct k_itimer { s64 it_overrun; s64 it_overrun_last; unsigned int it_signal_seq; + unsigned int it_sigqueue_seq; int it_sigev_notify; enum pid_type it_pid_type; ktime_t it_interval; diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index b7bc545ec3b2..5a1ca43b5fc6 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -46,7 +46,7 @@ union __sifields { __kernel_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ sigval_t _sigval; /* same as below */ - int _sys_private; /* not to be passed to user */ + int _sys_private; /* Not used by the kernel. Historic leftover. Always 0. */ } _timer; /* POSIX.1b signals */ -- cgit v1.2.3 From 69f032c92cf883ea74a4b69ba3d91317aa6f174e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Nov 2024 09:14:49 +0100 Subject: signal: Provide ignored_posix_timers list To prepare for handling posix timer signals on sigaction(SIG_IGN) properly, add a list to task::signal. This list will be used to queue posix timers so their signal can be requeued when SIG_IGN is lifted later. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20241105064213.920101900@linutronix.de --- include/linux/sched/signal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 02972fd41931..d5d03d919df8 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -138,6 +138,7 @@ struct signal_struct { /* POSIX.1b Interval Timers */ unsigned int next_posix_timer_id; struct hlist_head posix_timers; + struct hlist_head ignored_posix_timers; /* ITIMER_REAL timer for the process */ struct hrtimer real_timer; -- cgit v1.2.3 From 0e20cd33acc7a173b23900550331ee82a23e9f00 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Nov 2024 09:14:51 +0100 Subject: posix-timers: Handle ignored list on delete and exit To handle posix timer signals on sigaction(SIG_IGN) properly, the timers will be queued on a separate ignored list. Add the necessary cleanup code for timer_delete() and exit_itimers(). Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20241105064213.987530588@linutronix.de --- include/linux/posix-timers.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 49a89614d900..1608b52a44d5 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -152,7 +152,8 @@ static inline void posix_cputimers_init_work(void) { } /** * struct k_itimer - POSIX.1b interval timer structure. - * @list: List head for binding the timer to signals->posix_timers + * @list: List node for binding the timer to tsk::signal::posix_timers + * @ignored_list: List node for tracking ignored timers in tsk::signal::ignored_posix_timers * @t_hash: Entry in the posix timer hash table * @it_lock: Lock protecting the timer * @kclock: Pointer to the k_clock struct handling this timer @@ -176,6 +177,7 @@ static inline void posix_cputimers_init_work(void) { } */ struct k_itimer { struct hlist_node list; + struct hlist_node ignored_list; struct hlist_node t_hash; spinlock_t it_lock; const struct k_clock *kclock; -- cgit v1.2.3 From df7a996b4dab03c889fa86d849447b716f07b069 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Nov 2024 09:14:54 +0100 Subject: signal: Queue ignored posixtimers on ignore list Queue posixtimers which have their signal ignored on the ignored list: 1) When the timer fires and the signal has SIG_IGN set 2) When SIG_IGN is installed via sigaction() and a timer signal is already queued This only happens when the signal is for a valid timer, which delivered the signal in periodic mode. One-shot timer signals are correctly dropped. Due to the lock order constraints (sighand::siglock nests inside timer::lock) the signal code cannot access any of the timer fields which are relevant to make this decision, e.g. timer::it_status. This is addressed by establishing a protection scheme which requires to lock both locks on the timer side for modifying decision fields in the timer struct and therefore makes it possible for the signal delivery to evaluate with only sighand:siglock being held: 1) Move the NULLification of timer->it_signal into the sighand::siglock protected section of timer_delete() and check timer::it_signal in the code path which determines whether the signal is dropped or queued on the ignore list. This ensures that a deleted timer cannot be moved onto the ignore list, which would prevent it from being freed on exit() as it is not longer in the process' posix timer list. If the timer got moved to the ignored list before deletion then it is removed from the ignored list under sighand lock in timer_delete(). 2) Provide a new timer::it_sig_periodic flag, which gets set in the signal queue path with both timer and sighand locks held if the timer is actually in periodic mode at expiry time. The ignore list code checks this flag under sighand::siglock and drops the signal when it is not set. If it is set, then the signal is moved to the ignored list independent of the actual state of the timer. When the signal is un-ignored later then the signal is moved back to the signal queue. On signal delivery the posix timer side decides about dropping the signal if the timer was re-armed, dis-armed or deleted based on the signal sequence counter check. If the thread/process exits then not yet delivered signals are discarded which means the reference of the timer containing the sigqueue is dropped and frees the timer. This is way cheaper than requiring all code paths to lock sighand::siglock of the target thread/process on any modification of timer::it_status or going all the way and removing pending signals from the signal queues on every rearm, disarm or delete operation. So the protection scheme here is that on the timer side both timer::lock and sighand::siglock have to be held for modifying timer::it_signal timer::it_sig_periodic which means that on the signal side holding sighand::siglock is enough to evaluate these fields. In posixtimer_deliver_signal() holding timer::lock is sufficient to do the sequence validation against timer::it_signal_seq because a concurrent expiry is waiting on timer::lock to be released. This completes the SIG_IGN handling and such timers are not longer self rearmed which avoids pointless wakeups. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20241105064214.120756416@linutronix.de --- include/linux/posix-timers.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 1608b52a44d5..43ea6e784a25 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -160,6 +160,7 @@ static inline void posix_cputimers_init_work(void) { } * @it_clock: The posix timer clock id * @it_id: The posix timer id for identifying the timer * @it_status: The status of the timer + * @it_sig_periodic: The periodic status at signal delivery * @it_overrun: The overrun counter for pending signals * @it_overrun_last: The overrun at the time of the last delivered signal * @it_signal_seq: Sequence count to control signal delivery @@ -184,6 +185,7 @@ struct k_itimer { clockid_t it_clock; timer_t it_id; int it_status; + bool it_sig_periodic; s64 it_overrun; s64 it_overrun_last; unsigned int it_signal_seq; -- cgit v1.2.3 From 7a66f72b09bb0762360274b1fb677b3433dbaa06 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Nov 2024 09:14:55 +0100 Subject: posix-timers: Cleanup SIG_IGN workaround leftovers Now that ignored posix timer signals are requeued and the timers are rearmed on signal delivery the workaround to keep such timers alive and self rearm them is not longer required. Remove the relevant hacks and the not longer required return values from the related functions. The alarm timer workarounds will be cleaned up in a separate step. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20241105064214.187239060@linutronix.de --- include/linux/posix-timers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 43ea6e784a25..f11f10c97bd9 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -111,7 +111,7 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, void posixtimer_rearm_itimer(struct task_struct *p); bool posixtimer_init_sigqueue(struct sigqueue *q); -int posixtimer_send_sigqueue(struct k_itimer *tmr); +void posixtimer_send_sigqueue(struct k_itimer *tmr); bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq); void posixtimer_free_timer(struct k_itimer *timer); -- cgit v1.2.3 From 2634303f8773b0c602069887565cd412440be15d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Nov 2024 09:14:58 +0100 Subject: alarmtimers: Remove return value from alarm functions Now that the SIG_IGN problem is solved in the core code, the alarmtimer callbacks do not require a return value anymore. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/all/20241105064214.318837272@linutronix.de --- include/linux/alarmtimer.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index 05e758b8b894..3ffa5341dce2 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -20,12 +20,6 @@ enum alarmtimer_type { ALARM_BOOTTIME_FREEZER, }; -enum alarmtimer_restart { - ALARMTIMER_NORESTART, - ALARMTIMER_RESTART, -}; - - #define ALARMTIMER_STATE_INACTIVE 0x00 #define ALARMTIMER_STATE_ENQUEUED 0x01 @@ -42,14 +36,14 @@ enum alarmtimer_restart { struct alarm { struct timerqueue_node node; struct hrtimer timer; - enum alarmtimer_restart (*function)(struct alarm *, ktime_t now); + void (*function)(struct alarm *, ktime_t now); enum alarmtimer_type type; int state; void *data; }; void alarm_init(struct alarm *alarm, enum alarmtimer_type type, - enum alarmtimer_restart (*function)(struct alarm *, ktime_t)); + void (*function)(struct alarm *, ktime_t)); void alarm_start(struct alarm *alarm, ktime_t start); void alarm_start_relative(struct alarm *alarm, ktime_t start); void alarm_restart(struct alarm *alarm); -- cgit v1.2.3 From 1e4033b53db48cc77c436feac9c6d7d63db87b87 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Nov 2024 17:43:57 +0000 Subject: net: skb_reset_mac_len() must check if mac_header was set Recent discussions show that skb_reset_mac_len() should be more careful. We expect the MAC header being set. If not, clear skb->mac_len and fire a warning for CONFIG_DEBUG_NET=y builds. If after investigations we find that not having a MAC header was okay, we can remove the warning. Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/netdev/CANn89iJZGH+yEfJxfPWa3Hm7jxb-aeY2Up4HufmLMnVuQXt38A@mail.gmail.com/T/ Cc: En-Wei Wu Reviewed-by: Joe Damato Reviewed-by: Xuan Zhuo Link: https://patch.msgid.link/20241105174403.850330-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 48f1e0fa2a13..5d8fefa71cac 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2909,9 +2909,19 @@ static inline void skb_reset_inner_headers(struct sk_buff *skb) skb->inner_transport_header = skb->transport_header; } +static inline int skb_mac_header_was_set(const struct sk_buff *skb) +{ + return skb->mac_header != (typeof(skb->mac_header))~0U; +} + static inline void skb_reset_mac_len(struct sk_buff *skb) { - skb->mac_len = skb->network_header - skb->mac_header; + if (!skb_mac_header_was_set(skb)) { + DEBUG_NET_WARN_ON_ONCE(1); + skb->mac_len = 0; + } else { + skb->mac_len = skb->network_header - skb->mac_header; + } } static inline unsigned char *skb_inner_transport_header(const struct sk_buff @@ -3014,11 +3024,6 @@ static inline void skb_set_network_header(struct sk_buff *skb, const int offset) skb->network_header += offset; } -static inline int skb_mac_header_was_set(const struct sk_buff *skb) -{ - return skb->mac_header != (typeof(skb->mac_header))~0U; -} - static inline unsigned char *skb_mac_header(const struct sk_buff *skb) { DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb)); -- cgit v1.2.3 From cfe8394e06f22847ecae0312bdd0b633b471b3f0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Nov 2024 17:43:58 +0000 Subject: net: add debug check in skb_reset_inner_transport_header() Make sure (skb->data - skb->head) can fit in skb->inner_transport_header This needs CONFIG_DEBUG_NET=y. Signed-off-by: Eric Dumazet Reviewed-by: Joe Damato Reviewed-by: Xuan Zhuo Link: https://patch.msgid.link/20241105174403.850330-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5d8fefa71cac..75795568803c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2937,7 +2937,10 @@ static inline int skb_inner_transport_offset(const struct sk_buff *skb) static inline void skb_reset_inner_transport_header(struct sk_buff *skb) { - skb->inner_transport_header = skb->data - skb->head; + long offset = skb->data - skb->head; + + DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->inner_transport_header))offset); + skb->inner_transport_header = offset; } static inline void skb_set_inner_transport_header(struct sk_buff *skb, -- cgit v1.2.3 From 1732e4bedb3e29986da6ea315e8bf8caf74e1e38 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Nov 2024 17:43:59 +0000 Subject: net: add debug check in skb_reset_inner_network_header() Make sure (skb->data - skb->head) can fit in skb->inner_network_header This needs CONFIG_DEBUG_NET=y. Signed-off-by: Eric Dumazet Reviewed-by: Joe Damato Reviewed-by: Xuan Zhuo Link: https://patch.msgid.link/20241105174403.850330-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 75795568803c..8dafd1295a6e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2957,7 +2957,10 @@ static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb) static inline void skb_reset_inner_network_header(struct sk_buff *skb) { - skb->inner_network_header = skb->data - skb->head; + long offset = skb->data - skb->head; + + DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->inner_network_header))offset); + skb->inner_network_header = offset; } static inline void skb_set_inner_network_header(struct sk_buff *skb, -- cgit v1.2.3 From 78a0cb2f45dc9327f26956c242f7f4b450efff49 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Nov 2024 17:44:00 +0000 Subject: net: add debug check in skb_reset_inner_mac_header() Make sure (skb->data - skb->head) can fit in skb->inner_mac_header This needs CONFIG_DEBUG_NET=y. Signed-off-by: Eric Dumazet Reviewed-by: Joe Damato Reviewed-by: Xuan Zhuo Link: https://patch.msgid.link/20241105174403.850330-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8dafd1295a6e..32a7d8a65b9f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2982,7 +2982,10 @@ static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) static inline void skb_reset_inner_mac_header(struct sk_buff *skb) { - skb->inner_mac_header = skb->data - skb->head; + long offset = skb->data - skb->head; + + DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->inner_mac_header))offset); + skb->inner_mac_header = offset; } static inline void skb_set_inner_mac_header(struct sk_buff *skb, -- cgit v1.2.3 From ae50ea52bdd77eabd8f3c4630c1b03c3373f61a5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Nov 2024 17:44:01 +0000 Subject: net: add debug check in skb_reset_transport_header() Make sure (skb->data - skb->head) can fit in skb->transport_header This needs CONFIG_DEBUG_NET=y. Signed-off-by: Eric Dumazet Reviewed-by: Joe Damato Reviewed-by: Xuan Zhuo Link: https://patch.msgid.link/20241105174403.850330-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 32a7d8a65b9f..f76524844e7e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3007,7 +3007,10 @@ static inline unsigned char *skb_transport_header(const struct sk_buff *skb) static inline void skb_reset_transport_header(struct sk_buff *skb) { - skb->transport_header = skb->data - skb->head; + long offset = skb->data - skb->head; + + DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->transport_header))offset); + skb->transport_header = offset; } static inline void skb_set_transport_header(struct sk_buff *skb, -- cgit v1.2.3 From 305ae87dafc1a9f35374a783119d9e6001d1b8e0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Nov 2024 17:44:02 +0000 Subject: net: add debug check in skb_reset_network_header() Make sure (skb->data - skb->head) can fit in skb->network_header This needs CONFIG_DEBUG_NET=y. Signed-off-by: Eric Dumazet Reviewed-by: Joe Damato Reviewed-by: Xuan Zhuo Link: https://patch.msgid.link/20241105174403.850330-7-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f76524844e7e..f1e49a32880d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3027,7 +3027,10 @@ static inline unsigned char *skb_network_header(const struct sk_buff *skb) static inline void skb_reset_network_header(struct sk_buff *skb) { - skb->network_header = skb->data - skb->head; + long offset = skb->data - skb->head; + + DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->network_header))offset); + skb->network_header = offset; } static inline void skb_set_network_header(struct sk_buff *skb, const int offset) -- cgit v1.2.3 From 3b6167e9bfc9eae4edad065c166c667f9a8e693a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Nov 2024 17:44:03 +0000 Subject: net: add debug check in skb_reset_mac_header() Make sure (skb->data - skb->head) can fit in skb->mac_header This needs CONFIG_DEBUG_NET=y. Signed-off-by: Eric Dumazet Reviewed-by: Joe Damato Reviewed-by: Xuan Zhuo Link: https://patch.msgid.link/20241105174403.850330-8-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f1e49a32880d..e5095d75abba 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3063,7 +3063,10 @@ static inline void skb_unset_mac_header(struct sk_buff *skb) static inline void skb_reset_mac_header(struct sk_buff *skb) { - skb->mac_header = skb->data - skb->head; + long offset = skb->data - skb->head; + + DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->mac_header))offset); + skb->mac_header = offset; } static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) -- cgit v1.2.3 From 7670e74ff31939acd792ff59fa83bc73d040dd8e Mon Sep 17 00:00:00 2001 From: Ed Tsai Date: Tue, 8 Oct 2024 14:59:42 +0800 Subject: scsi: ufs: ufs-mediatek: Configure individual LU queue flags Previously, ufs vops config_scsi_dev was removed because there were no users. ufs-mediatek needs it to configure the queue flags for each LU individually. Therefore, bring it back and customize the queue flag as required. [mkp: fixed typo] Signed-off-by: Ed Tsai Link: https://lore.kernel.org/r/20241008065950.23431-1-ed.tsai@mediatek.com Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 9ea2a7411bb5..d7aca9e61684 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -386,6 +386,7 @@ struct ufs_hba_variant_ops { int (*get_outstanding_cqs)(struct ufs_hba *hba, unsigned long *ocqs); int (*config_esi)(struct ufs_hba *hba); + void (*config_scsi_dev)(struct scsi_device *sdev); }; /* clock gating state */ -- cgit v1.2.3 From 49a17639508c3b35f90ca829e60dddeeeb750e74 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 6 Nov 2024 15:51:39 +0100 Subject: softirq: Use a dedicated thread for timer wakeups on PREEMPT_RT. The timer and hrtimer soft interrupts are raised in hard interrupt context. With threaded interrupts force enabled or on PREEMPT_RT this leads to waking the ksoftirqd for the processing of the soft interrupt. ksoftirqd runs as SCHED_OTHER task which means it will compete with other tasks for CPU resources. This can introduce long delays for timer processing on heavy loaded systems and is not desired. Split the TIMER_SOFTIRQ and HRTIMER_SOFTIRQ processing into a dedicated timers thread and let it run at the lowest SCHED_FIFO priority. Wake-ups for RT tasks happen from hardirq context so only timer_list timers and hrtimers for "regular" tasks are processed here. The higher priority ensures that wakeups are performed before scheduling SCHED_OTHER tasks. Using a dedicated variable to store the pending softirq bits values ensure that the timer are not accidentally picked up by ksoftirqd and other threaded interrupts. It shouldn't be picked up by ksoftirqd since it runs at lower priority. However if ksoftirqd is already running while a timer fires, then ksoftird will be PI-boosted due to the BH-lock to ktimer's priority. The timer thread can pick up pending softirqs from ksoftirqd but only if the softirq load is high. It is not be desired that the picked up softirqs are processed at SCHED_FIFO priority under high softirq load but this can already happen by a PI-boost by a force-threaded interrupt. [ frederic@kernel.org: rcutorture.c fixes, storm fix by introduction of local_timers_pending() for tick_nohz_next_event() ] [ junxiao.chang@intel.com: Ensure ktimersd gets woken up even if a softirq is currently served. ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Reviewed-by: Paul E. McKenney [rcutorture] Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/all/20241106150419.2593080-4-bigeasy@linutronix.de --- include/linux/interrupt.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 457151f9f263..8cd9327e4e78 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -616,6 +616,53 @@ extern void __raise_softirq_irqoff(unsigned int nr); extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); +/* + * With forced-threaded interrupts enabled a raised softirq is deferred to + * ksoftirqd unless it can be handled within the threaded interrupt. This + * affects timer_list timers and hrtimers which are explicitly marked with + * HRTIMER_MODE_SOFT. + * With PREEMPT_RT enabled more hrtimers are moved to softirq for processing + * which includes all timers which are not explicitly marked HRTIMER_MODE_HARD. + * Userspace controlled timers (like the clock_nanosleep() interface) is divided + * into two categories: Tasks with elevated scheduling policy including + * SCHED_{FIFO|RR|DL} and the remaining scheduling policy. The tasks with the + * elevated scheduling policy are woken up directly from the HARDIRQ while all + * other wake ups are delayed to softirq and so to ksoftirqd. + * + * The ksoftirqd runs at SCHED_OTHER policy at which it should remain since it + * handles the softirq in an overloaded situation (not handled everything + * within its last run). + * If the timers are handled at SCHED_OTHER priority then they competes with all + * other SCHED_OTHER tasks for CPU resources are possibly delayed. + * Moving timers softirqs to a low priority SCHED_FIFO thread instead ensures + * that timer are performed before scheduling any SCHED_OTHER thread. + */ +DECLARE_PER_CPU(struct task_struct *, ktimerd); +DECLARE_PER_CPU(unsigned long, pending_timer_softirq); +void raise_ktimers_thread(unsigned int nr); + +static inline unsigned int local_timers_pending_force_th(void) +{ + return __this_cpu_read(pending_timer_softirq); +} + +static inline void raise_timer_softirq(unsigned int nr) +{ + lockdep_assert_in_irq(); + if (force_irqthreads()) + raise_ktimers_thread(nr); + else + __raise_softirq_irqoff(nr); +} + +static inline unsigned int local_timers_pending(void) +{ + if (force_irqthreads()) + return local_timers_pending_force_th(); + else + return local_softirq_pending(); +} + DECLARE_PER_CPU(struct task_struct *, ksoftirqd); static inline struct task_struct *this_cpu_ksoftirqd(void) -- cgit v1.2.3 From fbf920f255315974808ce91d934fe50198294d51 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 31 Oct 2024 16:14:15 +0100 Subject: hrtimers: Add missing hrtimer_init() trace points hrtimer_init*_on_stack() is not covered by tracing when CONFIG_DEBUG_OBJECTS_TIMERS=y. Rework the functions similar to hrtimer_init() and hrtimer_init_sleeper() so that the hrtimer_init() tracepoint is unconditionally available. The rework makes hrtimer_init_sleeper() unused. Delete it. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/74528e8abf2bb96e8bee85ffacbf14e15cf89f0d.1730386209.git.namcao@linutronix.de --- include/linux/hrtimer.h | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index aa1e65ccb615..5aa9d57528c4 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -228,32 +228,15 @@ static inline void hrtimer_cancel_wait_running(struct hrtimer *timer) /* Initialize timers: */ extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); -extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, - enum hrtimer_mode mode); - -#ifdef CONFIG_DEBUG_OBJECTS_TIMERS extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode); +#ifdef CONFIG_DEBUG_OBJECTS_TIMERS extern void destroy_hrtimer_on_stack(struct hrtimer *timer); #else -static inline void hrtimer_init_on_stack(struct hrtimer *timer, - clockid_t which_clock, - enum hrtimer_mode mode) -{ - hrtimer_init(timer, which_clock, mode); -} - -static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, - clockid_t clock_id, - enum hrtimer_mode mode) -{ - hrtimer_init_sleeper(sl, clock_id, mode); -} - static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } #endif -- cgit v1.2.3 From 908a1d775422ba2e27a5e33d0c130b522419e121 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 31 Oct 2024 16:14:20 +0100 Subject: hrtimers: Introduce hrtimer_setup() to replace hrtimer_init() To initialize hrtimer, hrtimer_init() needs to be called and also hrtimer::function must be set. This is error-prone and awkward to use. Introduce hrtimer_setup() which does both of these things, so that users of hrtimer can be simplified. The new setup function also has a sanity check for the provided function pointer. If NULL, a warning is emitted and a dummy callback installed. hrtimer_init() will be removed as soon as all of its users have been converted to the new function. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/5057c1ddbfd4b92033cd93d37fe38e6b069d5ba6.1730386209.git.namcao@linutronix.de --- include/linux/hrtimer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 5aa9d57528c4..bcc0715c59a8 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -228,6 +228,8 @@ static inline void hrtimer_cancel_wait_running(struct hrtimer *timer) /* Initialize timers: */ extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); +extern void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *), + clockid_t clock_id, enum hrtimer_mode mode); extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, -- cgit v1.2.3 From 444cb7db4c9f9b5d96be17c38b3e989df7bfabd5 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 31 Oct 2024 16:14:21 +0100 Subject: hrtimers: Introduce hrtimer_setup_on_stack() To initialize hrtimer on stack, hrtimer_init_on_stack() needs to be called and also hrtimer::function must be set. This is error-prone and awkward to use. Introduce hrtimer_setup_on_stack() which does both of these things, so that users of hrtimer can be simplified. The new setup function also has a sanity check for the provided function pointer. If NULL, a warning is emitted and a dummy callback installed. hrtimer_init_on_stack() will be removed as soon as all of its users have been converted to the new function. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/4b05e2ab3a82c517adf67fabc0f0cd8fe118b97c.1730386209.git.namcao@linutronix.de --- include/linux/hrtimer.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index bcc0715c59a8..2da513f8d66a 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -232,6 +232,9 @@ extern void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function clockid_t clock_id, enum hrtimer_mode mode); extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); +extern void hrtimer_setup_on_stack(struct hrtimer *timer, + enum hrtimer_restart (*function)(struct hrtimer *), + clockid_t clock_id, enum hrtimer_mode mode); extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode); -- cgit v1.2.3 From c9bd83abfeb9a9b103e689b251ccff7a01be8366 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 31 Oct 2024 16:14:22 +0100 Subject: hrtimers: Introduce hrtimer_setup_sleeper_on_stack() The hrtimer_init*() API is replaced by hrtimer_setup*() variants to initialize the timer including the callback function at once. hrtimer_init_sleeper_on_stack() does not need user to setup the callback function separately, so a new variant would not be strictly necessary. Nonetheless, to keep the naming convention consistent, introduce hrtimer_setup_sleeper_on_stack(). hrtimer_init_on_stack() will be removed once all users are converted. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/7b5e18e6dd0ace9eaa211201528cb9dc23752454.1730386209.git.namcao@linutronix.de --- include/linux/hrtimer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2da513f8d66a..48872a2b4071 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -238,6 +238,8 @@ extern void hrtimer_setup_on_stack(struct hrtimer *timer, extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode); +extern void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id, + enum hrtimer_mode mode); #ifdef CONFIG_DEBUG_OBJECTS_TIMERS extern void destroy_hrtimer_on_stack(struct hrtimer *timer); -- cgit v1.2.3 From 8f02e3563bb5824eb01c94f2c75f1dcee2d05625 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 31 Oct 2024 16:14:23 +0100 Subject: hrtimers: Introduce hrtimer_update_function() Some users of hrtimer need to change the callback function after the initial setup. They write to hrtimer::function directly. That's not safe under all circumstances as the write is lockless and a concurrent timer expiry might end up using the wrong function pointer. Introduce hrtimer_update_function(), which also performs runtime checks whether it is safe to modify the callback. This allows to make hrtimer::function private once all users are converted. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20a937b0ae09ad54b5b6d86eabead7c570f1b72e.1730386209.git.namcao@linutronix.de --- include/linux/hrtimer.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 48872a2b4071..6e026730e803 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -327,6 +327,28 @@ static inline int hrtimer_callback_running(struct hrtimer *timer) return timer->base->running == timer; } +/** + * hrtimer_update_function - Update the timer's callback function + * @timer: Timer to update + * @function: New callback function + * + * Only safe to call if the timer is not enqueued. Can be called in the callback function if the + * timer is not enqueued at the same time (see the comments above HRTIMER_STATE_ENQUEUED). + */ +static inline void hrtimer_update_function(struct hrtimer *timer, + enum hrtimer_restart (*function)(struct hrtimer *)) +{ + guard(raw_spinlock_irqsave)(&timer->base->cpu_base->lock); + + if (WARN_ON_ONCE(hrtimer_is_queued(timer))) + return; + + if (WARN_ON_ONCE(!function)) + return; + + timer->function = function; +} + /* Forward a hrtimer so it expires after now: */ extern u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); -- cgit v1.2.3 From 211647e5121e0e0da974bf69a8eb7c9fe57fa3bd Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 31 Oct 2024 16:14:28 +0100 Subject: wait: Switch to use hrtimer_setup_sleeper_on_stack() hrtimer_setup_sleeper_on_stack() replaces hrtimer_init_sleeper_on_stack() to keep the naming convention consistent. Convert the usage site over to it. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/fc91182375df81120a88dbe0263267e24d1bf19e.1730386209.git.namcao@linutronix.de --- include/linux/wait.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index 8aa3372f21a0..643b7c7bf376 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -541,8 +541,8 @@ do { \ int __ret = 0; \ struct hrtimer_sleeper __t; \ \ - hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC, \ - HRTIMER_MODE_REL); \ + hrtimer_setup_sleeper_on_stack(&__t, CLOCK_MONOTONIC, \ + HRTIMER_MODE_REL); \ if ((timeout) != KTIME_MAX) { \ hrtimer_set_expires_range_ns(&__t.timer, timeout, \ current->timer_slack_ns); \ -- cgit v1.2.3 From f3bef7aaa6c807b78e8fc6929c3226d3038fe505 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 31 Oct 2024 16:14:29 +0100 Subject: hrtimers: Delete hrtimer_init_sleeper_on_stack() hrtimer_init_sleeper_on_stack() is now unused. Delete it. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/52549846635c0b3a2abf82101f539efdabcd9778.1730386209.git.namcao@linutronix.de --- include/linux/hrtimer.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 6e026730e803..4e4f04b3c0c2 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -235,9 +235,6 @@ extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, extern void hrtimer_setup_on_stack(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *), clockid_t clock_id, enum hrtimer_mode mode); -extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, - clockid_t clock_id, - enum hrtimer_mode mode); extern void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode); -- cgit v1.2.3 From 3c2fb0152175f9f596b40763cdc1378297da60af Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 31 Oct 2024 16:14:33 +0100 Subject: hrtimers: Delete hrtimer_init_on_stack() hrtimer_init_on_stack() is now unused. Delete it. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/510ce0d2944c4a382ea51e51d03dcfb73ba0f4f7.1730386209.git.namcao@linutronix.de --- include/linux/hrtimer.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 4e4f04b3c0c2..7ef5f7ef31a9 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -230,8 +230,6 @@ extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); extern void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *), clockid_t clock_id, enum hrtimer_mode mode); -extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, - enum hrtimer_mode mode); extern void hrtimer_setup_on_stack(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *), clockid_t clock_id, enum hrtimer_mode mode); -- cgit v1.2.3 From b33cc96c7020b923085046e5cf2e934f41c530ec Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Oct 2024 16:25:30 +0100 Subject: mm: add PageAnonNotKsm() Check that this anonymous page is really anonymous, not anonymous-or-KSM. This optimises the debug check, but its real purpose is to remove the last two users of PageKsm(). [willy@infradead.org: fix assertions] Link: https://lkml.kernel.org/r/ZwApWPER7caIA_N3@casper.infradead.org Link: https://lkml.kernel.org/r/20241002152533.1350629-5-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Alex Shi Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index cc839e4365c1..1fcef06a2d31 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -689,6 +689,13 @@ static __always_inline bool folio_test_anon(const struct folio *folio) return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0; } +static __always_inline bool PageAnonNotKsm(const struct page *page) +{ + unsigned long flags = (unsigned long)page_folio(page)->mapping; + + return (flags & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_ANON; +} + static __always_inline bool PageAnon(const struct page *page) { return folio_test_anon(page_folio(page)); @@ -1137,14 +1144,14 @@ static __always_inline int PageAnonExclusive(const struct page *page) static __always_inline void SetPageAnonExclusive(struct page *page) { - VM_BUG_ON_PGFLAGS(!PageAnon(page) || PageKsm(page), page); + VM_BUG_ON_PGFLAGS(!PageAnonNotKsm(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); set_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); } static __always_inline void ClearPageAnonExclusive(struct page *page) { - VM_BUG_ON_PGFLAGS(!PageAnon(page) || PageKsm(page), page); + VM_BUG_ON_PGFLAGS(!PageAnonNotKsm(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); } -- cgit v1.2.3 From b9a256352f3ba697396c26d2a74f4081335f8cef Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Oct 2024 16:25:31 +0100 Subject: mm: remove PageKsm() All callers have been converted to use folio_test_ksm() or PageAnonNotKsm(), so we can remove this wrapper. Link: https://lkml.kernel.org/r/20241002152533.1350629-6-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Alex Shi Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 1fcef06a2d31..e80665bc51fa 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -725,13 +725,8 @@ static __always_inline bool folio_test_ksm(const struct folio *folio) return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_KSM; } - -static __always_inline bool PageKsm(const struct page *page) -{ - return folio_test_ksm(page_folio(page)); -} #else -TESTPAGEFLAG_FALSE(Ksm, ksm) +FOLIO_TEST_FLAG_FALSE(ksm) #endif u64 stable_page_flags(const struct page *page); -- cgit v1.2.3 From d7d65b1039019e8789119b498d97cf2531d989a8 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 3 Oct 2024 10:18:42 +0530 Subject: mm: move set_pxd_safe() helpers from generic to platform set_pxd_safe() helpers that serve a specific purpose for both x86 and riscv platforms, do not need to be in the common memory code. Otherwise they just unnecessarily make the common API more complicated. This moves the helpers from common code to platform instead. Link: https://lkml.kernel.org/r/20241003044842.246016-1-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Suggested-by: David Hildenbrand Acked-by: Dave Hansen Acked-by: David Hildenbrand Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Thomas Gleixner Cc: David Hildenbrand Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 38 -------------------------------------- 1 file changed, 38 deletions(-) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e8b2ac6bd2ae..23aeffd89a4e 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1056,44 +1056,6 @@ static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b) } #endif -/* - * Use set_p*_safe(), and elide TLB flushing, when confident that *no* - * TLB flush will be required as a result of the "set". For example, use - * in scenarios where it is known ahead of time that the routine is - * setting non-present entries, or re-setting an existing entry to the - * same value. Otherwise, use the typical "set" helpers and flush the - * TLB. - */ -#define set_pte_safe(ptep, pte) \ -({ \ - WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \ - set_pte(ptep, pte); \ -}) - -#define set_pmd_safe(pmdp, pmd) \ -({ \ - WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \ - set_pmd(pmdp, pmd); \ -}) - -#define set_pud_safe(pudp, pud) \ -({ \ - WARN_ON_ONCE(pud_present(*pudp) && !pud_same(*pudp, pud)); \ - set_pud(pudp, pud); \ -}) - -#define set_p4d_safe(p4dp, p4d) \ -({ \ - WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \ - set_p4d(p4dp, p4d); \ -}) - -#define set_pgd_safe(pgdp, pgd) \ -({ \ - WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \ - set_pgd(pgdp, pgd); \ -}) - #ifndef __HAVE_ARCH_DO_SWAP_PAGE static inline void arch_do_swap_page_nr(struct mm_struct *mm, struct vm_area_struct *vma, -- cgit v1.2.3 From 7f24cbc9c4d42db8a3c8484d120cf9c1da557fab Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Mon, 7 Oct 2024 09:50:29 +0200 Subject: mm/mmap: teach generic_get_unmapped_area{_topdown} to handle hugetlb mappings Patch series "Unify hugetlb into arch_get_unmapped_area functions", v4. This is an attempt to get rid of a fair amount of duplicated code wrt. hugetlb and *get_unmapped_area* functions. HugeTLB registers a .get_unmapped_area function which gets called from __get_unmapped_area(). hugetlb_get_unmapped_area() is defined by a bunch of architectures and it also has a generic definition for those that do not define it. Short-long story is that there is a ton of duplicated code between specific hugetlb *_get_unmapped_area_* functions and mm-core functions, so we can do better by teaching arch_get_unmapped_area* functions how to deal with hugetlb mappings. Note that not a lot of things need to be taught though. hugetlb_get_unmapped_area, that gets called for hugetlb mappings, runs some sanity checks prior to calling mm_get_unmapped_area_vmflags(), so we do not need to that down the road in the respective {generic,arch}_get_unmapped_area* functions. More information can be found in the respective patches. LTP mmapstress hugetlb selftests were ran succesfully on: This patch (of 9): We want to stop special casing hugetlb mappings and make them go through generic channels, so teach generic_get_unmapped_area{_topdown} to handle those. The main difference is that we set info.align_mask for huge mappings. Link: https://lkml.kernel.org/r/20241007075037.267650-1-osalvador@suse.de Link: https://lkml.kernel.org/r/20241007075037.267650-2-osalvador@suse.de Signed-off-by: Oscar Salvador Cc: David Hildenbrand Cc: Donet Tom Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Muchun Song Cc: Peter Xu Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index e4697539b665..368d552e4860 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -1035,9 +1035,19 @@ void hugetlb_unregister_node(struct node *node); */ bool is_raw_hwpoison_page_in_hugepage(struct page *page); +static inline unsigned long huge_page_mask_align(struct file *file) +{ + return PAGE_MASK & ~huge_page_mask(hstate_file(file)); +} + #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; +static inline unsigned long huge_page_mask_align(struct file *file) +{ + return 0; +} + static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) { return NULL; -- cgit v1.2.3 From 7bd3f1e1a9ae7f7508c88dd056755a0a4741ae88 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Mon, 7 Oct 2024 09:50:34 +0200 Subject: mm: make hugetlb mappings go through mm_get_unmapped_area_vmflags Hugetlb mappings will no longer be special cased but rather go through the generic mm_get_unmapped_area_vmflags function. For that to happen, let us remove the .get_unmapped_area from hugetlbfs_file_operations struct, and hint __get_unmapped_area that it should not send hugetlb mappings through thp_get_unmapped_area_vmflags but through mm_get_unmapped_area_vmflags. Create also a function called hugetlb_mmap_check_and_align() where a couple of safety checks are being done and the addr is aligned to the huge page size. Otherwise we will have to do this in every single function, which duplicates quite a lot of code. Link: https://lkml.kernel.org/r/20241007075037.267650-7-osalvador@suse.de Signed-off-by: Oscar Salvador Cc: David Hildenbrand Cc: Donet Tom Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Muchun Song Cc: Peter Xu Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 368d552e4860..3a81b6126f62 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -546,11 +546,10 @@ static inline struct hstate *hstate_inode(struct inode *i) } #endif /* !CONFIG_HUGETLBFS */ -#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA -unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); -#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */ +unsigned long +__generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); unsigned long generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr, -- cgit v1.2.3 From cc92882ee218d62ef017fa545b3c8a2d1e060a5a Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Mon, 7 Oct 2024 09:50:35 +0200 Subject: mm: drop hugetlb_get_unmapped_area{_*} functions Hugetlb mappings are now handled through normal channels just like any other mapping, so we no longer need hugetlb_get_unmapped_area* specific functions. Link: https://lkml.kernel.org/r/20241007075037.267650-8-osalvador@suse.de Signed-off-by: Oscar Salvador Cc: David Hildenbrand Cc: Donet Tom Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Muchun Song Cc: Peter Xu Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 3a81b6126f62..ae4fe8615bb6 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -547,15 +547,10 @@ static inline struct hstate *hstate_inode(struct inode *i) #endif /* !CONFIG_HUGETLBFS */ unsigned long -__generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr, +hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); -unsigned long -generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); - /* * huegtlb page specific state flags. These flags are located in page.private * of the hugetlb head page. Functions created via the below macros should be -- cgit v1.2.3 From 5b2f650d593ed4d020228df8563e7ad23abc847f Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Mon, 7 Oct 2024 09:50:36 +0200 Subject: arch/s390: clean up hugetlb definitions s390 redefines functions that are already defined (and the same) in include/asm-generic/hugetlb.h. Do as the other architectures: 1) include include/asm-generic/hugetlb.h 2) drop the already defined functions in the generic hugetlb.h and 3) use the __HAVE_ARCH_HUGE_* macros to define our own. This gets rid of quite some code. Link: https://lkml.kernel.org/r/20241007075037.267650-9-osalvador@suse.de Signed-off-by: Oscar Salvador Cc: David Hildenbrand Cc: Donet Tom Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Muchun Song Cc: Peter Xu Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/asm-generic/hugetlb.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index 594d5905f615..67bbdafcfc22 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -42,20 +42,26 @@ static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) return pte_modify(pte, newprot); } +#ifndef __HAVE_ARCH_HUGE_PTE_MKUFFD_WP static inline pte_t huge_pte_mkuffd_wp(pte_t pte) { return huge_pte_wrprotect(pte_mkuffd_wp(pte)); } +#endif +#ifndef __HAVE_ARCH_HUGE_PTE_CLEAR_UFFD_WP static inline pte_t huge_pte_clear_uffd_wp(pte_t pte) { return pte_clear_uffd_wp(pte); } +#endif +#ifndef __HAVE_ARCH_HUGE_PTE_UFFD_WP static inline int huge_pte_uffd_wp(pte_t pte) { return pte_uffd_wp(pte); } +#endif #ifndef __HAVE_ARCH_HUGE_PTE_CLEAR static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, @@ -106,10 +112,12 @@ static inline int huge_pte_none(pte_t pte) #endif /* Please refer to comments above pte_none_mostly() for the usage */ +#ifndef __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY static inline int huge_pte_none_mostly(pte_t pte) { return huge_pte_none(pte) || is_pte_marker(pte); } +#endif #ifndef __HAVE_ARCH_PREPARE_HUGEPAGE_RANGE static inline int prepare_hugepage_range(struct file *file, -- cgit v1.2.3 From bd40b053fabe27209cb240d205a0c817cbe5fb87 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Mon, 7 Oct 2024 09:50:37 +0200 Subject: mm: consolidate common checks in hugetlb_get_unmapped_area prepare_hugepage_range() performs almost the same checks for all architectures that define it, with the exception of mips and loongarch that also check for overflows. The rest checks for the addr and len to be properly aligned, so we can move that to hugetlb_get_unmapped_area() and get rid of a fair amount of duplicated code. [akpm@linux-foundation.org: remove now-unused local] Link: https://lore.kernel.org/oe-kbuild-all/202410081210.uNLbf3Jk-lkp@intel.com/ Link: https://lkml.kernel.org/r/20241007075037.267650-10-osalvador@suse.de Signed-off-by: Oscar Salvador Cc: David Hildenbrand Cc: Donet Tom Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Muchun Song Cc: Peter Xu Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/asm-generic/hugetlb.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index 67bbdafcfc22..f42133dae68e 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -123,13 +123,6 @@ static inline int huge_pte_none_mostly(pte_t pte) static inline int prepare_hugepage_range(struct file *file, unsigned long addr, unsigned long len) { - struct hstate *h = hstate_file(file); - - if (len & ~huge_page_mask(h)) - return -EINVAL; - if (addr & ~huge_page_mask(h)) - return -EINVAL; - return 0; } #endif -- cgit v1.2.3 From afe789b7367ad43ba8f079981d40851f8bd319ce Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Tue, 8 Oct 2024 19:50:24 -0700 Subject: kaslr: rename physmem_end and PHYSMEM_END to direct_map_physmem_end For clarity. It's increasingly hard to reason about the code, when KASLR is moving around the boundaries. In this case where KASLR is randomizing the location of the kernel image within physical memory, the maximum number of address bits for physical memory has not changed. What has changed is the ending address of memory that is allowed to be directly mapped by the kernel. Let's name the variable, and the associated macro accordingly. Also, enhance the comment above the direct_map_physmem_end definition, to further clarify how this all works. Link: https://lkml.kernel.org/r/20241009025024.89813-1-jhubbard@nvidia.com Signed-off-by: John Hubbard Reviewed-by: Pankaj Gupta Acked-by: David Hildenbrand Acked-by: Will Deacon Reviewed-by: Mike Rapoport (Microsoft) Cc: Thomas Gleixner Cc: Alistair Popple Cc: Jordan Niethe Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8f5394d75ce2..4570f33e2429 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -97,11 +97,11 @@ extern const int mmap_rnd_compat_bits_max; extern int mmap_rnd_compat_bits __read_mostly; #endif -#ifndef PHYSMEM_END +#ifndef DIRECT_MAP_PHYSMEM_END # ifdef MAX_PHYSMEM_BITS -# define PHYSMEM_END ((1ULL << MAX_PHYSMEM_BITS) - 1) +# define DIRECT_MAP_PHYSMEM_END ((1ULL << MAX_PHYSMEM_BITS) - 1) # else -# define PHYSMEM_END (((phys_addr_t)-1)&~(1ULL<<63)) +# define DIRECT_MAP_PHYSMEM_END (((phys_addr_t)-1)&~(1ULL<<63)) # endif #endif -- cgit v1.2.3 From 6359c39c9de66dede8ff5ff257c9e117483dbc7c Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 10 Oct 2024 14:15:56 +0800 Subject: mm: remove unused hugepage for vma_alloc_folio() The hugepage parameter was deprecated since commit ddc1a5cbc05d ("mempolicy: alloc_pages_mpol() for NUMA policy without vma"), for PMD-sized THP, it still tries only preferred node if possible in vma_alloc_folio() by checking the order of the folio allocation. Link: https://lkml.kernel.org/r/20241010061556.1846751-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Barry Song Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/gfp.h | 6 +++--- include/linux/highmem.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index a951de920e20..b65724c3427d 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -306,7 +306,7 @@ struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order); struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order, struct mempolicy *mpol, pgoff_t ilx, int nid); struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma, - unsigned long addr, bool hugepage); + unsigned long addr); #else static inline struct page *alloc_pages_noprof(gfp_t gfp_mask, unsigned int order) { @@ -326,7 +326,7 @@ static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int orde { return folio_alloc_noprof(gfp, order); } -#define vma_alloc_folio_noprof(gfp, order, vma, addr, hugepage) \ +#define vma_alloc_folio_noprof(gfp, order, vma, addr) \ folio_alloc_noprof(gfp, order) #endif @@ -341,7 +341,7 @@ static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int orde static inline struct page *alloc_page_vma_noprof(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) { - struct folio *folio = vma_alloc_folio_noprof(gfp, 0, vma, addr, false); + struct folio *folio = vma_alloc_folio_noprof(gfp, 0, vma, addr); return &folio->page; } diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 930a591b9b61..bec9bd715acf 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -226,7 +226,7 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, { struct folio *folio; - folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vaddr, false); + folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vaddr); if (folio) clear_user_highpage(&folio->page, vaddr); -- cgit v1.2.3 From 0aa3ef3637920799f1b2f67dfff0d698127444ac Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 9 Oct 2024 17:35:50 -0700 Subject: memcg: add tracing for memcg stat updates The memcg stats are maintained in rstat infrastructure which provides very fast updates side and reasonable read side. However memcg added plethora of stats and made the read side, which is cgroup rstat flush, very slow. To solve that, threshold was added in the memcg stats read side i.e. no need to flush the stats if updates are within the threshold. This threshold based improvement worked for sometime but more stats were added to memcg and also the read codepath was getting triggered in the performance sensitive paths which made threshold based ratelimiting ineffective. We need more visibility into the hot and cold stats i.e. stats with a lot of updates. Let's add trace to get that visibility. [shakeel.butt@linux.dev: use unsigned long type for memcg_rstat_events, per Yosry] Link: https://lkml.kernel.org/r/20241015213721.3804209-1-shakeel.butt@linux.dev Link: https://lkml.kernel.org/r/20241010003550.3695245-1-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Acked-by: Roman Gushchin Reviewed-by: Yosry Ahmed Acked-by: Johannes Weiner Reviewed-by: T.J. Mercier Cc: Michal Hocko Cc: Muchun Song Cc: JP Kobryn Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/trace/events/memcg.h | 81 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 include/trace/events/memcg.h (limited to 'include') diff --git a/include/trace/events/memcg.h b/include/trace/events/memcg.h new file mode 100644 index 000000000000..8667e57816d2 --- /dev/null +++ b/include/trace/events/memcg.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM memcg + +#if !defined(_TRACE_MEMCG_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MEMCG_H + +#include +#include + + +DECLARE_EVENT_CLASS(memcg_rstat_stats, + + TP_PROTO(struct mem_cgroup *memcg, int item, int val), + + TP_ARGS(memcg, item, val), + + TP_STRUCT__entry( + __field(u64, id) + __field(int, item) + __field(int, val) + ), + + TP_fast_assign( + __entry->id = cgroup_id(memcg->css.cgroup); + __entry->item = item; + __entry->val = val; + ), + + TP_printk("memcg_id=%llu item=%d val=%d", + __entry->id, __entry->item, __entry->val) +); + +DEFINE_EVENT(memcg_rstat_stats, mod_memcg_state, + + TP_PROTO(struct mem_cgroup *memcg, int item, int val), + + TP_ARGS(memcg, item, val) +); + +DEFINE_EVENT(memcg_rstat_stats, mod_memcg_lruvec_state, + + TP_PROTO(struct mem_cgroup *memcg, int item, int val), + + TP_ARGS(memcg, item, val) +); + +DECLARE_EVENT_CLASS(memcg_rstat_events, + + TP_PROTO(struct mem_cgroup *memcg, int item, unsigned long val), + + TP_ARGS(memcg, item, val), + + TP_STRUCT__entry( + __field(u64, id) + __field(int, item) + __field(unsigned long, val) + ), + + TP_fast_assign( + __entry->id = cgroup_id(memcg->css.cgroup); + __entry->item = item; + __entry->val = val; + ), + + TP_printk("memcg_id=%llu item=%d val=%lu", + __entry->id, __entry->item, __entry->val) +); + +DEFINE_EVENT(memcg_rstat_events, count_memcg_events, + + TP_PROTO(struct mem_cgroup *memcg, int item, unsigned long val), + + TP_ARGS(memcg, item, val) +); + + +#endif /* _TRACE_MEMCG_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 5708d96da20b99b4665ad72395e3727016057f70 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 11 Oct 2024 11:03:04 -0400 Subject: mm: avoid zeroing user movable page twice with init_on_alloc=1 Commit 6471384af2a6 ("mm: security: introduce init_on_alloc=1 and init_on_free=1 boot options") forces allocated page to be zeroed in post_alloc_hook() when init_on_alloc=1. For order-0 folios, if arch does not define vma_alloc_zeroed_movable_folio(), the default implementation again zeros the page return from the buddy allocator. So the page is zeroed twice. Fix it by passing __GFP_ZERO instead to avoid double page zeroing. At the moment, s390,arm64,x86,alpha,m68k are not impacted since they define their own vma_alloc_zeroed_movable_folio(). For >0 order folios (mTHP and PMD THP), folio_zero_user() is called to zero the folio again. Fix it by calling folio_zero_user() only if init_on_alloc is set. All arch are impacted. Add alloc_zeroed() helper to encapsulate the init_on_alloc check. [ziy@nvidia.com: comment fixes, per David] Link: https://lkml.kernel.org/r/97DB52E1-C594-49B5-9736-89AC302FAB01@nvidia.com Link: https://lkml.kernel.org/r/20241011150304.709590-1-ziy@nvidia.com Signed-off-by: Zi Yan Acked-by: Vlastimil Babka Acked-by: David Hildenbrand Cc: Alexander Potapenko Cc: "Huang, Ying" Cc: John Hubbard Cc: Kees Cook Cc: Kefeng Wang Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/highmem.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index bec9bd715acf..6e452bd8e7e3 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -224,13 +224,7 @@ static inline struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, unsigned long vaddr) { - struct folio *folio; - - folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vaddr); - if (folio) - clear_user_highpage(&folio->page, vaddr); - - return folio; + return vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr); } #endif -- cgit v1.2.3 From 1f2d03cc535138b7cdbed0122cdc0f9e9626c6bf Mon Sep 17 00:00:00 2001 From: Jaewon Kim Date: Fri, 11 Oct 2024 21:49:28 +0900 Subject: vmscan: add a vmscan event for reclaim_pages reclaim_folio_list uses a dummy reclaim_stat and is not being used. To know the memory stat, add a new trace event. This is useful how how many pages are not reclaimed or why. This is an example: mm_vmscan_reclaim_pages: nid=0 nr_scanned=112 nr_reclaimed=112 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 Currently reclaim_folio_list is only called by reclaim_pages, and reclaim_pages is used by damon and madvise. In the latest Android, reclaim_pages is also used by shmem to reclaim all pages in a address_space. [jaewon31.kim@samsung.com: use sc.nr_scanned rather than new counting] Link: https://lkml.kernel.org/r/20241016143227.961162-1-jaewon31.kim@samsung.com Link: https://lkml.kernel.org/r/20241011124928.1224813-1-jaewon31.kim@samsung.com Signed-off-by: Jaewon Kim Acked-by: Vlastimil Babka Cc: Jaewon Kim Cc: Kalesh Singh Cc: Minchan Kim Cc: SeongJae Park Signed-off-by: Andrew Morton --- include/trace/events/vmscan.h | 45 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include') diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 1a488c30afa5..490958fa10de 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -346,6 +346,51 @@ TRACE_EVENT(mm_vmscan_write_folio, show_reclaim_flags(__entry->reclaim_flags)) ); +TRACE_EVENT(mm_vmscan_reclaim_pages, + + TP_PROTO(int nid, + unsigned long nr_scanned, unsigned long nr_reclaimed, + struct reclaim_stat *stat), + + TP_ARGS(nid, nr_scanned, nr_reclaimed, stat), + + TP_STRUCT__entry( + __field(int, nid) + __field(unsigned long, nr_scanned) + __field(unsigned long, nr_reclaimed) + __field(unsigned long, nr_dirty) + __field(unsigned long, nr_writeback) + __field(unsigned long, nr_congested) + __field(unsigned long, nr_immediate) + __field(unsigned int, nr_activate0) + __field(unsigned int, nr_activate1) + __field(unsigned long, nr_ref_keep) + __field(unsigned long, nr_unmap_fail) + ), + + TP_fast_assign( + __entry->nid = nid; + __entry->nr_scanned = nr_scanned; + __entry->nr_reclaimed = nr_reclaimed; + __entry->nr_dirty = stat->nr_dirty; + __entry->nr_writeback = stat->nr_writeback; + __entry->nr_congested = stat->nr_congested; + __entry->nr_immediate = stat->nr_immediate; + __entry->nr_activate0 = stat->nr_activate[0]; + __entry->nr_activate1 = stat->nr_activate[1]; + __entry->nr_ref_keep = stat->nr_ref_keep; + __entry->nr_unmap_fail = stat->nr_unmap_fail; + ), + + TP_printk("nid=%d nr_scanned=%ld nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld", + __entry->nid, + __entry->nr_scanned, __entry->nr_reclaimed, + __entry->nr_dirty, __entry->nr_writeback, + __entry->nr_congested, __entry->nr_immediate, + __entry->nr_activate0, __entry->nr_activate1, + __entry->nr_ref_keep, __entry->nr_unmap_fail) +); + TRACE_EVENT(mm_vmscan_lru_shrink_inactive, TP_PROTO(int nid, -- cgit v1.2.3 From f1001f3d3b6868998cab73d10fda1a5c99ddf963 Mon Sep 17 00:00:00 2001 From: Wei Xu Date: Thu, 17 Oct 2024 18:15:28 +0000 Subject: mm/mglru: reset page lru tier bits when activating When a folio is activated, lru_gen_add_folio() moves the folio to the youngest generation. But unlike folio_update_gen()/folio_inc_gen(), lru_gen_add_folio() doesn't reset the folio lru tier bits (LRU_REFS_MASK | LRU_REFS_FLAGS). This inconsistency can affect how pages are aged via folio_mark_accessed() (e.g. fd accesses), though no user visible impact related to this has been detected yet. Note that lru_gen_add_folio() cannot clear PG_workingset if the activation is due to workingset refault, otherwise PSI accounting will be skipped. So fix lru_gen_add_folio() to clear the lru tier bits other than PG_workingset when activating a folio, and also clear all the lru tier bits when a folio is activated via folio_activate() in lru_gen_look_around(). Link: https://lkml.kernel.org/r/20241017181528.3358821-1-weixugc@google.com Fixes: 018ee47f1489 ("mm: multi-gen LRU: exploit locality in rmap") Signed-off-by: Wei Xu Cc: Axel Rasmussen Cc: Brian Geffon Cc: Jan Alexander Steffens Cc: Suleiman Souhlal Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/linux/mm_inline.h | 15 ++++++++++++++- include/linux/mmzone.h | 2 ++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 6f801c7b36e2..355cf46a01a6 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -155,6 +155,11 @@ static inline int folio_lru_refs(struct folio *folio) return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + workingset; } +static inline void folio_clear_lru_refs(struct folio *folio) +{ + set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, 0); +} + static inline int folio_lru_gen(struct folio *folio) { unsigned long flags = READ_ONCE(folio->flags); @@ -222,6 +227,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, { unsigned long seq; unsigned long flags; + unsigned long mask; int gen = folio_lru_gen(folio); int type = folio_is_file_lru(folio); int zone = folio_zonenum(folio); @@ -257,7 +263,14 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, gen = lru_gen_from_seq(seq); flags = (gen + 1UL) << LRU_GEN_PGOFF; /* see the comment on MIN_NR_GENS about PG_active */ - set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags); + mask = LRU_GEN_MASK; + /* + * Don't clear PG_workingset here because it can affect PSI accounting + * if the activation is due to workingset refault. + */ + if (folio_test_active(folio)) + mask |= LRU_REFS_MASK | BIT(PG_referenced) | BIT(PG_active); + set_mask_bits(&folio->flags, mask, flags); lru_gen_update_size(lruvec, folio, -1, gen); /* for folio_rotate_reclaimable() */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5b1c984daf45..2e8c4307c728 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -403,6 +403,8 @@ enum { NR_LRU_GEN_CAPS }; +#define LRU_REFS_FLAGS (BIT(PG_referenced) | BIT(PG_workingset)) + #define MIN_LRU_BATCH BITS_PER_LONG #define MAX_LRU_BATCH (MIN_LRU_BATCH * 64) -- cgit v1.2.3 From 9884efd795cc2f71ef3b7f42df32420b0b7ce34f Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 17 Oct 2024 22:14:56 +0800 Subject: mm: huge_memory: move file_thp_enabled() into huge_memory.c file_thp_enabled() is only used in __thp_vma_allowable_orders(), so move it into huge_memory.c, also check READ_ONLY_THP_FOR_FS ahead to avoid unnecessary code if config disabled. Link: https://lkml.kernel.org/r/20241017141457.1169092-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: David Hildenbrand Reviewed-by: Baolin Wang Cc: Barry Song Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 8afe09a2cf03..006f730545c2 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -252,19 +252,6 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, return orders; } -static inline bool file_thp_enabled(struct vm_area_struct *vma) -{ - struct inode *inode; - - if (!vma->vm_file) - return false; - - inode = vma->vm_file->f_inode; - - return (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS)) && - !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode); -} - unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, unsigned long vm_flags, unsigned long tva_flags, -- cgit v1.2.3 From b7f058f827392022d8c689329f88c7b324d71dad Mon Sep 17 00:00:00 2001 From: Luoxi Li Date: Fri, 18 Oct 2024 17:22:35 +0800 Subject: mm: remove unused has_isolate_pageblock has_isolate_pageblock() has been unused since commit 55612e80e722 ("mm: page_alloc: close migratetype race between freeing and stealing") Remove it. Link: https://lkml.kernel.org/r/20241018092235.2764859-1-kaixa@kiloview.com Signed-off-by: Luoxi Li Acked-by: David Hildenbrand Reviewed-by: Muhammad Usama Anjum Acked-by: Johannes Weiner Reviewed-by: Anshuman Khandual Cc: Baolin Wang Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/page-isolation.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index c16db0067090..73dc2c1841ec 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -3,10 +3,6 @@ #define __LINUX_PAGEISOLATION_H #ifdef CONFIG_MEMORY_ISOLATION -static inline bool has_isolate_pageblock(struct zone *zone) -{ - return zone->nr_isolate_pageblock; -} static inline bool is_migrate_isolate_page(struct page *page) { return get_pageblock_migratetype(page) == MIGRATE_ISOLATE; @@ -16,10 +12,6 @@ static inline bool is_migrate_isolate(int migratetype) return migratetype == MIGRATE_ISOLATE; } #else -static inline bool has_isolate_pageblock(struct zone *zone) -{ - return false; -} static inline bool is_migrate_isolate_page(struct page *page) { return false; -- cgit v1.2.3 From 78c018e3942c5dfbab7e6edb4eb784943878504b Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 7 Oct 2024 23:47:45 +0200 Subject: maple_tree: fix outdated flag name in comment MAPLE_USE_RCU was renamed to MT_FLAGS_USE_RCU at some point, fix up the comment. Link: https://lkml.kernel.org/r/20241007-maple-tree-doc-fix-v1-1-6bbf89c1153d@google.com Signed-off-by: Jann Horn Reviewed-by: Liam R. Howlett Reviewed-by: Wei Yang Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index c2c11004085e..61c236850ca8 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -224,7 +224,7 @@ typedef struct { /* nothing */ } lockdep_map_p; * (set at tree creation time) and dynamic information set under the spinlock. * * Another use of flags are to indicate global states of the tree. This is the - * case with the MAPLE_USE_RCU flag, which indicates the tree is currently in + * case with the MT_FLAGS_USE_RCU flag, which indicates the tree is currently in * RCU mode. This mode was added to allow the tree to reuse nodes instead of * re-allocating and RCU freeing nodes when there is a single user. */ -- cgit v1.2.3 From ed265529d39ac408396c031a4fd7e1ef922b80d0 Mon Sep 17 00:00:00 2001 From: Sourav Panda Date: Tue, 22 Oct 2024 23:24:40 +0000 Subject: mm/codetag: fix arg in pgalloc_tag_copy alloc_tag_sub alloc_tag_sub() takes bytes as opposed to number of pages as argument. Currently pgalloc_tag_copy() passes the number of pages. This fix passes the correct unit, which is the number of bytes allocated. Link: https://lkml.kernel.org/r/20241022232440.334820-1-souravpanda@google.com Fixes: e0a955bf7f61 ("mm/codetag: add pgalloc_tag_copy()") Signed-off-by: Sourav Panda Acked-by: Suren Baghdasaryan Reviewed-by: Pasha Tatashin Reviewed-by: Anshuman Khandual Cc: Wei Xu Cc: Yu Zhao Cc: Kent Overstreet Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 4570f33e2429..eb070c14e309 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4207,7 +4207,7 @@ static inline void pgalloc_tag_copy(struct folio *new, struct folio *old) /* Clear the old ref to the original allocation tag. */ clear_page_tag_ref(&old->page); /* Decrement the counters of the tag on get_new_folio. */ - alloc_tag_sub(ref, folio_nr_pages(new)); + alloc_tag_sub(ref, folio_size(new)); __alloc_tag_ref_set(ref, tag); -- cgit v1.2.3 From 628e1b8c4777941e119effc92cd395b4b02c2c5f Mon Sep 17 00:00:00 2001 From: James Houghton Date: Mon, 21 Oct 2024 16:02:12 +0000 Subject: mm: add missing mmu_notifier_clear_young for !MMU_NOTIFIER Remove the now unnecessary ifdef in mm/damon/vaddr.c as well. Link: https://lkml.kernel.org/r/20241021160212.9935-1-jthoughton@google.com Signed-off-by: James Houghton Reviewed-by: Jason Gunthorpe Acked-by: David Hildenbrand Reviewed-by: SeongJae Park Reviewed-by: Oscar Salvador Signed-off-by: Andrew Morton --- include/linux/mmu_notifier.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index d39ebb10caeb..e2dd57ca368b 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -606,6 +606,13 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, return 0; } +static inline int mmu_notifier_clear_young(struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + return 0; +} + static inline int mmu_notifier_test_young(struct mm_struct *mm, unsigned long address) { -- cgit v1.2.3 From 6b611388b626eaa59d202bf8f64d095ff80bcde6 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 24 Oct 2024 18:22:59 -0700 Subject: memcg-v1: remove charge move code The memcg-v1 charge move feature has been deprecated completely and let's remove the relevant code as well. Link: https://lkml.kernel.org/r/20241025012304.2473312-3-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Acked-by: Michal Hocko Reviewed-by: Roman Gushchin Acked-by: Johannes Weiner Cc: Hugh Dickins Cc: Muchun Song Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 34d2da05f2f1..0b113267b2de 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -299,11 +299,6 @@ struct mem_cgroup { /* For oom notifier event fd */ struct list_head oom_notify; - /* - * Should we move charges of a task when a task is moved into this - * mem_cgroup ? And what type of charges should we move ? - */ - unsigned long move_charge_at_immigrate; /* taken only while moving_account > 0 */ spinlock_t move_lock; unsigned long move_lock_flags; -- cgit v1.2.3 From a29c0e4b2e867f4e362a6740c430bfdc2efdd1d9 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 24 Oct 2024 18:23:03 -0700 Subject: memcg-v1: remove memcg move locking code The memcg v1's charge move feature has been deprecated. All the places using the memcg move lock, have stopped using it as they don't need the protection any more. Let's proceed to remove all the locking code related to charge moving. Link: https://lkml.kernel.org/r/20241025012304.2473312-7-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Acked-by: Michal Hocko Reviewed-by: Roman Gushchin Acked-by: Johannes Weiner Cc: Hugh Dickins Cc: Muchun Song Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 54 ---------------------------------------------- 1 file changed, 54 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0b113267b2de..bb49e0d4b377 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -299,20 +299,10 @@ struct mem_cgroup { /* For oom notifier event fd */ struct list_head oom_notify; - /* taken only while moving_account > 0 */ - spinlock_t move_lock; - unsigned long move_lock_flags; - /* Legacy tcp memory accounting */ bool tcpmem_active; int tcpmem_pressure; - /* - * set > 0 if pages under this cgroup are moving to other cgroup. - */ - atomic_t moving_account; - struct task_struct *move_lock_task; - /* List of events which userspace want to receive */ struct list_head event_list; spinlock_t event_list_lock; @@ -428,9 +418,7 @@ static inline struct obj_cgroup *__folio_objcg(struct folio *folio) * * - the folio lock * - LRU isolation - * - folio_memcg_lock() * - exclusive reference - * - mem_cgroup_trylock_pages() * * For a kmem folio a caller should hold an rcu read lock to protect memcg * associated with a kmem folio from being released. @@ -499,9 +487,7 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) * * - the folio lock * - LRU isolation - * - lock_folio_memcg() * - exclusive reference - * - mem_cgroup_trylock_pages() * * For a kmem folio a caller should hold an rcu read lock to protect memcg * associated with a kmem folio from being released. @@ -1867,26 +1853,6 @@ static inline bool task_in_memcg_oom(struct task_struct *p) return p->memcg_in_oom; } -void folio_memcg_lock(struct folio *folio); -void folio_memcg_unlock(struct folio *folio); - -/* try to stablize folio_memcg() for all the pages in a memcg */ -static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) -{ - rcu_read_lock(); - - if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account)) - return true; - - rcu_read_unlock(); - return false; -} - -static inline void mem_cgroup_unlock_pages(void) -{ - rcu_read_unlock(); -} - static inline void mem_cgroup_enter_user_fault(void) { WARN_ON(current->in_user_fault); @@ -1908,26 +1874,6 @@ unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order, return 0; } -static inline void folio_memcg_lock(struct folio *folio) -{ -} - -static inline void folio_memcg_unlock(struct folio *folio) -{ -} - -static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) -{ - /* to match folio_memcg_rcu() */ - rcu_read_lock(); - return true; -} - -static inline void mem_cgroup_unlock_pages(void) -{ - rcu_read_unlock(); -} - static inline bool task_in_memcg_oom(struct task_struct *p) { return false; -- cgit v1.2.3 From 906c38ff52e95575ddf3281bee531eded3dba150 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Sat, 26 Oct 2024 09:37:07 -0700 Subject: memcg: workingset: remove folio_memcg_rcu usage The function workingset_activation() is called from folio_mark_accessed() with the guarantee that the given folio can not be freed under us in workingset_activation(). In addition, the association of the folio and its memcg can not be broken here because charge migration is no more. There is no need to use folio_memcg_rcu. Simply use folio_memcg_charged() because that is what this function cares about. [akpm@linux-foundation.org: provide folio_memcg_charged stub for CONFIG_MEMCG=n] Link: https://lkml.kernel.org/r/20241026163707.2479526-1-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Suggested-by: Yu Zhao Cc: Michal Hocko Cc: Roman Gushchin Cc: Johannes Weiner Cc: Muchun Song Cc: Hugh Dickins Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 34 ++-------------------------------- 1 file changed, 2 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bb49e0d4b377..8e4608be811d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -443,35 +443,6 @@ static inline bool folio_memcg_charged(struct folio *folio) return __folio_memcg(folio) != NULL; } -/** - * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio. - * @folio: Pointer to the folio. - * - * This function assumes that the folio is known to have a - * proper memory cgroup pointer. It's not safe to call this function - * against some type of folios, e.g. slab folios or ex-slab folios. - * - * Return: A pointer to the memory cgroup associated with the folio, - * or NULL. - */ -static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) -{ - unsigned long memcg_data = READ_ONCE(folio->memcg_data); - - VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); - - if (memcg_data & MEMCG_DATA_KMEM) { - struct obj_cgroup *objcg; - - objcg = (void *)(memcg_data & ~OBJEXTS_FLAGS_MASK); - return obj_cgroup_memcg(objcg); - } - - WARN_ON_ONCE(!rcu_read_lock_held()); - - return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK); -} - /* * folio_memcg_check - Get the memory cgroup associated with a folio. * @folio: Pointer to the folio. @@ -1084,10 +1055,9 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio) return NULL; } -static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) +static inline bool folio_memcg_charged(struct folio *folio) { - WARN_ON_ONCE(!rcu_read_lock_held()); - return NULL; + return false; } static inline struct mem_cgroup *folio_memcg_check(struct folio *folio) -- cgit v1.2.3 From 28aeaeaca62ebb1b638582ef81f8974f520c0dcb Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 28 Oct 2024 22:44:41 +0200 Subject: media: i2c: mt9p031: Drop support for legacy platform data No user of the mt9p031_platform_data legacy platform data for board files exist in the kernel anymore. Drop support from the driver. Signed-off-by: Laurent Pinchart Reviewed-by: Tarang Raval Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/i2c/mt9p031.h | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 include/media/i2c/mt9p031.h (limited to 'include') diff --git a/include/media/i2c/mt9p031.h b/include/media/i2c/mt9p031.h deleted file mode 100644 index f933cd0be8e5..000000000000 --- a/include/media/i2c/mt9p031.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef MT9P031_H -#define MT9P031_H - -struct v4l2_subdev; - -/* - * struct mt9p031_platform_data - MT9P031 platform data - * @ext_freq: Input clock frequency - * @target_freq: Pixel clock frequency - */ -struct mt9p031_platform_data { - unsigned int pixclk_pol:1; - int ext_freq; - int target_freq; -}; - -#endif -- cgit v1.2.3 From ff0f0353826d19d826ae07d941ebaf3ed241e577 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Sat, 19 Oct 2024 20:23:12 +0300 Subject: media: mc: Rename pad as origin in __media_pipeline_start() Rename the pad field in __media_pipeline_start() to both better describe what it is and avoid masking it during the loop. Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/media/media-entity.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/media/media-entity.h b/include/media/media-entity.h index 0393b23129eb..64cf590b1134 100644 --- a/include/media/media-entity.h +++ b/include/media/media-entity.h @@ -1143,10 +1143,10 @@ struct media_entity *media_graph_walk_next(struct media_graph *graph); /** * media_pipeline_start - Mark a pipeline as streaming - * @pad: Starting pad + * @origin: Starting pad * @pipe: Media pipeline to be assigned to all pads in the pipeline. * - * Mark all pads connected to a given pad through enabled links, either + * Mark all pads connected to pad @origin through enabled links, either * directly or indirectly, as streaming. The given pipeline object is assigned * to every pad in the pipeline and stored in the media_pad pipe field. * @@ -1155,17 +1155,17 @@ struct media_entity *media_graph_walk_next(struct media_graph *graph); * pipeline pointer must be identical for all nested calls to * media_pipeline_start(). */ -__must_check int media_pipeline_start(struct media_pad *pad, +__must_check int media_pipeline_start(struct media_pad *origin, struct media_pipeline *pipe); /** * __media_pipeline_start - Mark a pipeline as streaming * - * @pad: Starting pad + * @origin: Starting pad * @pipe: Media pipeline to be assigned to all pads in the pipeline. * * ..note:: This is the non-locking version of media_pipeline_start() */ -__must_check int __media_pipeline_start(struct media_pad *pad, +__must_check int __media_pipeline_start(struct media_pad *origin, struct media_pipeline *pipe); /** -- cgit v1.2.3 From 9907cda95fcbf44141b1292faab89cf8ec542f22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juraj=20=C5=A0arinay?= Date: Sun, 3 Nov 2024 13:45:25 +0100 Subject: net: nfc: Propagate ISO14443 type A target ATS to userspace via netlink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a 20-byte field ats to struct nfc_target and expose it as NFC_ATTR_TARGET_ATS via the netlink interface. The payload contains 'historical bytes' that help to distinguish cards from one another. The information is commonly used to assemble an emulated ATR similar to that reported by smart cards with contacts. Add a 20-byte field target_ats to struct nci_dev to hold the payload obtained in nci_rf_intf_activated_ntf_packet() and copy it to over to nfc_target.ats in nci_activate_target(). The approach is similar to the handling of 'general bytes' within ATR_RES. Replace the hard-coded size of rats_res within struct activation_params_nfca_poll_iso_dep by the equal constant NFC_ATS_MAXSIZE now defined in nfc.h Within NCI, the information corresponds to the 'RATS Response' activation parameter that omits the initial length byte TL. This loses no information and is consistent with our handling of SENSB_RES that also drops the first (constant) byte. Tested with nxp_nci_i2c on a few type A targets including an ICAO 9303 compliant passport. I refrain from the corresponding change to digital_in_recv_ats() to have the few drivers based on digital.h fill nfc_target.ats, as I have no way to test it. That class of drivers appear not to set NFC_ATTR_TARGET_SENSB_RES either. Consider a separate patch to propagate (all) the parameters. Signed-off-by: Juraj Šarinay Link: https://patch.msgid.link/20241103124525.8392-1-juraj@sarinay.com Signed-off-by: Paolo Abeni --- include/net/nfc/nci.h | 2 +- include/net/nfc/nci_core.h | 4 ++++ include/net/nfc/nfc.h | 4 ++++ include/uapi/linux/nfc.h | 3 +++ 4 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index dc36519d16aa..09efcaed7c3f 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -475,7 +475,7 @@ struct nci_rf_discover_ntf { #define NCI_OP_RF_INTF_ACTIVATED_NTF nci_opcode_pack(NCI_GID_RF_MGMT, 0x05) struct activation_params_nfca_poll_iso_dep { __u8 rats_res_len; - __u8 rats_res[20]; + __u8 rats_res[NFC_ATS_MAXSIZE]; }; struct activation_params_nfcb_poll_iso_dep { diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index ea8595651c38..e180bdf2f82b 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -265,6 +265,10 @@ struct nci_dev { /* stored during intf_activated_ntf */ __u8 remote_gb[NFC_MAX_GT_LEN]; __u8 remote_gb_len; + + /* stored during intf_activated_ntf */ + __u8 target_ats[NFC_ATS_MAXSIZE]; + __u8 target_ats_len; }; /* ----- NCI Devices ----- */ diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 3a3781838c67..127e6c7d910d 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -86,6 +86,8 @@ struct nfc_ops { * is a type A one. The %sens_res most significant byte must be byte 2 * as described by the NFC Forum digital specification (i.e. the platform * configuration one) while %sens_res least significant byte is byte 1. + * @ats_len: length of Answer To Select in bytes + * @ats: Answer To Select returned by an ISO 14443 Type A target upon activation */ struct nfc_target { u32 idx; @@ -105,6 +107,8 @@ struct nfc_target { u8 is_iso15693; u8 iso15693_dsfid; u8 iso15693_uid[NFC_ISO15693_UID_MAXSIZE]; + u8 ats_len; + u8 ats[NFC_ATS_MAXSIZE]; }; /** diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 4fa4e979e948..2f5b4be25261 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -164,6 +164,7 @@ enum nfc_commands { * @NFC_ATTR_VENDOR_SUBCMD: Vendor specific sub command * @NFC_ATTR_VENDOR_DATA: Vendor specific data, to be optionally passed * to a vendor specific command implementation + * @NFC_ATTR_TARGET_ATS: ISO 14443 type A target Answer To Select */ enum nfc_attrs { NFC_ATTR_UNSPEC, @@ -198,6 +199,7 @@ enum nfc_attrs { NFC_ATTR_VENDOR_ID, NFC_ATTR_VENDOR_SUBCMD, NFC_ATTR_VENDOR_DATA, + NFC_ATTR_TARGET_ATS, /* private: internal use only */ __NFC_ATTR_AFTER_LAST }; @@ -225,6 +227,7 @@ enum nfc_sdp_attr { #define NFC_GB_MAXSIZE 48 #define NFC_FIRMWARE_NAME_MAXSIZE 32 #define NFC_ISO15693_UID_MAXSIZE 8 +#define NFC_ATS_MAXSIZE 20 /* NFC protocols */ #define NFC_PROTO_JEWEL 1 -- cgit v1.2.3 From 495e7c8e9601245738a6ab53c992cc5aa0b13cb4 Mon Sep 17 00:00:00 2001 From: Jinjian Song Date: Mon, 4 Nov 2024 17:44:34 +0800 Subject: wwan: core: Add WWAN ADB and MIPC port type Add new WWAN ports that connect to the device's ADB protocol interface and MTK MIPC diagnostic interface. Signed-off-by: Jinjian Song Reviewed-by: Sergey Ryazanov Signed-off-by: Paolo Abeni --- include/linux/wwan.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/wwan.h b/include/linux/wwan.h index 170fdee6339c..79c781875c09 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -17,6 +17,8 @@ * @WWAN_PORT_FIREHOSE: XML based command protocol * @WWAN_PORT_XMMRPC: Control protocol for Intel XMM modems * @WWAN_PORT_FASTBOOT: Fastboot protocol control + * @WWAN_PORT_ADB: ADB protocol control + * @WWAN_PORT_MIPC: MTK MIPC diagnostic interface * * @WWAN_PORT_MAX: Highest supported port types * @WWAN_PORT_UNKNOWN: Special value to indicate an unknown port type @@ -30,6 +32,8 @@ enum wwan_port_type { WWAN_PORT_FIREHOSE, WWAN_PORT_XMMRPC, WWAN_PORT_FASTBOOT, + WWAN_PORT_ADB, + WWAN_PORT_MIPC, /* Add new port types above this line */ -- cgit v1.2.3 From ab9bc81c1cf0efc7fc5a3aa4e562aa88d09ada57 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Nov 2024 05:45:34 -0700 Subject: Revert "block: pre-calculate max_zone_append_sectors" This causes issue on, at least, nvme-mpath where my boot fails with: WARNING: CPU: 354 PID: 2729 at block/blk-settings.c:75 blk_validate_limits+0x356/0x380 Modules linked in: tg3(+) nvme usbcore scsi_mod ptp i2c_piix4 libphy nvme_core crc32c_intel scsi_common usb_common pps_core i2c_smbus CPU: 354 UID: 0 PID: 2729 Comm: kworker/u2061:1 Not tainted 6.12.0-rc6+ #181 Hardware name: Dell Inc. PowerEdge R7625/06444F, BIOS 1.8.3 04/02/2024 Workqueue: async async_run_entry_fn RIP: 0010:blk_validate_limits+0x356/0x380 Code: f6 47 01 04 75 28 83 bf 94 00 00 00 00 75 39 83 bf 98 00 00 00 00 75 34 83 7f 68 00 75 32 31 c0 83 7f 5c 00 0f 84 9b fd ff ff <0f> 0b eb 13 0f 0b eb 0f 48 c7 c0 74 12 58 92 48 89 c7 e8 13 76 46 RSP: 0018:ffffa8a1dfb93b30 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff9232829c8388 RCX: 0000000000000088 RDX: 0000000000000080 RSI: 0000000000000200 RDI: ffffa8a1dfb93c38 RBP: 000000000000000c R08: 00000000ffffffff R09: 000000000000ffff R10: 0000000000000000 R11: 0000000000000000 R12: ffff9232829b9000 R13: ffff9232829b9010 R14: ffffa8a1dfb93c38 R15: ffffa8a1dfb93c38 FS: 0000000000000000(0000) GS:ffff923867c80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055c1b92480a8 CR3: 0000002484ff0002 CR4: 0000000000370ef0 Call Trace: ? __warn+0xca/0x1a0 ? blk_validate_limits+0x356/0x380 ? report_bug+0x11a/0x1a0 ? handle_bug+0x5e/0x90 ? exc_invalid_op+0x16/0x40 ? asm_exc_invalid_op+0x16/0x20 ? blk_validate_limits+0x356/0x380 blk_alloc_queue+0x7a/0x250 __blk_alloc_disk+0x39/0x80 nvme_mpath_alloc_disk+0x13d/0x1b0 [nvme_core] nvme_scan_ns+0xcc7/0x1010 [nvme_core] async_run_entry_fn+0x27/0x120 process_scheduled_works+0x1a0/0x360 worker_thread+0x2bc/0x350 ? pr_cont_work+0x1b0/0x1b0 kthread+0x111/0x120 ? kthread_unuse_mm+0x90/0x90 ret_from_fork+0x30/0x40 ? kthread_unuse_mm+0x90/0x90 ret_from_fork_asm+0x11/0x20 ---[ end trace 0000000000000000 ]--- presumably due to max_zone_append_sectors not being cleared to zero, resulting in blk_validate_zoned_limits() complaining and failing. This reverts commit 2a8f6153e1c2db06a537a5c9d61102eb591776f1. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6d1413bd69a5..7bfc877e159e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -375,7 +375,6 @@ struct queue_limits { unsigned int max_user_discard_sectors; unsigned int max_secure_erase_sectors; unsigned int max_write_zeroes_sectors; - unsigned int max_hw_zone_append_sectors; unsigned int max_zone_append_sectors; unsigned int discard_granularity; unsigned int discard_alignment; @@ -1205,9 +1204,25 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q) return q->limits.max_segment_size; } +static inline unsigned int +queue_limits_max_zone_append_sectors(const struct queue_limits *l) +{ + unsigned int max_sectors = min(l->chunk_sectors, l->max_hw_sectors); + + return min_not_zero(l->max_zone_append_sectors, max_sectors); +} + +static inline unsigned int queue_max_zone_append_sectors(struct request_queue *q) +{ + if (!blk_queue_is_zoned(q)) + return 0; + + return queue_limits_max_zone_append_sectors(&q->limits); +} + static inline bool queue_emulates_zone_append(struct request_queue *q) { - return blk_queue_is_zoned(q) && !q->limits.max_hw_zone_append_sectors; + return blk_queue_is_zoned(q) && !q->limits.max_zone_append_sectors; } static inline bool bdev_emulates_zone_append(struct block_device *bdev) @@ -1218,7 +1233,7 @@ static inline bool bdev_emulates_zone_append(struct block_device *bdev) static inline unsigned int bdev_max_zone_append_sectors(struct block_device *bdev) { - return bdev_limits(bdev)->max_zone_append_sectors; + return queue_max_zone_append_sectors(bdev_get_queue(bdev)); } static inline unsigned int bdev_max_segments(struct block_device *bdev) -- cgit v1.2.3 From df81366b484da9618cec12cfc30ec93f7d4b6ac7 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Fri, 1 Nov 2024 16:21:43 +0800 Subject: wifi: mac80211: fix description of ieee80211_set_active_links() for new sequence The sequence of calls has changed, but the description is inconsistent. So, fix the description. Fixes: 188a1bf89432 ("wifi: mac80211: re-order assigning channel in activate links") Signed-off-by: Zong-Zhe Yang Link: https://patch.msgid.link/20241101082143.11138-1-kevin_yang@realtek.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b4f246cdcca4..a97c9f85ae9a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7672,12 +7672,12 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb) * * - change_vif_links(0x11) * - unassign_vif_chanctx(link_id=0) + * - assign_vif_chanctx(link_id=4) * - change_sta_links(0x11) for each affected STA (the AP) * (TDLS connections on now inactive links should be torn down) * - remove group keys on the old link (link_id 0) * - add new group keys (GTK/IGTK/BIGTK) on the new link (link_id 4) * - change_sta_links(0x10) for each affected STA (the AP) - * - assign_vif_chanctx(link_id=4) * - change_vif_links(0x10) * * Return: 0 on success. An error code otherwise. -- cgit v1.2.3 From 1900e1a4495b7376cb9b4e58f1d846660f4c9c4b Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Wed, 6 Nov 2024 15:34:45 +0800 Subject: nvme: add reservation command's defines This is a preparation patch for NVMeOF target reservation commands implantation. Add the defines of reservation command, such as reservation log and sub operations. Signed-off-by: Guixin Liu Tested-by: Chaitanya Kulkarni Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index b58d9405d65e..44d048d68503 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -2037,4 +2037,72 @@ struct nvme_completion { #define NVME_MINOR(ver) (((ver) >> 8) & 0xff) #define NVME_TERTIARY(ver) ((ver) & 0xff) +enum { + NVME_AEN_RESV_LOG_PAGE_AVALIABLE = 0x00, +}; + +enum { + NVME_PR_LOG_EMPTY_LOG_PAGE = 0x00, + NVME_PR_LOG_REGISTRATION_PREEMPTED = 0x01, + NVME_PR_LOG_RESERVATION_RELEASED = 0x02, + NVME_PR_LOG_RESERVATOPM_PREEMPTED = 0x03, +}; + +enum { + NVME_PR_NOTIFY_BIT_REG_PREEMPTED = 1, + NVME_PR_NOTIFY_BIT_RESV_RELEASED = 2, + NVME_PR_NOTIFY_BIT_RESV_PREEMPTED = 3, +}; + +struct nvme_pr_log { + __le64 count; + __u8 type; + __u8 nr_pages; + __u8 rsvd1[2]; + __le32 nsid; + __u8 rsvd2[48]; +}; + +struct nvmet_pr_register_data { + __le64 crkey; + __le64 nrkey; +}; + +struct nvmet_pr_acquire_data { + __le64 crkey; + __le64 prkey; +}; + +struct nvmet_pr_release_data { + __le64 crkey; +}; + +enum nvme_pr_capabilities { + NVME_PR_SUPPORT_PTPL = 1, + NVME_PR_SUPPORT_WRITE_EXCLUSIVE = 1 << 1, + NVME_PR_SUPPORT_EXCLUSIVE_ACCESS = 1 << 2, + NVME_PR_SUPPORT_WRITE_EXCLUSIVE_REG_ONLY = 1 << 3, + NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_REG_ONLY = 1 << 4, + NVME_PR_SUPPORT_WRITE_EXCLUSIVE_ALL_REGS = 1 << 5, + NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_ALL_REGS = 1 << 6, + NVME_PR_SUPPORT_IEKEY_VER_1_3_DEF = 1 << 7, +}; + +enum nvme_pr_register_action { + NVME_PR_REGISTER_ACT_REG = 0, + NVME_PR_REGISTER_ACT_UNREG = 1, + NVME_PR_REGISTER_ACT_REPLACE = 1 << 1, +}; + +enum nvme_pr_acquire_action { + NVME_PR_ACQUIRE_ACT_ACQUIRE = 0, + NVME_PR_ACQUIRE_ACT_PREEMPT = 1, + NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT = 1 << 1, +}; + +enum nvme_pr_release_action { + NVME_PR_RELEASE_ACT_RELEASE = 0, + NVME_PR_RELEASE_ACT_CLEAR = 1, +}; + #endif /* _LINUX_NVME_H */ -- cgit v1.2.3 From beeb9220c7307fbb61a2cd6575907db52bde722f Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 23 Oct 2024 19:27:04 +0300 Subject: mm: vmalloc: group declarations depending on CONFIG_MMU together Patch series "x86/module: use large ROX pages for text allocations", v7. These patches add support for using large ROX pages for allocations of executable memory on x86. They address Andy's comments [1] about having executable mappings for code that was not completely formed. The approach taken is to allocate ROX memory along with writable but not executable memory and use the writable copy to perform relocations and alternatives patching. After the module text gets into its final shape, the contents of the writable memory is copied into the actual ROX location using text poking. The allocations of the ROX memory use vmalloc(VMAP_ALLOW_HUGE_MAP) to allocate PMD aligned memory, fill that memory with invalid instructions and in the end remap it as ROX. Portions of these large pages are handed out to execmem_alloc() callers without any changes to the permissions. When the memory is freed with execmem_free() it is invalidated again so that it won't contain stale instructions. The module memory allocation, x86 code dealing with relocations and alternatives patching take into account the existence of the two copies, the writable memory and the ROX memory at the actual allocated virtual address. [1] https://lore.kernel.org/all/a17c65c6-863f-4026-9c6f-a04b659e9ab4@app.fastmail.com This patch (of 8): There are a couple of declarations that depend on CONFIG_MMU in include/linux/vmalloc.h spread all over the file. Group them all together to improve code readability. No functional changes. Link: https://lkml.kernel.org/r/20241023162711.2579610-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20241023162711.2579610-2-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Christoph Hellwig Reviewed-by: Uladzislau Rezki (Sony) Reviewed-by: Luis Chamberlain Tested-by: kdevops Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Petkov (AMD) Cc: Brian Cain Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Guo Ren Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Kent Overstreet Cc: Liam R. Howlett Cc: Mark Rutland Cc: Masami Hiramatsu (Google) Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Oleg Nesterov Cc: Palmer Dabbelt Cc: Peter Zijlstra Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Stafford Horne Cc: Steven Rostedt (Google) Cc: Suren Baghdasaryan Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/vmalloc.h | 60 ++++++++++++++++++++----------------------------- 1 file changed, 24 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index ad2ce7a6ab7a..27408f21e501 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -134,12 +134,6 @@ extern void vm_unmap_ram(const void *mem, unsigned int count); extern void *vm_map_ram(struct page **pages, unsigned int count, int node); extern void vm_unmap_aliases(void); -#ifdef CONFIG_MMU -extern unsigned long vmalloc_nr_pages(void); -#else -static inline unsigned long vmalloc_nr_pages(void) { return 0; } -#endif - extern void *vmalloc_noprof(unsigned long size) __alloc_size(1); #define vmalloc(...) alloc_hooks(vmalloc_noprof(__VA_ARGS__)) @@ -266,12 +260,29 @@ static inline bool is_vm_area_hugepages(const void *addr) #endif } +/* for /proc/kcore */ +long vread_iter(struct iov_iter *iter, const char *addr, size_t count); + +/* + * Internals. Don't use.. + */ +__init void vm_area_add_early(struct vm_struct *vm); +__init void vm_area_register_early(struct vm_struct *vm, size_t align); + +int register_vmap_purge_notifier(struct notifier_block *nb); +int unregister_vmap_purge_notifier(struct notifier_block *nb); + #ifdef CONFIG_MMU +#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) + +unsigned long vmalloc_nr_pages(void); + int vm_area_map_pages(struct vm_struct *area, unsigned long start, unsigned long end, struct page **pages); void vm_area_unmap_pages(struct vm_struct *area, unsigned long start, unsigned long end); void vunmap_range(unsigned long addr, unsigned long end); + static inline void set_vm_flush_reset_perms(void *addr) { struct vm_struct *vm = find_vm_area(addr); @@ -279,24 +290,14 @@ static inline void set_vm_flush_reset_perms(void *addr) if (vm) vm->flags |= VM_FLUSH_RESET_PERMS; } +#else /* !CONFIG_MMU */ +#define VMALLOC_TOTAL 0UL -#else -static inline void set_vm_flush_reset_perms(void *addr) -{ -} -#endif - -/* for /proc/kcore */ -extern long vread_iter(struct iov_iter *iter, const char *addr, size_t count); - -/* - * Internals. Don't use.. - */ -extern __init void vm_area_add_early(struct vm_struct *vm); -extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); +static inline unsigned long vmalloc_nr_pages(void) { return 0; } +static inline void set_vm_flush_reset_perms(void *addr) {} +#endif /* CONFIG_MMU */ -#ifdef CONFIG_SMP -# ifdef CONFIG_MMU +#if defined(CONFIG_MMU) && defined(CONFIG_SMP) struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align); @@ -311,22 +312,9 @@ pcpu_get_vm_areas(const unsigned long *offsets, return NULL; } -static inline void -pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) -{ -} -# endif -#endif - -#ifdef CONFIG_MMU -#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) -#else -#define VMALLOC_TOTAL 0UL +static inline void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) {} #endif -int register_vmap_purge_notifier(struct notifier_block *nb); -int unregister_vmap_purge_notifier(struct notifier_block *nb); - #if defined(CONFIG_MMU) && defined(CONFIG_PRINTK) bool vmalloc_dump_obj(void *object); #else -- cgit v1.2.3 From 0c3beacf681ec897e0b36685a9b49d01f5cb2dfb Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 23 Oct 2024 19:27:06 +0300 Subject: asm-generic: introduce text-patching.h Several architectures support text patching, but they name the header files that declare patching functions differently. Make all such headers consistently named text-patching.h and add an empty header in asm-generic for architectures that do not support text patching. Link: https://lkml.kernel.org/r/20241023162711.2579610-4-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Christoph Hellwig Acked-by: Geert Uytterhoeven # m68k Acked-by: Arnd Bergmann Reviewed-by: Luis Chamberlain Tested-by: kdevops Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov (AMD) Cc: Brian Cain Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Dinh Nguyen Cc: Guo Ren Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Kent Overstreet Cc: Liam R. Howlett Cc: Mark Rutland Cc: Masami Hiramatsu (Google) Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Oleg Nesterov Cc: Palmer Dabbelt Cc: Peter Zijlstra Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Stafford Horne Cc: Steven Rostedt (Google) Cc: Suren Baghdasaryan Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Uladzislau Rezki (Sony) Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- include/asm-generic/text-patching.h | 5 +++++ include/linux/text-patching.h | 15 +++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 include/asm-generic/text-patching.h create mode 100644 include/linux/text-patching.h (limited to 'include') diff --git a/include/asm-generic/text-patching.h b/include/asm-generic/text-patching.h new file mode 100644 index 000000000000..2245c641b741 --- /dev/null +++ b/include/asm-generic/text-patching.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_TEXT_PATCHING_H +#define _ASM_GENERIC_TEXT_PATCHING_H + +#endif /* _ASM_GENERIC_TEXT_PATCHING_H */ diff --git a/include/linux/text-patching.h b/include/linux/text-patching.h new file mode 100644 index 000000000000..ad5877ab0855 --- /dev/null +++ b/include/linux/text-patching.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_TEXT_PATCHING_H +#define _LINUX_TEXT_PATCHING_H + +#include + +#ifndef text_poke_copy +static inline void *text_poke_copy(void *dst, const void *src, size_t len) +{ + return memcpy(dst, src, len); +} +#define text_poke_copy text_poke_copy +#endif + +#endif /* _LINUX_TEXT_PATCHING_H */ -- cgit v1.2.3 From 0c133b1e78cd34dd9d18da707dc6f46170e9129e Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 23 Oct 2024 19:27:07 +0300 Subject: module: prepare to handle ROX allocations for text In order to support ROX allocations for module text, it is necessary to handle modifications to the code, such as relocations and alternatives patching, without write access to that memory. One option is to use text patching, but this would make module loading extremely slow and will expose executable code that is not finally formed. A better way is to have memory allocated with ROX permissions contain invalid instructions and keep a writable, but not executable copy of the module text. The relocations and alternative patches would be done on the writable copy using the addresses of the ROX memory. Once the module is completely ready, the updated text will be copied to ROX memory using text patching in one go and the writable copy will be freed. Add support for that to module initialization code and provide necessary interfaces in execmem. Link: https://lkml.kernel.org/r/20241023162711.2579610-5-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewd-by: Luis Chamberlain Tested-by: kdevops Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Petkov (AMD) Cc: Brian Cain Cc: Catalin Marinas Cc: Christophe Leroy Cc: Christoph Hellwig Cc: Dave Hansen Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Guo Ren Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Kent Overstreet Cc: Liam R. Howlett Cc: Mark Rutland Cc: Masami Hiramatsu (Google) Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Oleg Nesterov Cc: Palmer Dabbelt Cc: Peter Zijlstra Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Stafford Horne Cc: Steven Rostedt (Google) Cc: Suren Baghdasaryan Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Uladzislau Rezki (Sony) Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/execmem.h | 23 +++++++++++++++++++++++ include/linux/module.h | 16 ++++++++++++++++ include/linux/moduleloader.h | 4 ++++ 3 files changed, 43 insertions(+) (limited to 'include') diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 32cef1144117..dfdf19f8a5e8 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -46,9 +46,11 @@ enum execmem_type { /** * enum execmem_range_flags - options for executable memory allocations * @EXECMEM_KASAN_SHADOW: allocate kasan shadow + * @EXECMEM_ROX_CACHE: allocations should use ROX cache of huge pages */ enum execmem_range_flags { EXECMEM_KASAN_SHADOW = (1 << 0), + EXECMEM_ROX_CACHE = (1 << 1), }; /** @@ -123,6 +125,27 @@ void *execmem_alloc(enum execmem_type type, size_t size); */ void execmem_free(void *ptr); +/** + * execmem_update_copy - copy an update to executable memory + * @dst: destination address to update + * @src: source address containing the data + * @size: how many bytes of memory shold be copied + * + * Copy @size bytes from @src to @dst using text poking if the memory at + * @dst is read-only. + * + * Return: a pointer to @dst or NULL on error + */ +void *execmem_update_copy(void *dst, const void *src, size_t size); + +/** + * execmem_is_rox - check if execmem is read-only + * @type - the execmem type to check + * + * Return: %true if the @type is read-only, %false if it's writable + */ +bool execmem_is_rox(enum execmem_type type); + #if defined(CONFIG_EXECMEM) && !defined(CONFIG_ARCH_WANTS_EXECMEM_LATE) void execmem_init(void); #else diff --git a/include/linux/module.h b/include/linux/module.h index 88ecc5e9f523..2a9386cbdf85 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -367,6 +367,8 @@ enum mod_mem_type { struct module_memory { void *base; + void *rw_copy; + bool is_rox; unsigned int size; #ifdef CONFIG_MODULES_TREE_LOOKUP @@ -767,6 +769,15 @@ static inline bool is_livepatch_module(struct module *mod) void set_module_sig_enforced(void); +void *__module_writable_address(struct module *mod, void *loc); + +static inline void *module_writable_address(struct module *mod, void *loc) +{ + if (!IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) || !mod) + return loc; + return __module_writable_address(mod, loc); +} + #else /* !CONFIG_MODULES... */ static inline struct module *__module_address(unsigned long addr) @@ -874,6 +885,11 @@ static inline bool module_is_coming(struct module *mod) { return false; } + +static inline void *module_writable_address(struct module *mod, void *loc) +{ + return loc; +} #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index e395461d59e5..1f5507ba5a12 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -108,6 +108,10 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod); +int module_post_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *mod); + #ifdef CONFIG_MODULES void flush_module_init_free_work(void); #else -- cgit v1.2.3 From 0c6378a71574daa6cd1534ad42a956e3262756c7 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 23 Oct 2024 19:27:08 +0300 Subject: arch: introduce set_direct_map_valid_noflush() Add an API that will allow updates of the direct/linear map for a set of physically contiguous pages. It will be used in the following patches. Link: https://lkml.kernel.org/r/20241023162711.2579610-6-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Christoph Hellwig Reviewed-by: Luis Chamberlain Tested-by: kdevops Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Petkov (AMD) Cc: Brian Cain Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Guo Ren Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Kent Overstreet Cc: Liam R. Howlett Cc: Mark Rutland Cc: Masami Hiramatsu (Google) Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Oleg Nesterov Cc: Palmer Dabbelt Cc: Peter Zijlstra Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Stafford Horne Cc: Steven Rostedt (Google) Cc: Suren Baghdasaryan Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Uladzislau Rezki (Sony) Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/set_memory.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index e7aec20fb44f..3030d9245f5a 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -34,6 +34,12 @@ static inline int set_direct_map_default_noflush(struct page *page) return 0; } +static inline int set_direct_map_valid_noflush(struct page *page, + unsigned nr, bool valid) +{ + return 0; +} + static inline bool kernel_page_present(struct page *page) { return true; -- cgit v1.2.3 From 2e45474ab14f0f17c1091c503a13ff2fe2a84486 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 23 Oct 2024 19:27:10 +0300 Subject: execmem: add support for cache of large ROX pages Using large pages to map text areas reduces iTLB pressure and improves performance. Extend execmem_alloc() with an ability to use huge pages with ROX permissions as a cache for smaller allocations. To populate the cache, a writable large page is allocated from vmalloc with VM_ALLOW_HUGE_VMAP, filled with invalid instructions and then remapped as ROX. The direct map alias of that large page is exculded from the direct map. Portions of that large page are handed out to execmem_alloc() callers without any changes to the permissions. When the memory is freed with execmem_free() it is invalidated again so that it won't contain stale instructions. An architecture has to implement execmem_fill_trapping_insns() callback and select ARCH_HAS_EXECMEM_ROX configuration option to be able to use the ROX cache. The cache is enabled on per-range basis when an architecture sets EXECMEM_ROX_CACHE flag in definition of an execmem_range. Link: https://lkml.kernel.org/r/20241023162711.2579610-8-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Luis Chamberlain Tested-by: kdevops Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Petkov (AMD) Cc: Brian Cain Cc: Catalin Marinas Cc: Christophe Leroy Cc: Christoph Hellwig Cc: Dave Hansen Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Guo Ren Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Kent Overstreet Cc: Liam R. Howlett Cc: Mark Rutland Cc: Masami Hiramatsu (Google) Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Oleg Nesterov Cc: Palmer Dabbelt Cc: Peter Zijlstra Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Stafford Horne Cc: Steven Rostedt (Google) Cc: Suren Baghdasaryan Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Uladzislau Rezki (Sony) Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/execmem.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/execmem.h b/include/linux/execmem.h index dfdf19f8a5e8..1517fa196bf7 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -53,6 +53,20 @@ enum execmem_range_flags { EXECMEM_ROX_CACHE = (1 << 1), }; +#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX +/** + * execmem_fill_trapping_insns - set memory to contain instructions that + * will trap + * @ptr: pointer to memory to fill + * @size: size of the range to fill + * @writable: is the memory poited by @ptr is writable or ROX + * + * A hook for architecures to fill execmem ranges with invalid instructions. + * Architectures that use EXECMEM_ROX_CACHE must implement this. + */ +void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); +#endif + /** * struct execmem_range - definition of an address space suitable for code and * related data allocations -- cgit v1.2.3 From 7c8c76e446ca0079692fad44a3993cb1d7666c21 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 23 Oct 2024 10:07:54 -0700 Subject: maple_tree: add mas_for_each_rev() helper Patch series "page allocation tag compression", v4. This patchset implements several improvements: 1. Gracefully handles module unloading while there are used allocations allocated from that module; 2. Provides an option to store page allocation tag references in the page flags, removing dependency on page extensions and eliminating the memory overhead from storing page allocation references (~0.2% of total system memory). This also improves page allocation performance when CONFIG_MEM_ALLOC_PROFILING is enabled by eliminating page extension lookup. Page allocation performance overhead is reduced from 41% to 5.5%. Patch #1 introduces mas_for_each_rev() helper function. Patch #2 introduces shutdown_mem_profiling() helper function to be used when disabling memory allocation profiling. Patch #3 copies module tags into virtually contiguous memory which serves two purposes: - Lets us deal with the situation when module is unloaded while there are still live allocations from that module. Since we are using a copy version of the tags we can safely unload the module. Space and gaps in this contiguous memory are managed using a maple tree. - Enables simple indexing of the tags in the later patches. Patch #4 changes the way we allocate virtually contiguous memory for module tags to reserve only vitrual area and populate physical pages only as needed at module load time. Patch #5 abstracts page allocation tag reference to simplify later changes. Patch #6 adds compression option to the sysctl.vm.mem_profiling boot parameter for storing page allocation tag references inside page flags if they fit. If the number of available page flag bits is insufficient to address all kernel allocations, memory allocation profiling gets disabled with an appropriate warning. This patch (of 6): Add mas_for_each_rev() function to iterate maple tree nodes in reverse order. Link: https://lkml.kernel.org/r/20241023170759.999909-1-surenb@google.com Link: https://lkml.kernel.org/r/20241023170759.999909-2-surenb@google.com Signed-off-by: Suren Baghdasaryan Suggested-by: Liam R. Howlett Reviewed-by: Liam R. Howlett Reviewed-by: Pasha Tatashin Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Petkov (AMD) Cc: Christoph Hellwig Cc: Daniel Gomez Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: David Rientjes Cc: Dennis Zhou Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joonsoo Kim Cc: Kalesh Singh Cc: Kees Cook Cc: Kent Overstreet Cc: Luis Chamberlain Cc: Matthew Wilcox Cc: Michal Hocko Cc: Mike Rapoport (Microsoft) Cc: Minchan Kim Cc: Paul E. McKenney Cc: Petr Pavlu Cc: Roman Gushchin Cc: Sami Tolvanen Cc: Sourav Panda Cc: Steven Rostedt (Google) Cc: Thomas Gleixner Cc: Thomas Huth Cc: Uladzislau Rezki (Sony) Cc: Vlastimil Babka Cc: Xiongwei Song Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 61c236850ca8..cbbcd18d4186 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -592,6 +592,20 @@ static __always_inline void mas_reset(struct ma_state *mas) #define mas_for_each(__mas, __entry, __max) \ while (((__entry) = mas_find((__mas), (__max))) != NULL) +/** + * mas_for_each_rev() - Iterate over a range of the maple tree in reverse order. + * @__mas: Maple Tree operation state (maple_state) + * @__entry: Entry retrieved from the tree + * @__min: minimum index to retrieve from the tree + * + * When returned, mas->index and mas->last will hold the entire range for the + * entry. + * + * Note: may return the zero entry. + */ +#define mas_for_each_rev(__mas, __entry, __min) \ + while (((__entry) = mas_find_rev((__mas), (__min))) != NULL) + #ifdef CONFIG_DEBUG_MAPLE_TREE enum mt_dump_format { mt_dump_dec, -- cgit v1.2.3 From 0db6f8d7820a4b788565dac8eed52bfc2c3216da Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 23 Oct 2024 10:07:56 -0700 Subject: alloc_tag: load module tags into separate contiguous memory When a module gets unloaded there is a possibility that some of the allocations it made are still used and therefore the allocation tags corresponding to these allocations are still referenced. As such, the memory for these tags can't be freed. This is currently handled as an abnormal situation and module's data section is not being unloaded. To handle this situation without keeping module's data in memory, allow codetags with longer lifespan than the module to be loaded into their own separate memory. The in-use memory areas and gaps after module unloading in this separate memory are tracked using maple trees. Allocation tags arrange their separate memory so that it is virtually contiguous and that will allow simple allocation tag indexing later on in this patchset. The size of this virtually contiguous memory is set to store up to 100000 allocation tags. [surenb@google.com: fix empty codetag module section handling] Link: https://lkml.kernel.org/r/20241101000017.3856204-1-surenb@google.com [akpm@linux-foundation.org: update comment, per Dan] Link: https://lkml.kernel.org/r/20241023170759.999909-4-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Pasha Tatashin Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Petkov (AMD) Cc: Christoph Hellwig Cc: Daniel Gomez Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: David Rientjes Cc: Dennis Zhou Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joonsoo Kim Cc: Kalesh Singh Cc: Kees Cook Cc: Kent Overstreet Cc: Liam R. Howlett Cc: Luis Chamberlain Cc: Matthew Wilcox Cc: Michal Hocko Cc: Mike Rapoport (Microsoft) Cc: Minchan Kim Cc: Paul E. McKenney Cc: Petr Pavlu Cc: Roman Gushchin Cc: Sami Tolvanen Cc: Sourav Panda Cc: Steven Rostedt (Google) Cc: Thomas Gleixner Cc: Thomas Huth Cc: Uladzislau Rezki (Sony) Cc: Vlastimil Babka Cc: Xiongwei Song Cc: Yu Zhao Cc: Dan Carpenter Signed-off-by: Andrew Morton --- include/asm-generic/codetag.lds.h | 19 +++++++++++++++++++ include/linux/alloc_tag.h | 13 +++++++++++-- include/linux/codetag.h | 37 ++++++++++++++++++++++++++++++++----- 3 files changed, 62 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/asm-generic/codetag.lds.h b/include/asm-generic/codetag.lds.h index 64f536b80380..372c320c5043 100644 --- a/include/asm-generic/codetag.lds.h +++ b/include/asm-generic/codetag.lds.h @@ -11,4 +11,23 @@ #define CODETAG_SECTIONS() \ SECTION_WITH_BOUNDARIES(alloc_tags) +/* + * Module codetags which aren't used after module unload, therefore have the + * same lifespan as the module and can be safely unloaded with the module. + */ +#define MOD_CODETAG_SECTIONS() + +#define MOD_SEPARATE_CODETAG_SECTION(_name) \ + .codetag.##_name : { \ + SECTION_WITH_BOUNDARIES(_name) \ + } + +/* + * For codetags which might be used after module unload, therefore might stay + * longer in memory. Each such codetag type has its own section so that we can + * unload them individually once unused. + */ +#define MOD_SEPARATE_CODETAG_SECTIONS() \ + MOD_SEPARATE_CODETAG_SECTION(alloc_tags) + #endif /* __ASM_GENERIC_CODETAG_LDS_H */ diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index 941deffc590d..55d30543c4c7 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -30,6 +30,13 @@ struct alloc_tag { struct alloc_tag_counters __percpu *counters; } __aligned(8); +struct alloc_tag_module_section { + unsigned long start_addr; + unsigned long end_addr; + /* used size */ + unsigned long size; +}; + #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG #define CODETAG_EMPTY ((void *)1) @@ -54,6 +61,8 @@ static inline void set_codetag_empty(union codetag_ref *ref) {} #ifdef CONFIG_MEM_ALLOC_PROFILING +#define ALLOC_TAG_SECTION_NAME "alloc_tags" + struct codetag_bytes { struct codetag *ct; s64 bytes; @@ -76,7 +85,7 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); #define DEFINE_ALLOC_TAG(_alloc_tag) \ static struct alloc_tag _alloc_tag __used __aligned(8) \ - __section("alloc_tags") = { \ + __section(ALLOC_TAG_SECTION_NAME) = { \ .ct = CODE_TAG_INIT, \ .counters = &_shared_alloc_tag }; @@ -85,7 +94,7 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); #define DEFINE_ALLOC_TAG(_alloc_tag) \ static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \ static struct alloc_tag _alloc_tag __used __aligned(8) \ - __section("alloc_tags") = { \ + __section(ALLOC_TAG_SECTION_NAME) = { \ .ct = CODE_TAG_INIT, \ .counters = &_alloc_tag_cntr }; diff --git a/include/linux/codetag.h b/include/linux/codetag.h index c2a579ccd455..d10bd9810d32 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -35,8 +35,15 @@ struct codetag_type_desc { size_t tag_size; void (*module_load)(struct codetag_type *cttype, struct codetag_module *cmod); - bool (*module_unload)(struct codetag_type *cttype, + void (*module_unload)(struct codetag_type *cttype, struct codetag_module *cmod); +#ifdef CONFIG_MODULES + void (*module_replaced)(struct module *mod, struct module *new_mod); + bool (*needs_section_mem)(struct module *mod, unsigned long size); + void *(*alloc_section_mem)(struct module *mod, unsigned long size, + unsigned int prepend, unsigned long align); + void (*free_section_mem)(struct module *mod, bool used); +#endif }; struct codetag_iterator { @@ -71,11 +78,31 @@ struct codetag_type * codetag_register_type(const struct codetag_type_desc *desc); #if defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) + +bool codetag_needs_module_section(struct module *mod, const char *name, + unsigned long size); +void *codetag_alloc_module_section(struct module *mod, const char *name, + unsigned long size, unsigned int prepend, + unsigned long align); +void codetag_free_module_sections(struct module *mod); +void codetag_module_replaced(struct module *mod, struct module *new_mod); void codetag_load_module(struct module *mod); -bool codetag_unload_module(struct module *mod); -#else +void codetag_unload_module(struct module *mod); + +#else /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ + +static inline bool +codetag_needs_module_section(struct module *mod, const char *name, + unsigned long size) { return false; } +static inline void * +codetag_alloc_module_section(struct module *mod, const char *name, + unsigned long size, unsigned int prepend, + unsigned long align) { return NULL; } +static inline void codetag_free_module_sections(struct module *mod) {} +static inline void codetag_module_replaced(struct module *mod, struct module *new_mod) {} static inline void codetag_load_module(struct module *mod) {} -static inline bool codetag_unload_module(struct module *mod) { return true; } -#endif +static inline void codetag_unload_module(struct module *mod) {} + +#endif /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ #endif /* _LINUX_CODETAG_H */ -- cgit v1.2.3 From 0f9b685626daa2f8e19a9788625c9b624c223e45 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 23 Oct 2024 10:07:57 -0700 Subject: alloc_tag: populate memory for module tags as needed The memory reserved for module tags does not need to be backed by physical pages until there are tags to store there. Change the way we reserve this memory to allocate only virtual area for the tags and populate it with physical pages as needed when we load a module. [surenb@google.com: avoid execmem_vmap() when !MMU] Link: https://lkml.kernel.org/r/20241031233611.3833002-1-surenb@google.com Link: https://lkml.kernel.org/r/20241023170759.999909-5-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Pasha Tatashin Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Petkov (AMD) Cc: Christoph Hellwig Cc: Daniel Gomez Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: David Rientjes Cc: Dennis Zhou Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joonsoo Kim Cc: Kalesh Singh Cc: Kees Cook Cc: Kent Overstreet Cc: Liam R. Howlett Cc: Luis Chamberlain Cc: Matthew Wilcox Cc: Michal Hocko Cc: Mike Rapoport (Microsoft) Cc: Minchan Kim Cc: Paul E. McKenney Cc: Petr Pavlu Cc: Roman Gushchin Cc: Sami Tolvanen Cc: Sourav Panda Cc: Steven Rostedt (Google) Cc: Thomas Gleixner Cc: Thomas Huth Cc: Uladzislau Rezki (Sony) Cc: Vlastimil Babka Cc: Xiongwei Song Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/linux/execmem.h | 12 ++++++++++++ include/linux/vmalloc.h | 3 +++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 1517fa196bf7..64130ae19690 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -139,6 +139,18 @@ void *execmem_alloc(enum execmem_type type, size_t size); */ void execmem_free(void *ptr); +#ifdef CONFIG_MMU +/** + * execmem_vmap - create virtual mapping for EXECMEM_MODULE_DATA memory + * @size: size of the virtual mapping in bytes + * + * Maps virtually contiguous area in the range suitable for EXECMEM_MODULE_DATA. + * + * Return: the area descriptor on success or %NULL on failure. + */ +struct vm_struct *execmem_vmap(size_t size); +#endif + /** * execmem_update_copy - copy an update to executable memory * @dst: destination address to update diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 27408f21e501..31e9ffd936e3 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -202,6 +202,9 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); +int vmap_pages_range(unsigned long addr, unsigned long end, pgprot_t prot, + struct page **pages, unsigned int page_shift); + /* * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings() -- cgit v1.2.3 From 42895a86124418d8dd29a93812bc282e569ccfee Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 23 Oct 2024 10:07:58 -0700 Subject: alloc_tag: introduce pgtag_ref_handle to abstract page tag references To simplify later changes to page tag references, introduce new pgtag_ref_handle type. This allows easy replacement of page_ext as a storage of page allocation tags. Link: https://lkml.kernel.org/r/20241023170759.999909-6-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Pasha Tatashin Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Petkov (AMD) Cc: Christoph Hellwig Cc: Daniel Gomez Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: David Rientjes Cc: Dennis Zhou Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joonsoo Kim Cc: Kalesh Singh Cc: Kees Cook Cc: Kent Overstreet Cc: Liam R. Howlett Cc: Luis Chamberlain Cc: Matthew Wilcox Cc: Michal Hocko Cc: Mike Rapoport (Microsoft) Cc: Minchan Kim Cc: Paul E. McKenney Cc: Petr Pavlu Cc: Roman Gushchin Cc: Sami Tolvanen Cc: Sourav Panda Cc: Steven Rostedt (Google) Cc: Thomas Gleixner Cc: Thomas Huth Cc: Uladzislau Rezki (Sony) Cc: Vlastimil Babka Cc: Xiongwei Song Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/linux/mm.h | 25 ++++++------ include/linux/pgalloc_tag.h | 92 ++++++++++++++++++++++++++------------------- 2 files changed, 67 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index eb070c14e309..f9120ac6d901 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4181,37 +4181,38 @@ static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new return; for (i = nr_pages; i < (1 << old_order); i += nr_pages) { - union codetag_ref *ref = get_page_tag_ref(folio_page(folio, i)); + union pgtag_ref_handle handle; + union codetag_ref ref; - if (ref) { + if (get_page_tag_ref(folio_page(folio, i), &ref, &handle)) { /* Set new reference to point to the original tag */ - alloc_tag_ref_set(ref, tag); - put_page_tag_ref(ref); + alloc_tag_ref_set(&ref, tag); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); } } } static inline void pgalloc_tag_copy(struct folio *new, struct folio *old) { + union pgtag_ref_handle handle; + union codetag_ref ref; struct alloc_tag *tag; - union codetag_ref *ref; tag = pgalloc_tag_get(&old->page); if (!tag) return; - ref = get_page_tag_ref(&new->page); - if (!ref) + if (!get_page_tag_ref(&new->page, &ref, &handle)) return; /* Clear the old ref to the original allocation tag. */ clear_page_tag_ref(&old->page); /* Decrement the counters of the tag on get_new_folio. */ - alloc_tag_sub(ref, folio_size(new)); - - __alloc_tag_ref_set(ref, tag); - - put_page_tag_ref(ref); + alloc_tag_sub(&ref, folio_size(new)); + __alloc_tag_ref_set(&ref, tag); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); } #else /* !CONFIG_MEM_ALLOC_PROFILING */ static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 59a3deb792a8..b13cd3313a88 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -11,46 +11,59 @@ #include +union pgtag_ref_handle { + union codetag_ref *ref; /* reference in page extension */ +}; + extern struct page_ext_operations page_alloc_tagging_ops; -static inline union codetag_ref *codetag_ref_from_page_ext(struct page_ext *page_ext) +/* Should be called only if mem_alloc_profiling_enabled() */ +static inline bool get_page_tag_ref(struct page *page, union codetag_ref *ref, + union pgtag_ref_handle *handle) { - return (union codetag_ref *)page_ext_data(page_ext, &page_alloc_tagging_ops); -} + struct page_ext *page_ext; + union codetag_ref *tmp; -static inline struct page_ext *page_ext_from_codetag_ref(union codetag_ref *ref) -{ - return (void *)ref - page_alloc_tagging_ops.offset; + if (!page) + return false; + + page_ext = page_ext_get(page); + if (!page_ext) + return false; + + tmp = (union codetag_ref *)page_ext_data(page_ext, &page_alloc_tagging_ops); + ref->ct = tmp->ct; + handle->ref = tmp; + return true; } -/* Should be called only if mem_alloc_profiling_enabled() */ -static inline union codetag_ref *get_page_tag_ref(struct page *page) +static inline void put_page_tag_ref(union pgtag_ref_handle handle) { - if (page) { - struct page_ext *page_ext = page_ext_get(page); + if (WARN_ON(!handle.ref)) + return; - if (page_ext) - return codetag_ref_from_page_ext(page_ext); - } - return NULL; + page_ext_put((void *)handle.ref - page_alloc_tagging_ops.offset); } -static inline void put_page_tag_ref(union codetag_ref *ref) +static inline void update_page_tag_ref(union pgtag_ref_handle handle, + union codetag_ref *ref) { - if (WARN_ON(!ref)) + if (WARN_ON(!handle.ref || !ref)) return; - page_ext_put(page_ext_from_codetag_ref(ref)); + handle.ref->ct = ref->ct; } static inline void clear_page_tag_ref(struct page *page) { if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); + union pgtag_ref_handle handle; + union codetag_ref ref; - if (ref) { - set_codetag_empty(ref); - put_page_tag_ref(ref); + if (get_page_tag_ref(page, &ref, &handle)) { + set_codetag_empty(&ref); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); } } } @@ -59,11 +72,13 @@ static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) { if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); + union pgtag_ref_handle handle; + union codetag_ref ref; - if (ref) { - alloc_tag_add(ref, task->alloc_tag, PAGE_SIZE * nr); - put_page_tag_ref(ref); + if (get_page_tag_ref(page, &ref, &handle)) { + alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); } } } @@ -71,11 +86,13 @@ static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) { if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); + union pgtag_ref_handle handle; + union codetag_ref ref; - if (ref) { - alloc_tag_sub(ref, PAGE_SIZE * nr); - put_page_tag_ref(ref); + if (get_page_tag_ref(page, &ref, &handle)) { + alloc_tag_sub(&ref, PAGE_SIZE * nr); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); } } } @@ -85,13 +102,14 @@ static inline struct alloc_tag *pgalloc_tag_get(struct page *page) struct alloc_tag *tag = NULL; if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); - - alloc_tag_sub_check(ref); - if (ref) { - if (ref->ct) - tag = ct_to_alloc_tag(ref->ct); - put_page_tag_ref(ref); + union pgtag_ref_handle handle; + union codetag_ref ref; + + if (get_page_tag_ref(page, &ref, &handle)) { + alloc_tag_sub_check(&ref); + if (ref.ct) + tag = ct_to_alloc_tag(ref.ct); + put_page_tag_ref(handle); } } @@ -106,8 +124,6 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) #else /* CONFIG_MEM_ALLOC_PROFILING */ -static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; } -static inline void put_page_tag_ref(union codetag_ref *ref) {} static inline void clear_page_tag_ref(struct page *page) {} static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) {} -- cgit v1.2.3 From 4835f747d3ed181bf2c67930fe06b2c01a5d2323 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 23 Oct 2024 10:07:59 -0700 Subject: alloc_tag: support for page allocation tag compression Implement support for storing page allocation tag references directly in the page flags instead of page extensions. sysctl.vm.mem_profiling boot parameter it extended to provide a way for a user to request this mode. Enabling compression eliminates memory overhead caused by page_ext and results in better performance for page allocations. However this mode will not work if the number of available page flag bits is insufficient to address all kernel allocations. Such condition can happen during boot or when loading a module. If this condition is detected, memory allocation profiling gets disabled with an appropriate warning. By default compression mode is disabled. Link: https://lkml.kernel.org/r/20241023170759.999909-7-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Pasha Tatashin Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Petkov (AMD) Cc: Christoph Hellwig Cc: Daniel Gomez Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: David Rientjes Cc: Dennis Zhou Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joonsoo Kim Cc: Kalesh Singh Cc: Kees Cook Cc: Kent Overstreet Cc: Liam R. Howlett Cc: Luis Chamberlain Cc: Matthew Wilcox Cc: Michal Hocko Cc: Mike Rapoport (Microsoft) Cc: Minchan Kim Cc: Paul E. McKenney Cc: Petr Pavlu Cc: Roman Gushchin Cc: Sami Tolvanen Cc: Sourav Panda Cc: Steven Rostedt (Google) Cc: Thomas Gleixner Cc: Thomas Huth Cc: Uladzislau Rezki (Sony) Cc: Vlastimil Babka Cc: Xiongwei Song Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/linux/alloc_tag.h | 10 ++- include/linux/codetag.h | 3 + include/linux/page-flags-layout.h | 7 ++ include/linux/pgalloc_tag.h | 145 +++++++++++++++++++++++++++++++++----- 4 files changed, 147 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index 55d30543c4c7..7c0786bdf9af 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -30,8 +30,16 @@ struct alloc_tag { struct alloc_tag_counters __percpu *counters; } __aligned(8); +struct alloc_tag_kernel_section { + struct alloc_tag *first_tag; + unsigned long count; +}; + struct alloc_tag_module_section { - unsigned long start_addr; + union { + unsigned long start_addr; + struct alloc_tag *first_tag; + }; unsigned long end_addr; /* used size */ unsigned long size; diff --git a/include/linux/codetag.h b/include/linux/codetag.h index d10bd9810d32..d14dbd26b370 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -13,6 +13,9 @@ struct codetag_module; struct seq_buf; struct module; +#define CODETAG_SECTION_START_PREFIX "__start_" +#define CODETAG_SECTION_STOP_PREFIX "__stop_" + /* * An instance of this structure is created in a special ELF section at every * code location being tagged. At runtime, the special section is treated as diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 7d79818dc065..4f5c9e979bb9 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -111,5 +111,12 @@ ZONES_WIDTH - LRU_GEN_WIDTH - SECTIONS_WIDTH - \ NODES_WIDTH - KASAN_TAG_WIDTH - LAST_CPUPID_WIDTH) +#define NR_NON_PAGEFLAG_BITS (SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH + \ + LAST_CPUPID_SHIFT + KASAN_TAG_WIDTH + \ + LRU_GEN_WIDTH + LRU_REFS_WIDTH) + +#define NR_UNUSED_PAGEFLAG_BITS (BITS_PER_LONG - \ + (NR_NON_PAGEFLAG_BITS + NR_PAGEFLAGS)) + #endif #endif /* _LINUX_PAGE_FLAGS_LAYOUT */ diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index b13cd3313a88..1fe63b52e5e5 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -11,29 +11,118 @@ #include +extern struct page_ext_operations page_alloc_tagging_ops; +extern unsigned long alloc_tag_ref_mask; +extern int alloc_tag_ref_offs; +extern struct alloc_tag_kernel_section kernel_tags; + +DECLARE_STATIC_KEY_FALSE(mem_profiling_compressed); + +typedef u16 pgalloc_tag_idx; + union pgtag_ref_handle { union codetag_ref *ref; /* reference in page extension */ + struct page *page; /* reference in page flags */ }; -extern struct page_ext_operations page_alloc_tagging_ops; +/* Reserved indexes */ +#define CODETAG_ID_NULL 0 +#define CODETAG_ID_EMPTY 1 +#define CODETAG_ID_FIRST 2 + +#ifdef CONFIG_MODULES + +extern struct alloc_tag_module_section module_tags; + +static inline struct alloc_tag *module_idx_to_tag(pgalloc_tag_idx idx) +{ + return &module_tags.first_tag[idx - kernel_tags.count]; +} + +static inline pgalloc_tag_idx module_tag_to_idx(struct alloc_tag *tag) +{ + return CODETAG_ID_FIRST + kernel_tags.count + (tag - module_tags.first_tag); +} + +#else /* CONFIG_MODULES */ + +static inline struct alloc_tag *module_idx_to_tag(pgalloc_tag_idx idx) +{ + pr_warn("invalid page tag reference %lu\n", (unsigned long)idx); + return NULL; +} + +static inline pgalloc_tag_idx module_tag_to_idx(struct alloc_tag *tag) +{ + pr_warn("invalid page tag 0x%lx\n", (unsigned long)tag); + return CODETAG_ID_NULL; +} + +#endif /* CONFIG_MODULES */ + +static inline void idx_to_ref(pgalloc_tag_idx idx, union codetag_ref *ref) +{ + switch (idx) { + case (CODETAG_ID_NULL): + ref->ct = NULL; + break; + case (CODETAG_ID_EMPTY): + set_codetag_empty(ref); + break; + default: + idx -= CODETAG_ID_FIRST; + ref->ct = idx < kernel_tags.count ? + &kernel_tags.first_tag[idx].ct : + &module_idx_to_tag(idx)->ct; + break; + } +} + +static inline pgalloc_tag_idx ref_to_idx(union codetag_ref *ref) +{ + struct alloc_tag *tag; + + if (!ref->ct) + return CODETAG_ID_NULL; + + if (is_codetag_empty(ref)) + return CODETAG_ID_EMPTY; + + tag = ct_to_alloc_tag(ref->ct); + if (tag >= kernel_tags.first_tag && tag < kernel_tags.first_tag + kernel_tags.count) + return CODETAG_ID_FIRST + (tag - kernel_tags.first_tag); + + return module_tag_to_idx(tag); +} + + /* Should be called only if mem_alloc_profiling_enabled() */ static inline bool get_page_tag_ref(struct page *page, union codetag_ref *ref, union pgtag_ref_handle *handle) { - struct page_ext *page_ext; - union codetag_ref *tmp; - if (!page) return false; - page_ext = page_ext_get(page); - if (!page_ext) - return false; + if (static_key_enabled(&mem_profiling_compressed)) { + pgalloc_tag_idx idx; + + idx = (page->flags >> alloc_tag_ref_offs) & alloc_tag_ref_mask; + idx_to_ref(idx, ref); + handle->page = page; + } else { + struct page_ext *page_ext; + union codetag_ref *tmp; + + page_ext = page_ext_get(page); + if (!page_ext) + return false; + + tmp = (union codetag_ref *)page_ext_data(page_ext, &page_alloc_tagging_ops); + ref->ct = tmp->ct; + handle->ref = tmp; + } - tmp = (union codetag_ref *)page_ext_data(page_ext, &page_alloc_tagging_ops); - ref->ct = tmp->ct; - handle->ref = tmp; return true; } @@ -42,16 +131,35 @@ static inline void put_page_tag_ref(union pgtag_ref_handle handle) if (WARN_ON(!handle.ref)) return; - page_ext_put((void *)handle.ref - page_alloc_tagging_ops.offset); + if (!static_key_enabled(&mem_profiling_compressed)) + page_ext_put((void *)handle.ref - page_alloc_tagging_ops.offset); } -static inline void update_page_tag_ref(union pgtag_ref_handle handle, - union codetag_ref *ref) +static inline void update_page_tag_ref(union pgtag_ref_handle handle, union codetag_ref *ref) { - if (WARN_ON(!handle.ref || !ref)) - return; - - handle.ref->ct = ref->ct; + if (static_key_enabled(&mem_profiling_compressed)) { + struct page *page = handle.page; + unsigned long old_flags; + unsigned long flags; + unsigned long idx; + + if (WARN_ON(!page || !ref)) + return; + + idx = (unsigned long)ref_to_idx(ref); + idx = (idx & alloc_tag_ref_mask) << alloc_tag_ref_offs; + do { + old_flags = READ_ONCE(page->flags); + flags = old_flags; + flags &= ~(alloc_tag_ref_mask << alloc_tag_ref_offs); + flags |= idx; + } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags))); + } else { + if (WARN_ON(!handle.ref || !ref)) + return; + + handle.ref->ct = ref->ct; + } } static inline void clear_page_tag_ref(struct page *page) @@ -122,6 +230,8 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr); } +void __init alloc_tag_sec_init(void); + #else /* CONFIG_MEM_ALLOC_PROFILING */ static inline void clear_page_tag_ref(struct page *page) {} @@ -130,6 +240,7 @@ static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {} +static inline void alloc_tag_sec_init(void) {} #endif /* CONFIG_MEM_ALLOC_PROFILING */ -- cgit v1.2.3 From b7fc16a16b0850e41b88eae0edfa4c085c012347 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 24 Oct 2024 09:23:18 -0700 Subject: mm/codetag: uninline and move pgalloc_tag_copy and pgalloc_tag_split pgalloc_tag_copy() and pgalloc_tag_split() are sizable and outside of any performance-critical paths, so it should be fine to uninline them. Also move their declarations into pgalloc_tag.h which seems like a more appropriate place for them. No functional changes other than uninlining. Link: https://lkml.kernel.org/r/20241024162318.1640781-1-surenb@google.com Signed-off-by: Suren Baghdasaryan Suggested-by: Andrew Morton Acked-by: Yu Zhao Cc: Kent Overstreet Cc: Pasha Tatashin Cc: Sourav Panda Signed-off-by: Andrew Morton --- include/linux/mm.h | 58 --------------------------------------------- include/linux/pgalloc_tag.h | 5 ++++ 2 files changed, 5 insertions(+), 58 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index f9120ac6d901..08e487e5850a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4166,62 +4166,4 @@ static inline int do_mseal(unsigned long start, size_t len_in, unsigned long fla } #endif -#ifdef CONFIG_MEM_ALLOC_PROFILING -static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) -{ - int i; - struct alloc_tag *tag; - unsigned int nr_pages = 1 << new_order; - - if (!mem_alloc_profiling_enabled()) - return; - - tag = pgalloc_tag_get(&folio->page); - if (!tag) - return; - - for (i = nr_pages; i < (1 << old_order); i += nr_pages) { - union pgtag_ref_handle handle; - union codetag_ref ref; - - if (get_page_tag_ref(folio_page(folio, i), &ref, &handle)) { - /* Set new reference to point to the original tag */ - alloc_tag_ref_set(&ref, tag); - update_page_tag_ref(handle, &ref); - put_page_tag_ref(handle); - } - } -} - -static inline void pgalloc_tag_copy(struct folio *new, struct folio *old) -{ - union pgtag_ref_handle handle; - union codetag_ref ref; - struct alloc_tag *tag; - - tag = pgalloc_tag_get(&old->page); - if (!tag) - return; - - if (!get_page_tag_ref(&new->page, &ref, &handle)) - return; - - /* Clear the old ref to the original allocation tag. */ - clear_page_tag_ref(&old->page); - /* Decrement the counters of the tag on get_new_folio. */ - alloc_tag_sub(&ref, folio_size(new)); - __alloc_tag_ref_set(&ref, tag); - update_page_tag_ref(handle, &ref); - put_page_tag_ref(handle); -} -#else /* !CONFIG_MEM_ALLOC_PROFILING */ -static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) -{ -} - -static inline void pgalloc_tag_copy(struct folio *new, struct folio *old) -{ -} -#endif /* CONFIG_MEM_ALLOC_PROFILING */ - #endif /* _LINUX_MM_H */ diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 1fe63b52e5e5..0e43ab653ab6 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -230,6 +230,9 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr); } +void pgalloc_tag_split(struct folio *folio, int old_order, int new_order); +void pgalloc_tag_copy(struct folio *new, struct folio *old); + void __init alloc_tag_sec_init(void); #else /* CONFIG_MEM_ALLOC_PROFILING */ @@ -241,6 +244,8 @@ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {} static inline void alloc_tag_sec_init(void) {} +static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {} +static inline void pgalloc_tag_copy(struct folio *new, struct folio *old) {} #endif /* CONFIG_MEM_ALLOC_PROFILING */ -- cgit v1.2.3 From d7cb6d7414ea1b33536fa6d11805cb8dceec1f97 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 7 Nov 2024 15:42:59 +0900 Subject: block: RCU protect disk->conv_zones_bitmap Ensure that a disk revalidation changing the conventional zones bitmap of a disk does not cause invalid memory references when using the disk_zone_is_conv() helper by RCU protecting the disk->conv_zones_bitmap pointer. disk_zone_is_conv() is modified to operate under the RCU read lock and the function disk_set_conv_zones_bitmap() is added to update a disk conv_zones_bitmap pointer using rcu_replace_pointer() with the disk zone_wplugs_lock spinlock held. disk_free_zone_resources() is modified to call disk_update_zone_resources() with a NULL bitmap pointer to free the disk conv_zones_bitmap. disk_set_conv_zones_bitmap() is also used in disk_update_zone_resources() to set the new (revalidated) bitmap and free the old one. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20241107064300.227731-2-dlemoal@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7bfc877e159e..6d379803c777 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -195,7 +195,7 @@ struct gendisk { unsigned int nr_zones; unsigned int zone_capacity; unsigned int last_zone_capacity; - unsigned long *conv_zones_bitmap; + unsigned long __rcu *conv_zones_bitmap; unsigned int zone_wplugs_hash_bits; spinlock_t zone_wplugs_lock; struct mempool_s *zone_wplugs_pool; -- cgit v1.2.3 From f3d9bf05140dd242cdc33c431489a853f2bc1b67 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 7 Nov 2024 15:43:00 +0900 Subject: block: Add a public bdev_zone_is_seq() helper Turn the private disk_zone_is_conv() function in blk-zoned.c into a public and documented bdev_zone_is_seq() helper with the inverse polarity of the original function, also adding a check for non-zoned devices so that all file systems can use the helper, even with a regular block device. Suggested-by: Christoph Hellwig Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20241107064300.227731-3-dlemoal@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6d379803c777..93551772c1d6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1395,6 +1395,33 @@ static inline bool bdev_is_zone_start(struct block_device *bdev, return bdev_offset_from_zone_start(bdev, sector) == 0; } +/** + * bdev_zone_is_seq - check if a sector belongs to a sequential write zone + * @bdev: block device to check + * @sector: sector number + * + * Check if @sector on @bdev is contained in a sequential write required zone. + */ +static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector) +{ + bool is_seq = false; + +#if IS_ENABLED(CONFIG_BLK_DEV_ZONED) + if (bdev_is_zoned(bdev)) { + struct gendisk *disk = bdev->bd_disk; + unsigned long *bitmap; + + rcu_read_lock(); + bitmap = rcu_dereference(disk->conv_zones_bitmap); + is_seq = !bitmap || + !test_bit(disk_zone_no(disk, sector), bitmap); + rcu_read_unlock(); + } +#endif + + return is_seq; +} + static inline int queue_dma_alignment(const struct request_queue *q) { return q->limits.dma_alignment; -- cgit v1.2.3 From f7470591f8db1a72ce9f7ab49cb13c2b21b92002 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 5 Oct 2024 21:01:12 +0100 Subject: mm: convert page_to_pgoff() to page_pgoff() Patch series "page->index removals in mm", v2. As part of shrinking struct page, we need to stop using page->index. This patchset gets rid of most of the remaining references to page->index in mm, as well as increasing the number of functions which take a const folio/page pointer. It shrinks the text segment of mm by a few hundred bytes in my test config, probably mostly from removing calls to compound_head() in page_to_pgoff(). This patch (of 7): Change the function signature to pass in the folio as all three callers have it. This removes a reference to page->index, which we're trying to get rid of. And add kernel-doc. Link: https://lkml.kernel.org/r/20241005200121.3231142-1-willy@infradead.org Link: https://lkml.kernel.org/r/20241005200121.3231142-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- include/linux/pagemap.h | 31 +++++++++++++++++-------------- 2 files changed, 18 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 08e487e5850a..78848fbefe94 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1895,7 +1895,7 @@ static inline unsigned long page_to_section(const struct page *page) * * Return: The Page Frame Number of the first page in the folio. */ -static inline unsigned long folio_pfn(struct folio *folio) +static inline unsigned long folio_pfn(const struct folio *folio) { return page_to_pfn(&folio->page); } diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 68a5f1ff3301..bcf0865a38ae 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1011,22 +1011,25 @@ static inline struct folio *read_mapping_folio(struct address_space *mapping, return read_cache_folio(mapping, index, NULL, file); } -/* - * Get the offset in PAGE_SIZE (even for hugetlb pages). +/** + * page_pgoff - Calculate the logical page offset of this page. + * @folio: The folio containing this page. + * @page: The page which we need the offset of. + * + * For file pages, this is the offset from the beginning of the file + * in units of PAGE_SIZE. For anonymous pages, this is the offset from + * the beginning of the anon_vma in units of PAGE_SIZE. This will + * return nonsense for KSM pages. + * + * Context: Caller must have a reference on the folio or otherwise + * prevent it from being split or freed. + * + * Return: The offset in units of PAGE_SIZE. */ -static inline pgoff_t page_to_pgoff(struct page *page) +static inline pgoff_t page_pgoff(const struct folio *folio, + const struct page *page) { - struct page *head; - - if (likely(!PageTransTail(page))) - return page->index; - - head = compound_head(page); - /* - * We don't initialize ->index for tail pages: calculate based on - * head page - */ - return head->index + page - head; + return folio->index + folio_page_idx(folio, page); } /* -- cgit v1.2.3 From 713da0b33b3e9d16272b57f4c44dee5c052be9b7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 5 Oct 2024 21:01:14 +0100 Subject: mm: renovate page_address_in_vma() This function doesn't modify any of its arguments, so if we make a few other functions take const pointers, we can make page_address_in_vma() take const pointers too. All of its callers have the containing folio already, so pass that in as an argument instead of recalculating it. Also add kernel-doc Link: https://lkml.kernel.org/r/20241005200121.3231142-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/rmap.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index d5e93e44322e..78923015a2e8 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -728,11 +728,8 @@ page_vma_mapped_walk_restart(struct page_vma_mapped_walk *pvmw) } bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw); - -/* - * Used by swapoff to help locate where page is expected in vma. - */ -unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); +unsigned long page_address_in_vma(const struct folio *folio, + const struct page *, const struct vm_area_struct *); /* * Cleans the PTEs of shared mappings. -- cgit v1.2.3 From 68158bfa3dbd4af8461ef75a91ffc03be942c8fe Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 5 Oct 2024 21:01:15 +0100 Subject: mm: mass constification of folio/page pointers Now that page_pgoff() takes const pointers, we can constify the pointers to a lot of functions. Link: https://lkml.kernel.org/r/20241005200121.3231142-5-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/ksm.h | 7 ++++--- include/linux/rmap.h | 10 +++++----- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 29022e71a074..6a53ac4885bb 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -90,7 +90,7 @@ struct folio *ksm_might_need_to_copy(struct folio *folio, void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc); void folio_migrate_ksm(struct folio *newfolio, struct folio *folio); -void collect_procs_ksm(struct folio *folio, struct page *page, +void collect_procs_ksm(const struct folio *folio, const struct page *page, struct list_head *to_kill, int force_early); long ksm_process_profit(struct mm_struct *); @@ -122,8 +122,9 @@ static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte) { } -static inline void collect_procs_ksm(struct folio *folio, struct page *page, - struct list_head *to_kill, int force_early) +static inline void collect_procs_ksm(const struct folio *folio, + const struct page *page, struct list_head *to_kill, + int force_early) { } diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 78923015a2e8..683a04088f3f 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -171,7 +171,7 @@ static inline void anon_vma_merge(struct vm_area_struct *vma, unlink_anon_vmas(next); } -struct anon_vma *folio_get_anon_vma(struct folio *folio); +struct anon_vma *folio_get_anon_vma(const struct folio *folio); /* RMAP flags, currently only relevant for some anon rmap operations. */ typedef int __bitwise rmap_t; @@ -194,8 +194,8 @@ enum rmap_level { RMAP_LEVEL_PMD, }; -static inline void __folio_rmap_sanity_checks(struct folio *folio, - struct page *page, int nr_pages, enum rmap_level level) +static inline void __folio_rmap_sanity_checks(const struct folio *folio, + const struct page *page, int nr_pages, enum rmap_level level) { /* hugetlb folios are handled separately. */ VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); @@ -771,14 +771,14 @@ struct rmap_walk_control { bool (*rmap_one)(struct folio *folio, struct vm_area_struct *vma, unsigned long addr, void *arg); int (*done)(struct folio *folio); - struct anon_vma *(*anon_lock)(struct folio *folio, + struct anon_vma *(*anon_lock)(const struct folio *folio, struct rmap_walk_control *rwc); bool (*invalid_vma)(struct vm_area_struct *vma, void *arg); }; void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc); void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc); -struct anon_vma *folio_lock_anon_vma_read(struct folio *folio, +struct anon_vma *folio_lock_anon_vma_read(const struct folio *folio, struct rmap_walk_control *rwc); #else /* !CONFIG_MMU */ -- cgit v1.2.3 From 0386aaa6e9c826bc494169a914e01a86befe6edf Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 5 Oct 2024 21:01:16 +0100 Subject: bootmem: stop using page->index Encode the type into the bottom four bits of page->private and the info into the remaining bits. Also turn the bootmem type into a named enum. [arnd@arndb.de: bootmem: add bootmem_type stub function] Link: https://lkml.kernel.org/r/20241015143802.577613-1-arnd@kernel.org [akpm@linux-foundation.org: fix build with !CONFIG_HAVE_BOOTMEM_INFO_NODE] Link: https://lore.kernel.org/oe-kbuild-all/202410090311.eaqcL7IZ-lkp@intel.com/ Link: https://lkml.kernel.org/r/20241005200121.3231142-6-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Arnd Bergmann Cc: kernel test robot Signed-off-by: Andrew Morton --- include/linux/bootmem_info.h | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h index cffa38a73618..d8a8d245824a 100644 --- a/include/linux/bootmem_info.h +++ b/include/linux/bootmem_info.h @@ -6,11 +6,10 @@ #include /* - * Types for free bootmem stored in page->lru.next. These have to be in - * some random range in unsigned long space for debugging purposes. + * Types for free bootmem stored in the low bits of page->private. */ -enum { - MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12, +enum bootmem_type { + MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 1, SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE, MIX_SECTION_INFO, NODE_INFO, @@ -21,9 +20,19 @@ enum { void __init register_page_bootmem_info_node(struct pglist_data *pgdat); void get_page_bootmem(unsigned long info, struct page *page, - unsigned long type); + enum bootmem_type type); void put_page_bootmem(struct page *page); +static inline enum bootmem_type bootmem_type(const struct page *page) +{ + return (unsigned long)page->private & 0xf; +} + +static inline unsigned long bootmem_info(const struct page *page) +{ + return (unsigned long)page->private >> 4; +} + /* * Any memory allocated via the memblock allocator and not via the * buddy will be marked reserved already in the memmap. For those @@ -31,7 +40,7 @@ void put_page_bootmem(struct page *page); */ static inline void free_bootmem_page(struct page *page) { - unsigned long magic = page->index; + enum bootmem_type type = bootmem_type(page); /* * The reserve_bootmem_region sets the reserved flag on bootmem @@ -39,7 +48,7 @@ static inline void free_bootmem_page(struct page *page) */ VM_BUG_ON_PAGE(page_ref_count(page) != 2, page); - if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) + if (type == SECTION_INFO || type == MIX_SECTION_INFO) put_page_bootmem(page); else VM_BUG_ON_PAGE(1, page); @@ -53,8 +62,18 @@ static inline void put_page_bootmem(struct page *page) { } +static inline enum bootmem_type bootmem_type(const struct page *page) +{ + return SECTION_INFO; +} + +static inline unsigned long bootmem_info(const struct page *page) +{ + return 0; +} + static inline void get_page_bootmem(unsigned long info, struct page *page, - unsigned long type) + enum bootmem_type type) { } -- cgit v1.2.3 From 6a78699838a0ddeed3620ddf50c1521f1fe1e811 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 31 Oct 2024 21:37:19 +0800 Subject: block: always verify unfreeze lock on the owner task commit f1be1788a32e ("block: model freeze & enter queue as lock for supporting lockdep") tries to apply lockdep for verifying freeze & unfreeze. However, the verification is only done the outmost freeze and unfreeze. This way is actually not correct because q->mq_freeze_depth still may drop to zero on other task instead of the freeze owner task. Fix this issue by always verifying the last unfreeze lock on the owner task context, and make sure both the outmost freeze & unfreeze are verified in the current task. Fixes: f1be1788a32e ("block: model freeze & enter queue as lock for supporting lockdep") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20241031133723.303835-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 93551772c1d6..1b51a7c92e9b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -575,6 +575,10 @@ struct request_queue { struct throtl_data *td; #endif struct rcu_head rcu_head; +#ifdef CONFIG_LOCKDEP + struct task_struct *mq_freeze_owner; + int mq_freeze_owner_depth; +#endif wait_queue_head_t mq_freeze_wq; /* * Protect concurrent access to q_usage_counter by -- cgit v1.2.3 From 9c477088b60dd23f3120b2b01b14d85047a033da Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 6 Nov 2024 21:19:59 +0100 Subject: net: phy: make genphy_c45_write_eee_adv() static genphy_c45_write_eee_adv() isn't used outside phy-c45.c, so make it static. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/d23bd784-44e6-4a15-af3a-b37379156521@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index bf0eb4e5d35c..bb09a5a128e1 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1953,7 +1953,6 @@ int genphy_c45_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data); int genphy_c45_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data); -int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv); int genphy_c45_an_config_eee_aneg(struct phy_device *phydev); int genphy_c45_read_eee_adv(struct phy_device *phydev, unsigned long *adv); -- cgit v1.2.3 From db73835f54fc57bc267d43836905588f24ddac85 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 6 Nov 2024 21:25:12 +0100 Subject: net: phy: remove genphy_config_eee_advert bcm_config_lre_aneg() doesn't use genphy_config_eee_advert() any longer. As this was the only user, we can remove genphy_config_eee_advert() now. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/37da7f3e-b883-4c07-9881-b8c0516822b7@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index bb09a5a128e1..1e4127c495c0 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1885,7 +1885,6 @@ int genphy_read_abilities(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart); -int genphy_config_eee_advert(struct phy_device *phydev); int __genphy_config_aneg(struct phy_device *phydev, bool changed); int genphy_aneg_done(struct phy_device *phydev); int genphy_update_link(struct phy_device *phydev); -- cgit v1.2.3 From 48171c65f61148b0025128b70837280123f1309d Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 6 Nov 2024 22:37:32 +0100 Subject: ipv4: Prepare ip_route_output() to future .flowi4_tos conversion. Convert the "tos" parameter of ip_route_output() to dscp_t. This way we'll have a dscp_t value directly available when .flowi4_tos will eventually be converted to dscp_t. All ip_route_output() callers but one set this "tos" parameter to 0 and therefore don't need to be adapted to the new prototype. Only br_nf_pre_routing_finish() needs conversion. It can just use ip4h_dscp() to get the DSCP field from the IPv4 header. Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/0f10d031dd44c70aae9bc6e19391cb30d5c2fe71.1730928699.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/route.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index 586e59f7ed8a..0a690adfdff5 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -156,12 +156,12 @@ static inline struct rtable *ip_route_output_key(struct net *net, struct flowi4 * structure is only partially set, it may bypass some fib-rules. */ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr, - __be32 saddr, u8 tos, int oif, - __u8 scope) + __be32 saddr, dscp_t dscp, + int oif, __u8 scope) { struct flowi4 fl4 = { .flowi4_oif = oif, - .flowi4_tos = tos, + .flowi4_tos = inet_dscp_to_dsfield(dscp), .flowi4_scope = scope, .daddr = daddr, .saddr = saddr, -- cgit v1.2.3 From b855f02427e995cbc905e134cc3a7f4e503c0455 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 1 Nov 2024 10:12:03 +0100 Subject: media: replace obsolete hans.verkuil@cisco.com alias The old hans.verkuil@cisco.com email address was discontinued years ago. Replace it with the correct hansverk@cisco.com email. Signed-off-by: Hans Verkuil --- include/media/i2c/ths7303.h | 2 +- include/media/media-request.h | 2 +- include/uapi/linux/v4l2-dv-timings.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/media/i2c/ths7303.h b/include/media/i2c/ths7303.h index fee2818c558d..fc937025cdb4 100644 --- a/include/media/i2c/ths7303.h +++ b/include/media/i2c/ths7303.h @@ -5,7 +5,7 @@ * Copyright 2013 Cisco Systems, Inc. and/or its affiliates. * * Contributors: - * Hans Verkuil + * Hans Verkuil * Lad, Prabhakar * Martin Bugge */ diff --git a/include/media/media-request.h b/include/media/media-request.h index 3cd25a2717ce..d4ac557678a7 100644 --- a/include/media/media-request.h +++ b/include/media/media-request.h @@ -5,7 +5,7 @@ * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved. * Copyright (C) 2018 Intel Corporation * - * Author: Hans Verkuil + * Author: Hans Verkuil * Author: Sakari Ailus */ diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h index ef0128c7369c..44a16e0e5a12 100644 --- a/include/uapi/linux/v4l2-dv-timings.h +++ b/include/uapi/linux/v4l2-dv-timings.h @@ -2,7 +2,7 @@ /* * V4L2 DV timings header. * - * Copyright (C) 2012-2016 Hans Verkuil + * Copyright (C) 2012-2016 Hans Verkuil */ #ifndef _V4L2_DV_TIMINGS_H -- cgit v1.2.3 From b45a3777ceabbe08ab7a6e97f258191c07cbab8d Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 8 Nov 2024 10:13:52 +0800 Subject: iommu: Pass old domain to set_dev_pasid op To support domain replacement for pasid, the underlying iommu driver needs to know the old domain hence be able to clean up the existing attachment. It would be much convenient for iommu layer to pass down the old domain. Otherwise, iommu drivers would need to track domain for pasids by themselves, this would duplicate code among the iommu drivers. Or iommu drivers would rely group->pasid_array to get domain, which may not always the correct one. Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Reviewed-by: Nicolin Chen Reviewed-by: Vasant Hegde Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20241107122234.7424-2-yi.l.liu@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bd722f473635..32dce80aa7fd 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -642,7 +642,7 @@ struct iommu_ops { struct iommu_domain_ops { int (*attach_dev)(struct iommu_domain *domain, struct device *dev); int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev, - ioasid_t pasid); + ioasid_t pasid, struct iommu_domain *old); int (*map_pages)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, -- cgit v1.2.3 From 980e3016ebcc00021bf8190c64fd65ba23e90498 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 8 Nov 2024 10:14:04 +0800 Subject: iommu: Make set_dev_pasid op support domain replacement The iommu core is going to support domain replacement for pasid, it needs to make the set_dev_pasid op support replacing domain and keep the old domain config in the failure case. AMD iommu driver does not support domain replacement for pasid yet, so it would fail the set_dev_pasid op to keep the old config if the input @old is non-NULL. Till now, all the set_dev_pasid callbacks can handle the old parameter and can keep the old config when failed, so update the kdoc of set_dev_pasid op. Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Vasant Hegde Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20241107122234.7424-14-yi.l.liu@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 32dce80aa7fd..27f923450a7c 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -616,7 +616,8 @@ struct iommu_ops { * * EBUSY - device is attached to a domain and cannot be changed * * ENODEV - device specific errors, not able to be attached * * - treated as ENODEV by the caller. Use is discouraged - * @set_dev_pasid: set an iommu domain to a pasid of device + * @set_dev_pasid: set or replace an iommu domain to a pasid of device. The pasid of + * the device should be left in the old config in error case. * @map_pages: map a physically contiguous set of pages of the same size to * an iommu domain. * @unmap_pages: unmap a number of pages of the same size from an iommu domain -- cgit v1.2.3 From d1f4390dd28ba110f232615dc4610ac1bb2f39f2 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 8 Nov 2024 16:07:37 +0200 Subject: regmap: provide regmap_assign_bits() Add another bits helper to regmap API: this one sets given bits if value is true and clears them if it's false. Suggested-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski Signed-off-by: Tomi Valkeinen Link: https://patch.msgid.link/20241108-assign-bits-v1-1-382790562d99@ideasonboard.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index da07584284ab..0d5b74f9bd32 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1335,6 +1335,15 @@ static inline int regmap_clear_bits(struct regmap *map, return regmap_update_bits_base(map, reg, bits, 0, NULL, false, false); } +static inline int regmap_assign_bits(struct regmap *map, unsigned int reg, + unsigned int bits, bool value) +{ + if (value) + return regmap_set_bits(map, reg, bits); + else + return regmap_clear_bits(map, reg, bits); +} + int regmap_test_bits(struct regmap *map, unsigned int reg, unsigned int bits); /** @@ -1803,6 +1812,13 @@ static inline int regmap_clear_bits(struct regmap *map, return -EINVAL; } +static inline int regmap_assign_bits(struct regmap *map, unsigned int reg, + unsigned int bits, bool value) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + static inline int regmap_test_bits(struct regmap *map, unsigned int reg, unsigned int bits) { -- cgit v1.2.3 From 06cf321aadef17c7b1578369e314193c0e1c7d8e Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 7 Nov 2024 14:58:19 -0600 Subject: range: Add range_overlaps() Code to support CXL Dynamic Capacity devices will have extent ranges which need to be compared for intersection not a subset as is being checked in range_contains(). range_overlaps() is defined in btrfs with a different meaning from what is required in the standard range code. Dan Williams pointed this out in [1]. Adjust the btrfs call according to his suggestion there. Then add a generic range_overlaps(). Cc: Dan Williams Cc: Chris Mason Cc: Josef Bacik Cc: David Sterba Cc: linux-btrfs@vger.kernel.org Link: https://lore.kernel.org/all/65949f79ef908_8dc68294f2@dwillia2-xfh.jf.intel.com.notmuch/ [1] Acked-by: David Sterba Reviewed-by: Davidlohr Bueso Reviewed-by: Johannes Thumshirn Reviewed-by: Fan Ni Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Signed-off-by: Ira Weiny Link: https://patch.msgid.link/20241107-dcd-type2-upstream-v7-1-56a84e66bc36@intel.com Signed-off-by: Dave Jiang --- include/linux/range.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/range.h b/include/linux/range.h index 6ad0b73cb7ad..876cd5355158 100644 --- a/include/linux/range.h +++ b/include/linux/range.h @@ -13,11 +13,19 @@ static inline u64 range_len(const struct range *range) return range->end - range->start + 1; } +/* True if r1 completely contains r2 */ static inline bool range_contains(struct range *r1, struct range *r2) { return r1->start <= r2->start && r1->end >= r2->end; } +/* True if any part of r1 overlaps r2 */ +static inline bool range_overlaps(const struct range *r1, + const struct range *r2) +{ + return r1->start <= r2->end && r1->end >= r2->start; +} + int add_range(struct range *range, int az, int nr_range, u64 start, u64 end); -- cgit v1.2.3 From d62e2ed065785c9a7837519067e1307e4a24d2c2 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 7 Nov 2024 14:58:20 -0600 Subject: ACPI/CDAT: Add CDAT/DSMAS shared and read only flag values The Coherent Device Attribute Table (CDAT) Device Scoped Memory Affinity Structure (DSMAS) version 1.04 [1] defines flags to indicate if a DPA range is read only and/or shared. Add read only and shareable flag definitions. This change was merged in ACPICA via PR 976.[2] Link: https://uefi.org/sites/default/files/resources/Coherent%20Device%20Attribute%20Table_1.04%20published_0.pdf [1] Link: https://github.com/acpica/acpica/pull/976 [2] Cc: Robert Moore Cc: Len Brown Cc: Rafael J. Wysocki Cc: linux-acpi@vger.kernel.org Cc: acpica-devel@lists.linux.dev Acked-by: Rafael J. Wysocki Signed-off-by: Ira Weiny Link: https://patch.msgid.link/20241107-dcd-type2-upstream-v7-2-56a84e66bc36@intel.com Signed-off-by: Dave Jiang --- include/acpi/actbl1.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 199afc2cd122..387fc821703a 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -403,6 +403,8 @@ struct acpi_cdat_dsmas { /* Flags for subtable above */ #define ACPI_CDAT_DSMAS_NON_VOLATILE (1 << 2) +#define ACPI_CDAT_DSMAS_SHAREABLE (1 << 3) +#define ACPI_CDAT_DSMAS_READ_ONLY (1 << 6) /* Subtable 1: Device scoped Latency and Bandwidth Information Structure (DSLBIS) */ -- cgit v1.2.3 From d1b3dad9de7962f7bea861f27e352bac17b68bed Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:04:17 -0800 Subject: iommufd: Move struct iommufd_object to public iommufd header Prepare for an embedded structure design for driver-level iommufd_viommu objects: // include/linux/iommufd.h struct iommufd_viommu { struct iommufd_object obj; .... }; // Some IOMMU driver struct iommu_driver_viommu { struct iommufd_viommu core; .... }; It has to expose struct iommufd_object and enum iommufd_object_type from the core-level private header to the public iommufd header. Link: https://patch.msgid.link/r/54a43b0768089d690104530754f499ca05ce0074.1730836219.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 30f832a60ccb..22948dd03d67 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -8,6 +8,7 @@ #include #include +#include #include struct device; @@ -18,6 +19,29 @@ struct iommufd_ctx; struct iommufd_device; struct page; +enum iommufd_object_type { + IOMMUFD_OBJ_NONE, + IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE, + IOMMUFD_OBJ_DEVICE, + IOMMUFD_OBJ_HWPT_PAGING, + IOMMUFD_OBJ_HWPT_NESTED, + IOMMUFD_OBJ_IOAS, + IOMMUFD_OBJ_ACCESS, + IOMMUFD_OBJ_FAULT, +#ifdef CONFIG_IOMMUFD_TEST + IOMMUFD_OBJ_SELFTEST, +#endif + IOMMUFD_OBJ_MAX, +}; + +/* Base struct for all objects with a userspace ID handle. */ +struct iommufd_object { + refcount_t shortterm_users; + refcount_t users; + enum iommufd_object_type type; + unsigned int id; +}; + struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id); void iommufd_device_unbind(struct iommufd_device *idev); -- cgit v1.2.3 From e8e26a0b09f5783be471b5ffb1e31822b1272c1d Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 15 Oct 2024 22:27:31 +0200 Subject: nfs: Annotate struct pnfs_commit_array with __counted_by() Add the __counted_by compiler attribute to the flexible array member buckets to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and CONFIG_FORTIFY_SOURCE. Compile-tested only. Signed-off-by: Thorsten Blum Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 12d8e47bc5a3..559273a0f16d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1336,7 +1336,7 @@ struct pnfs_commit_array { struct rcu_head rcu; refcount_t refcount; unsigned int nbuckets; - struct pnfs_commit_bucket buckets[]; + struct pnfs_commit_bucket buckets[] __counted_by(nbuckets); }; struct pnfs_ds_commit_info { -- cgit v1.2.3 From 93970b6a143bd9f184ebab37c5862a204fb5690e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Oct 2024 15:21:43 -0400 Subject: sunrpc: remove newlines from tracepoints Tracepoint strings don't require newlines (and in fact, they are undesirable). Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/sunrpc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 5e8495216689..b13dc275ef4a 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -719,7 +719,7 @@ TRACE_EVENT(rpc_xdr_overflow, ), TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u\n", + " %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u", __entry->task_id, __entry->client_id, __get_str(progname), __entry->version, __get_str(procedure), __entry->requested, __entry->p, __entry->end, @@ -777,7 +777,7 @@ TRACE_EVENT(rpc_xdr_alignment, ), TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u\n", + " %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u", __entry->task_id, __entry->client_id, __get_str(progname), __entry->version, __get_str(procedure), __entry->offset, __entry->copied, -- cgit v1.2.3 From e32c260195e6ff72940ab7826e38e0a0066fc58f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 5 Nov 2024 11:49:04 -1000 Subject: sched_ext: Enable the ops breather and eject BPF scheduler on softlockup On 2 x Intel Sapphire Rapids machines with 224 logical CPUs, a poorly behaving BPF scheduler can live-lock the system by making multiple CPUs bang on the same DSQ to the point where soft-lockup detection triggers before SCX's own watchdog can take action. It also seems possible that the machine can be live-locked enough to prevent scx_ops_helper, which is an RT task, from running in a timely manner. Implement scx_softlockup() which is called when three quarters of soft-lockup threshold has passed. The function immediately enables the ops breather and triggers an ops error to initiate ejection of the BPF scheduler. The previous and this patch combined enable the kernel to reliably recover the system from live-lock conditions that can be triggered by a poorly behaving BPF scheduler on Intel dual socket systems. Signed-off-by: Tejun Heo Cc: Douglas Anderson Cc: Andrew Morton --- include/linux/sched/ext.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 1ddbde64a31b..65bc0a489cd2 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -205,11 +205,13 @@ struct sched_ext_entity { void sched_ext_free(struct task_struct *p); void print_scx_info(const char *log_lvl, struct task_struct *p); +void scx_softlockup(u32 dur_s); #else /* !CONFIG_SCHED_CLASS_EXT */ static inline void sched_ext_free(struct task_struct *p) {} static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {} +static inline void scx_softlockup(u32 dur_s) {} #endif /* CONFIG_SCHED_CLASS_EXT */ #endif /* _LINUX_SCHED_EXT_H */ -- cgit v1.2.3 From 20fd1383cd616d61b2a79967da1221dc6cfb8430 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Thu, 7 Nov 2024 18:52:33 +0000 Subject: iio: Move __private marking before struct element priv in struct iio_dev This is to avoid tripping up kernel-doc which filters it out before but not after the name. Note the formatting is less than ideal as a result so we may revisit in future. Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 59c58f455311..ae65890d4567 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -624,7 +624,7 @@ struct iio_dev { const struct iio_info *info; const struct iio_buffer_setup_ops *setup_ops; - void *priv __private; + void *__private priv; }; int iio_device_id(struct iio_dev *indio_dev); -- cgit v1.2.3 From fffb9fff12250018a6f4d3e411f9d289210da329 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 4 Nov 2024 11:34:24 +0200 Subject: gpio: Get rid of GPIOF_ACTIVE_LOW No more users. Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20241104093609.156059-7-andriy.shevchenko@linux.intel.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 2d105be7bbc3..6270150f4e29 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -21,9 +21,6 @@ struct device; #define GPIOF_OUT_INIT_LOW ((0 << 0) | (0 << 1)) #define GPIOF_OUT_INIT_HIGH ((0 << 0) | (1 << 1)) -/* Gpio pin is active-low */ -#define GPIOF_ACTIVE_LOW (1 << 2) - /** * struct gpio - a structure describing a GPIO with configuration * @gpio: the GPIO number -- cgit v1.2.3 From 580db513b4a9d52f306580015a1872eea0a0894e Mon Sep 17 00:00:00 2001 From: Khang Nguyen Date: Tue, 5 Nov 2024 14:19:15 +0700 Subject: net: mctp: Expose transport binding identifier via IFLA attribute MCTP control protocol implementations are transport binding dependent. Endpoint discovery is mandatory based on transport binding. Message timing requirements are specified in each respective transport binding specification. However, we currently have no means to get this information from MCTP links. Add a IFLA_MCTP_PHYS_BINDING netlink link attribute, which represents the transport type using the DMTF DSP0239-defined type numbers, returned as part of RTM_GETLINK data. We get an IFLA_MCTP_PHYS_BINDING attribute for each MCTP link, for example: - 0x00 (unspec) for loopback interface; - 0x01 (SMBus/I2C) for mctpi2c%d interfaces; and - 0x05 (serial) for mctpserial%d interfaces. Signed-off-by: Khang Nguyen Reviewed-by: Matt Johnston Link: https://patch.msgid.link/20241105071915.821871-1-khangng@os.amperecomputing.com Signed-off-by: Jakub Kicinski --- include/net/mctp.h | 18 ++++++++++++++++++ include/net/mctpdevice.h | 4 +++- include/uapi/linux/if_link.h | 1 + 3 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mctp.h b/include/net/mctp.h index 28d59ae94ca3..1ecbff7116f6 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -298,4 +298,22 @@ void mctp_routes_exit(void); int mctp_device_init(void); void mctp_device_exit(void); +/* MCTP IDs and Codes from DMTF specification + * "DSP0239 Management Component Transport Protocol (MCTP) IDs and Codes" + * https://www.dmtf.org/sites/default/files/standards/documents/DSP0239_1.11.1.pdf + */ +enum mctp_phys_binding { + MCTP_PHYS_BINDING_UNSPEC = 0x00, + MCTP_PHYS_BINDING_SMBUS = 0x01, + MCTP_PHYS_BINDING_PCIE_VDM = 0x02, + MCTP_PHYS_BINDING_USB = 0x03, + MCTP_PHYS_BINDING_KCS = 0x04, + MCTP_PHYS_BINDING_SERIAL = 0x05, + MCTP_PHYS_BINDING_I3C = 0x06, + MCTP_PHYS_BINDING_MMBI = 0x07, + MCTP_PHYS_BINDING_PCC = 0x08, + MCTP_PHYS_BINDING_UCIE = 0x09, + MCTP_PHYS_BINDING_VENDOR = 0xFF, +}; + #endif /* __NET_MCTP_H */ diff --git a/include/net/mctpdevice.h b/include/net/mctpdevice.h index 5c0d04b5c12c..957d9ef924c5 100644 --- a/include/net/mctpdevice.h +++ b/include/net/mctpdevice.h @@ -22,6 +22,7 @@ struct mctp_dev { refcount_t refs; unsigned int net; + enum mctp_phys_binding binding; const struct mctp_netdev_ops *ops; @@ -44,7 +45,8 @@ struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev); struct mctp_dev *__mctp_dev_get(const struct net_device *dev); int mctp_register_netdev(struct net_device *dev, - const struct mctp_netdev_ops *ops); + const struct mctp_netdev_ops *ops, + enum mctp_phys_binding binding); void mctp_unregister_netdev(struct net_device *dev); void mctp_dev_hold(struct mctp_dev *mdev); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8516c1ccd57a..2575e0cd9b48 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1958,6 +1958,7 @@ struct ifla_rmnet_flags { enum { IFLA_MCTP_UNSPEC, IFLA_MCTP_NET, + IFLA_MCTP_PHYS_BINDING, __IFLA_MCTP_MAX, }; -- cgit v1.2.3 From 41b3caa7c0761141aa6d508924b9d23db57a17bc Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Thu, 7 Nov 2024 16:04:38 +0000 Subject: neighbour: Add hlist_node to struct neighbour Add a doubly-linked node to neighbours, so that they can be deleted without iterating the entire bucket they're in. Signed-off-by: Gilad Naaman Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241107160444.2913124-2-gnaaman@drivenets.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 3887ed9e5026..0402447854c7 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -136,6 +136,7 @@ struct neigh_statistics { struct neighbour { struct neighbour __rcu *next; + struct hlist_node hash; struct neigh_table *tbl; struct neigh_parms *parms; unsigned long confirmed; @@ -191,6 +192,7 @@ struct pneigh_entry { struct neigh_hash_table { struct neighbour __rcu **hash_buckets; + struct hlist_head *hash_heads; unsigned int hash_shift; __u32 hash_rnd[NEIGH_NUM_HASH_RND]; struct rcu_head rcu; -- cgit v1.2.3 From d7ddee1a522ddf5b28e2a3f7093cf238c96f492a Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Thu, 7 Nov 2024 16:04:39 +0000 Subject: neighbour: Define neigh_for_each_in_bucket Introduce neigh_for_each_in_bucket in neighbour.h, to help iterate over the neighbour table more succinctly. Signed-off-by: Gilad Naaman Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241107160444.2913124-3-gnaaman@drivenets.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 0402447854c7..4b9068c5e668 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -277,6 +277,12 @@ static inline void *neighbour_priv(const struct neighbour *n) extern const struct nla_policy nda_policy[]; +#define neigh_for_each_in_bucket(pos, head) hlist_for_each_entry(pos, head, hash) +#define neigh_for_each_in_bucket_rcu(pos, head) \ + hlist_for_each_entry_rcu(pos, head, hash) +#define neigh_for_each_in_bucket_safe(pos, tmp, head) \ + hlist_for_each_entry_safe(pos, tmp, head, hash) + static inline bool neigh_key_eq32(const struct neighbour *n, const void *pkey) { return *(const u32 *)n->primary_key == *(const u32 *)pkey; -- cgit v1.2.3 From 0e3bcb0f78a0ca7cfdb7906dc79d922e19ba09b5 Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Thu, 7 Nov 2024 16:04:41 +0000 Subject: neighbour: Convert iteration to use hlist+macro Remove all usage of the bare neighbour::next pointer, replacing them with neighbour::hash and its for_each macro. Signed-off-by: Gilad Naaman Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241107160444.2913124-5-gnaaman@drivenets.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 4b9068c5e668..94cf4f8c118f 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -311,12 +311,9 @@ static inline struct neighbour *___neigh_lookup_noref( u32 hash_val; hash_val = hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); - for (n = rcu_dereference(nht->hash_buckets[hash_val]); - n != NULL; - n = rcu_dereference(n->next)) { + neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[hash_val]) if (n->dev == dev && key_eq(n, pkey)) return n; - } return NULL; } -- cgit v1.2.3 From a01a67ab2fffa7458354f0a666a6d550fa2b82fc Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Thu, 7 Nov 2024 16:04:42 +0000 Subject: neighbour: Remove bare neighbour::next pointer Remove the now-unused neighbour::next pointer, leaving struct neighbour solely with the hlist_node implementation. Signed-off-by: Gilad Naaman Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241107160444.2913124-6-gnaaman@drivenets.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 94cf4f8c118f..40aac1e24c68 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -135,7 +135,6 @@ struct neigh_statistics { #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field) struct neighbour { - struct neighbour __rcu *next; struct hlist_node hash; struct neigh_table *tbl; struct neigh_parms *parms; @@ -191,7 +190,6 @@ struct pneigh_entry { #define NEIGH_NUM_HASH_RND 4 struct neigh_hash_table { - struct neighbour __rcu **hash_buckets; struct hlist_head *hash_heads; unsigned int hash_shift; __u32 hash_rnd[NEIGH_NUM_HASH_RND]; @@ -354,7 +352,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid); void __neigh_set_probe_once(struct neighbour *neigh); -bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl); +bool neigh_remove_one(struct neighbour *ndel); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev); int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev); -- cgit v1.2.3 From f7f52738637f4361c108cad36e23ee98959a9006 Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Thu, 7 Nov 2024 16:04:43 +0000 Subject: neighbour: Create netdev->neighbour association Create a mapping between a netdev and its neighoburs, allowing for much cheaper flushes. Signed-off-by: Gilad Naaman Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241107160444.2913124-7-gnaaman@drivenets.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 7 +++++++ include/net/neighbour.h | 9 ++------- include/net/neighbour_tables.h | 12 ++++++++++++ 3 files changed, 21 insertions(+), 7 deletions(-) create mode 100644 include/net/neighbour_tables.h (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3c552b648b27..df4483598628 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -52,6 +52,7 @@ #include #include #include +#include struct netpoll_info; struct device; @@ -2032,6 +2033,9 @@ enum netdev_reg_state { * @napi_defer_hard_irqs: If not zero, provides a counter that would * allow to avoid NIC hard IRQ, on busy queues. * + * @neighbours: List heads pointing to this device's neighbours' + * dev_list, one per address-family. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2440,6 +2444,9 @@ struct net_device { */ struct net_shaper_hierarchy *net_shaper_hierarchy; #endif + + struct hlist_head neighbours[NEIGH_NR_TABLES]; + u8 priv[] ____cacheline_aligned __counted_by(priv_len); } ____cacheline_aligned; diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 40aac1e24c68..9a832cab5b1d 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -29,6 +29,7 @@ #include #include #include +#include /* * NUD stands for "neighbor unreachability detection" @@ -136,6 +137,7 @@ struct neigh_statistics { struct neighbour { struct hlist_node hash; + struct hlist_node dev_list; struct neigh_table *tbl; struct neigh_parms *parms; unsigned long confirmed; @@ -236,13 +238,6 @@ struct neigh_table { struct pneigh_entry **phash_buckets; }; -enum { - NEIGH_ARP_TABLE = 0, - NEIGH_ND_TABLE = 1, - NEIGH_NR_TABLES, - NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */ -}; - static inline int neigh_parms_family(struct neigh_parms *p) { return p->tbl->family; diff --git a/include/net/neighbour_tables.h b/include/net/neighbour_tables.h new file mode 100644 index 000000000000..bcffbe8f7601 --- /dev/null +++ b/include/net/neighbour_tables.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NET_NEIGHBOUR_TABLES_H +#define _NET_NEIGHBOUR_TABLES_H + +enum { + NEIGH_ARP_TABLE = 0, + NEIGH_ND_TABLE = 1, + NEIGH_NR_TABLES, + NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */ +}; + +#endif -- cgit v1.2.3 From 79bc0af904db647979c735563299c9b0d820e432 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 10 Oct 2024 21:35:42 +0200 Subject: hwmon: Add static visibility member to struct hwmon_ops Several drivers return the same static value in their is_visible callback, what results in code duplication. Therefore add an option for drivers to specify a static visibility directly. Signed-off-by: Heiner Kallweit Message-ID: <89690b81-2c73-47ae-9ae9-45c77b45ca0c@gmail.com> groeck: Renamed hwmon_ops_is_visible -> hwmon_is_visible Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 5c6a421ad580..3a63dff62d03 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -368,7 +368,9 @@ enum hwmon_intrusion_attributes { /** * struct hwmon_ops - hwmon device operations - * @is_visible: Callback to return attribute visibility. Mandatory. + * @visible: Static visibility. If non-zero, 'is_visible' is ignored. + * @is_visible: Callback to return attribute visibility. Mandatory unless + * 'visible' is non-zero. * Parameters are: * @const void *drvdata: * Pointer to driver-private data structure passed @@ -412,6 +414,7 @@ enum hwmon_intrusion_attributes { * The function returns 0 on success or a negative error number. */ struct hwmon_ops { + umode_t visible; umode_t (*is_visible)(const void *drvdata, enum hwmon_sensor_types type, u32 attr, int channel); int (*read)(struct device *dev, enum hwmon_sensor_types type, -- cgit v1.2.3 From 7506ebcd662b868780774d191a7c024c18c557a8 Mon Sep 17 00:00:00 2001 From: Naresh Solanki Date: Mon, 7 Oct 2024 14:34:24 +0530 Subject: hwmon: (max6639) : Configure based on DT property Remove platform data & initialize with defaults configuration & overwrite based on DT properties. Signed-off-by: Naresh Solanki Message-ID: <20241007090426.811736-1-naresh.solanki@9elements.com> [groeck: Dropped some unnecessary empty lines] Signed-off-by: Guenter Roeck --- include/linux/platform_data/max6639.h | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 include/linux/platform_data/max6639.h (limited to 'include') diff --git a/include/linux/platform_data/max6639.h b/include/linux/platform_data/max6639.h deleted file mode 100644 index 65bfdb4fdc15..000000000000 --- a/include/linux/platform_data/max6639.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_MAX6639_H -#define _LINUX_MAX6639_H - -#include - -/* platform data for the MAX6639 temperature sensor and fan control */ - -struct max6639_platform_data { - bool pwm_polarity; /* Polarity low (0) or high (1, default) */ - int ppr; /* Pulses per rotation 1..4 (default == 2) */ - int rpm_range; /* 2000, 4000 (default), 8000 or 16000 */ -}; - -#endif /* _LINUX_MAX6639_H */ -- cgit v1.2.3 From e7ac4daeed91a25382091e73818ea0cddb1afd5e Mon Sep 17 00:00:00 2001 From: Barry Song Date: Thu, 7 Nov 2024 14:12:46 +1300 Subject: mm: count zeromap read and set for swapout and swapin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the proportion of folios from the zeromap is small, missing their accounting may not significantly impact profiling. However, it's easy to construct a scenario where this becomes an issue—for example, allocating 1 GB of memory, writing zeros from userspace, followed by MADV_PAGEOUT, and then swapping it back in. In this case, the swap-out and swap-in counts seem to vanish into a black hole, potentially causing semantic ambiguity. On the other hand, Usama reported that zero-filled pages can exceed 10% in workloads utilizing zswap, while Hailong noted that some app in Android have more than 6% zero-filled pages. Before commit 0ca0c24e3211 ("mm: store zero pages to be swapped out in a bitmap"), both zswap and zRAM implemented similar optimizations, leading to these optimized-out pages being counted in either zswap or zRAM counters (with pswpin/pswpout also increasing for zRAM). With zeromap functioning prior to both zswap and zRAM, userspace will no longer detect these swap-out and swap-in actions. We have three ways to address this: 1. Introduce a dedicated counter specifically for the zeromap. 2. Use pswpin/pswpout accounting, treating the zero map as a standard backend. This approach aligns with zRAM's current handling of same-page fills at the device level. However, it would mean losing the optimized-out page counters previously available in zRAM and would not align with systems using zswap. Additionally, as noted by Nhat Pham, pswpin/pswpout counters apply only to I/O done directly to the backend device. 3. Count zeromap pages under zswap, aligning with system behavior when zswap is enabled. However, this would not be consistent with zRAM, nor would it align with systems lacking both zswap and zRAM. Given the complications with options 2 and 3, this patch selects option 1. We can find these counters from /proc/vmstat (counters for the whole system) and memcg's memory.stat (counters for the interested memcg). For example: $ grep -E 'swpin_zero|swpout_zero' /proc/vmstat swpin_zero 1648 swpout_zero 33536 $ grep -E 'swpin_zero|swpout_zero' /sys/fs/cgroup/system.slice/memory.stat swpin_zero 3905 swpout_zero 3985 This patch does not address any specific zeromap bug, but the missing swpout and swpin counts for zero-filled pages can be highly confusing and may mislead user-space agents that rely on changes in these counters as indicators. Therefore, we add a Fixes tag to encourage the inclusion of this counter in any kernel versions with zeromap. Many thanks to Kanchana for the contribution of changing count_objcg_event() to count_objcg_events() to support large folios[1], which has now been incorporated into this patch. [1] https://lkml.kernel.org/r/20241001053222.6944-5-kanchana.p.sridhar@intel.com Link: https://lkml.kernel.org/r/20241107011246.59137-1-21cnbao@gmail.com Fixes: 0ca0c24e3211 ("mm: store zero pages to be swapped out in a bitmap") Co-developed-by: Kanchana P Sridhar Signed-off-by: Barry Song Reviewed-by: Nhat Pham Reviewed-by: Chengming Zhou Acked-by: Johannes Weiner Cc: Usama Arif Cc: Yosry Ahmed Cc: Hailong Liu Cc: David Hildenbrand Cc: Hugh Dickins Cc: Matthew Wilcox (Oracle) Cc: Shakeel Butt Cc: Andi Kleen Cc: Baolin Wang Cc: Chris Li Cc: "Huang, Ying" Cc: Kairui Song Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 12 +++++++----- include/linux/vm_event_item.h | 2 ++ 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 34d2da05f2f1..e1b41554a5fb 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1760,8 +1760,9 @@ static inline int memcg_kmem_id(struct mem_cgroup *memcg) struct mem_cgroup *mem_cgroup_from_slab_obj(void *p); -static inline void count_objcg_event(struct obj_cgroup *objcg, - enum vm_event_item idx) +static inline void count_objcg_events(struct obj_cgroup *objcg, + enum vm_event_item idx, + unsigned long count) { struct mem_cgroup *memcg; @@ -1770,7 +1771,7 @@ static inline void count_objcg_event(struct obj_cgroup *objcg, rcu_read_lock(); memcg = obj_cgroup_memcg(objcg); - count_memcg_events(memcg, idx, 1); + count_memcg_events(memcg, idx, count); rcu_read_unlock(); } @@ -1825,8 +1826,9 @@ static inline struct mem_cgroup *mem_cgroup_from_slab_obj(void *p) return NULL; } -static inline void count_objcg_event(struct obj_cgroup *objcg, - enum vm_event_item idx) +static inline void count_objcg_events(struct obj_cgroup *objcg, + enum vm_event_item idx, + unsigned long count) { } diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index aed952d04132..f70d0958095c 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -134,6 +134,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_SWAP SWAP_RA, SWAP_RA_HIT, + SWPIN_ZERO, + SWPOUT_ZERO, #ifdef CONFIG_KSM KSM_SWPIN_COPY, #endif -- cgit v1.2.3 From 69bad21551c9caea8c58800f96da48a704fd311e Mon Sep 17 00:00:00 2001 From: Kanchana P Sridhar Date: Mon, 30 Sep 2024 22:32:16 -0700 Subject: mm: define obj_cgroup_get() if CONFIG_MEMCG is not defined Patch series "mm: zswap swap-out of large folios", v10. This patch series enables zswap_store() to accept and store large folios. The most significant contribution in this series is from the earlier RFC submitted by Ryan Roberts [1]. Ryan's original RFC has been migrated to mm-unstable as of 9-30-2024 in patch 6 of this series, and adapted based on code review comments received for the current patch-series. [1]: [RFC PATCH v1] mm: zswap: Store large folios without splitting https://lore.kernel.org/linux-mm/20231019110543.3284654-1-ryan.roberts@arm.com/T/#u The first few patches do the prep work for supporting large folios in zswap_store. Patch 6 provides the main functionality to swap-out large folios in zswap. Patch 7 adds sysfs per-order hugepages "zswpout" counters that get incremented upon successful zswap_store of large folios, and also updates the documentation for this: /sys/kernel/mm/transparent_hugepage/hugepages-*kB/stats/zswpout This patch series is a prerequisite for zswap compress batching of large folio swap-out and decompress batching of swap-ins based on swapin_readahead(), using Intel IAA hardware acceleration, which we would like to submit in subsequent patch-series, with performance improvement data. Thanks to Ying Huang for pre-posting review feedback and suggestions! Thanks also to Nhat, Yosry, Johannes, Barry, Chengming, Usama, Ying and Matthew for their helpful feedback, code/data reviews and suggestions! Co-development signoff request: =============================== I would like to thank Ryan Roberts for his original RFC [1] and request his co-developer signoff on patch 6 in this series. Thanks Ryan! System setup for testing: ========================= Testing of this patch series was done with mm-unstable as of 9-27-2024, commit de2fbaa6d9c3576ec7133ed02a370ec9376bf000 (without this patch-series) and mm-unstable 9-30-2024 commit c121617e3606be6575cdacfdb63cc8d67b46a568 (with this patch-series). Data was gathered on an Intel Sapphire Rapids server, dual-socket 56 cores per socket, 4 IAA devices per socket, 503 GiB RAM and 525G SSD disk partition swap. Core frequency was fixed at 2500MHz. The vm-scalability "usemem" test was run in a cgroup whose memory.high was fixed at 150G. The is no swap limit set for the cgroup. 30 usemem processes were run, each allocating and writing 10G of memory, and sleeping for 10 sec before exiting: usemem --init-time -w -O -s 10 -n 30 10g Other kernel configuration parameters: zswap compressors : zstd, deflate-iaa zswap allocator : zsmalloc vm.page-cluster : 2 In the experiments where "deflate-iaa" is used as the zswap compressor, IAA "compression verification" is enabled by default (cat /sys/bus/dsa/drivers/crypto/verify_compress). Hence each IAA compression will be decompressed internally by the "iaa_crypto" driver, the crc-s returned by the hardware will be compared and errors reported in case of mismatches. Thus "deflate-iaa" helps ensure better data integrity as compared to the software compressors, and the experimental data listed below is with verify_compress set to "1". Metrics reporting methodology: ============================== Total and average throughput are derived from the individual 30 processes' throughputs reported by usemem. elapsed/sys times are measured with perf. All percentage changes are "new" vs. "old"; hence a positive value denotes an increase in the metric, whether it is throughput or latency, and a negative value denotes a reduction in the metric. Positive throughput change percentages and negative latency change percentages denote improvements. The vm stats and sysfs hugepages stats included with the performance data provide details on the swapout activity to zswap/swap device. Testing labels used in data summaries: ====================================== The data refers to these test configurations and the before/after comparisons that they do: before-case1: ------------- mm-unstable 9-27-2024, CONFIG_THP_SWAP=N (compares zswap 4K vs. zswap 64K) In this scenario, CONFIG_THP_SWAP=N results in 64K/2M folios to be split into 4K folios that get processed by zswap. before-case2: ------------- mm-unstable 9-27-2024, CONFIG_THP_SWAP=Y (compares SSD swap large folios vs. zswap large folios) In this scenario, CONFIG_THP_SWAP=Y results in zswap rejecting large folios, which will then be stored by the SSD swap device. after: ------ v10 of this patch-series, CONFIG_THP_SWAP=Y The "after" is CONFIG_THP_SWAP=Y and v10 of this patch-series, that results in 64K/2M folios to not be split, and to be processed by zswap_store. Regression Testing: =================== I ran vm-scalability usemem without large folios, i.e., only 4K folios with mm-unstable and this patch-series. The main goal was to make sure that there is no functional or performance regression wrt the earlier zswap behavior for 4K folios, now that 4K folios will be processed by the new zswap_store() code. The data indicates there is no significant regression. ------------------------------------------------------------------------------- 4K folios: ========== zswap compressor zstd zstd zstd zstd v10 before-case1 before-case2 after vs. vs. case1 case2 ------------------------------------------------------------------------------- Total throughput (KB/s) 4,793,363 4,880,978 4,853,074 1% -1% Average throughput (KB/s) 159,778 162,699 161,769 1% -1% elapsed time (sec) 130.14 123.17 126.29 -3% 3% sys time (sec) 3,135.53 2,985.64 3,083.18 -2% 3% memcg_high 446,826 444,626 452,930 memcg_swap_fail 0 0 0 zswpout 48,932,107 48,931,971 48,931,820 zswpin 383 386 397 pswpout 0 0 0 pswpin 0 0 0 thp_swpout 0 0 0 thp_swpout_fallback 0 0 0 64kB-mthp_swpout_fallback 0 0 0 pgmajfault 3,063 3,077 3,479 swap_ra 93 94 96 swap_ra_hit 47 47 50 ZSWPOUT-64kB n/a n/a 0 SWPOUT-64kB 0 0 0 ------------------------------------------------------------------------------- Performance Testing: ==================== We list the data for 64K folios with before/after data per-compressor, followed by the same for 2M pmd-mappable folios. ------------------------------------------------------------------------------- 64K folios: zstd: ================= zswap compressor zstd zstd zstd zstd v10 before-case1 before-case2 after vs. vs. case1 case2 ------------------------------------------------------------------------------- Total throughput (KB/s) 5,222,213 1,076,611 6,159,776 18% 472% Average throughput (KB/s) 174,073 35,887 205,325 18% 472% elapsed time (sec) 120.50 347.16 108.33 -10% -69% sys time (sec) 2,930.33 248.16 2,549.65 -13% 927% memcg_high 416,773 552,200 465,874 memcg_swap_fail 3,192,906 1,293 1,012 zswpout 48,931,583 20,903 48,931,218 zswpin 384 363 410 pswpout 0 40,778,448 0 pswpin 0 16 0 thp_swpout 0 0 0 thp_swpout_fallback 0 0 0 64kB-mthp_swpout_fallback 3,192,906 1,293 1,012 pgmajfault 3,452 3,072 3,061 swap_ra 90 87 107 swap_ra_hit 42 43 57 ZSWPOUT-64kB n/a n/a 3,057,173 SWPOUT-64kB 0 2,548,653 0 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- 64K folios: deflate-iaa: ======================== zswap compressor deflate-iaa deflate-iaa deflate-iaa deflate-iaa v10 before-case1 before-case2 after vs. vs. case1 case2 ------------------------------------------------------------------------------- Total throughput (KB/s) 5,652,608 1,089,180 7,189,778 27% 560% Average throughput (KB/s) 188,420 36,306 239,659 27% 560% elapsed time (sec) 102.90 343.35 87.05 -15% -75% sys time (sec) 2,246.86 213.53 1,864.16 -17% 773% memcg_high 576,104 502,907 642,083 memcg_swap_fail 4,016,117 1,407 1,478 zswpout 61,163,423 22,444 57,798,716 zswpin 401 368 454 pswpout 0 40,862,080 0 pswpin 0 20 0 thp_swpout 0 0 0 thp_swpout_fallback 0 0 0 64kB-mthp_swpout_fallback 4,016,117 1,407 1,478 pgmajfault 3,063 3,153 3,122 swap_ra 96 93 156 swap_ra_hit 46 45 83 ZSWPOUT-64kB n/a n/a 3,611,032 SWPOUT-64kB 0 2,553,880 0 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- 2M folios: zstd: ================ zswap compressor zstd zstd zstd zstd v10 before-case1 before-case2 after vs. vs. case1 case2 ------------------------------------------------------------------------------- Total throughput (KB/s) 5,895,500 1,109,694 6,484,224 10% 484% Average throughput (KB/s) 196,516 36,989 216,140 10% 484% elapsed time (sec) 108.77 334.28 106.33 -2% -68% sys time (sec) 2,657.14 94.88 2,376.13 -11% 2404% memcg_high 64,200 66,316 56,898 memcg_swap_fail 101,182 70 27 zswpout 48,931,499 36,507 48,890,640 zswpin 380 379 377 pswpout 0 40,166,400 0 pswpin 0 0 0 thp_swpout 0 78,450 0 thp_swpout_fallback 101,182 70 27 2MB-mthp_swpout_fallback 0 0 27 pgmajfault 3,067 3,417 3,311 swap_ra 91 90 854 swap_ra_hit 45 45 810 ZSWPOUT-2MB n/a n/a 95,459 SWPOUT-2MB 0 78,450 0 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- 2M folios: deflate-iaa: ======================= zswap compressor deflate-iaa deflate-iaa deflate-iaa deflate-iaa v10 before-case1 before-case2 after vs. vs. case1 case2 ------------------------------------------------------------------------------- Total throughput (KB/s) 6,286,587 1,126,785 7,073,464 13% 528% Average throughput (KB/s) 209,552 37,559 235,782 13% 528% elapsed time (sec) 96.19 333.03 85.79 -11% -74% sys time (sec) 2,141.44 99.96 1,826.67 -15% 1727% memcg_high 99,253 64,666 79,718 memcg_swap_fail 129,074 53 165 zswpout 61,312,794 28,321 56,045,120 zswpin 383 406 403 pswpout 0 40,048,128 0 pswpin 0 0 0 thp_swpout 0 78,219 0 thp_swpout_fallback 129,074 53 165 2MB-mthp_swpout_fallback 0 0 165 pgmajfault 3,430 3,077 31,468 swap_ra 91 103 84,373 swap_ra_hit 47 46 84,317 ZSWPOUT-2MB n/a n/a 109,229 SWPOUT-2MB 0 78,219 0 ------------------------------------------------------------------------------- And finally, this is a comparison of deflate-iaa vs. zstd with v10 of this patch-series: --------------------------------------------- zswap_store large folios v10 Impr w/ deflate-iaa vs. zstd 64K folios 2M folios --------------------------------------------- Throughput (KB/s) 17% 9% elapsed time (sec) -20% -19% sys time (sec) -27% -23% --------------------------------------------- Conclusions based on the performance results: ============================================= v10 wrt before-case1: --------------------- We see significant improvements in throughput, elapsed and sys time for zstd and deflate-iaa, when comparing before-case1 (THP_SWAP=N) vs. after (THP_SWAP=Y) with zswap_store large folios. v10 wrt before-case2: --------------------- We see even more significant improvements in throughput and elapsed time for zstd and deflate-iaa, when comparing before-case2 (large-folio-SSD) vs. after (large-folio-zswap). The sys time increases with large-folio-zswap as expected, due to the CPU compression time vs. asynchronous disk write times, as pointed out by Ying and Yosry. In before-case2, when zswap does not store large folios, only allocations and cgroup charging due to 4K folio zswap stores count towards the cgroup memory limit. However, in the after scenario, with the introduction of zswap_store() of large folios, there is an added component of the zswap compressed pool usage from large folio stores from potentially all 30 processes, that gets counted towards the memory limit. As a result, we see higher swapout activity in the "after" data. Summary: ======== The v10 data presented above shows that zswap_store of large folios demonstrates good throughput/performance improvements compared to conventional SSD swap of large folios with a sufficiently large 525G SSD swap device. Hence, it seems reasonable for zswap_store to support large folios, so that further performance improvements can be implemented. In the experimental setup used in this patchset, we have enabled IAA compress verification to ensure additional hardware data integrity CRC checks not currently done by the software compressors. We see good throughput/latency improvements with deflate-iaa vs. zstd with zswap_store of large folios. Some of the ideas for further reducing latency that have shown promise in our experiments, are: 1) IAA compress/decompress batching. 2) Distributing compress jobs across all IAA devices on the socket. The tests run for this patchset are using only 1 IAA device per core, that avails of 2 compress engines on the device. In our experiments with IAA batching, we distribute compress jobs from all cores to the 8 compress engines available per socket. We further compress the pages in each folio in parallel in the accelerator. As a result, we improve compress latency and reclaim throughput. In decompress batching, we use swapin_readahead to generate a prefetch batch of 4K folios that we decompress in parallel in IAA. ------------------------------------------------------------------------------ IAA compress/decompress batching Further improvements wrt v10 zswap_store Sequential subpage store using "deflate-iaa": "deflate-iaa" Batching "deflate-iaa-canned" [2] Batching Additional Impr Additional Impr 64K folios 2M folios 64K folios 2M folios ------------------------------------------------------------------------------ Throughput (KB/s) 19% 43% 26% 55% elapsed time (sec) -5% -14% -10% -21% sys time (sec) 4% -7% -4% -18% ------------------------------------------------------------------------------ With zswap IAA compress/decompress batching, we are able to demonstrate significant performance improvements and memory savings in server scalability experiments in highly contended system scenarios under significant memory pressure; as compared to software compressors. We hope to submit this work in subsequent patch series. The current patch-series is a prequisite for these future submissions. [1] https://lore.kernel.org/linux-mm/20231019110543.3284654-1-ryan.roberts@arm.com/T/#u [2] https://patchwork.kernel.org/project/linux-crypto/cover/cover.1710969449.git.andre.glover@linux.intel.com/ This patch (of 6): This resolves an issue with obj_cgroup_get() not being defined if CONFIG_MEMCG is not defined. Before this patch, we would see build errors if obj_cgroup_get() is called from code that is agnostic of CONFIG_MEMCG. The zswap_store() changes for large folios in subsequent commits will require the use of obj_cgroup_get() in zswap code that falls into this category. Link: https://lkml.kernel.org/r/20241001053222.6944-1-kanchana.p.sridhar@intel.com Link: https://lkml.kernel.org/r/20241001053222.6944-2-kanchana.p.sridhar@intel.com Signed-off-by: Kanchana P Sridhar Reviewed-by: Nhat Pham Reviewed-by: Yosry Ahmed Reviewed-by: Chengming Zhou Acked-by: Johannes Weiner Cc: "Huang, Ying" Cc: Matthew Wilcox Cc: Ryan Roberts Cc: Shakeel Butt Cc: Usama Arif Cc: Wajdi Feghali Cc: "Zou, Nanhai" Cc: Barry Song <21cnbao@gmail.com> Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0bd8f61a5597..5502aa8e138e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1233,6 +1233,10 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css) return NULL; } +static inline void obj_cgroup_get(struct obj_cgroup *objcg) +{ +} + static inline void obj_cgroup_put(struct obj_cgroup *objcg) { } -- cgit v1.2.3 From 6e1fa555ec772046ec3b903f507ff7fed5323796 Mon Sep 17 00:00:00 2001 From: Kanchana P Sridhar Date: Mon, 30 Sep 2024 22:32:20 -0700 Subject: mm: zswap: modify zswap_stored_pages to be atomic_long_t For zswap_store() to support large folios, we need to be able to do a batch update of zswap_stored_pages upon successful store of all pages in the folio. For this, we need to add folio_nr_pages(), which returns a long, to zswap_stored_pages. Link: https://lkml.kernel.org/r/20241001053222.6944-6-kanchana.p.sridhar@intel.com Signed-off-by: Kanchana P Sridhar Acked-by: Yosry Ahmed Acked-by: Johannes Weiner Reviewed-by: Nhat Pham Cc: Chengming Zhou Cc: "Huang, Ying" Cc: Matthew Wilcox Cc: Ryan Roberts Cc: Shakeel Butt Cc: Usama Arif Cc: Wajdi Feghali Cc: "Zou, Nanhai" Cc: Barry Song <21cnbao@gmail.com> Signed-off-by: Andrew Morton --- include/linux/zswap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/zswap.h b/include/linux/zswap.h index 9cd1beef0654..d961ead91bf1 100644 --- a/include/linux/zswap.h +++ b/include/linux/zswap.h @@ -7,7 +7,7 @@ struct lruvec; -extern atomic_t zswap_stored_pages; +extern atomic_long_t zswap_stored_pages; #ifdef CONFIG_ZSWAP -- cgit v1.2.3 From 0c560dd86040556a9e55d88229d9295672428c78 Mon Sep 17 00:00:00 2001 From: Kanchana P Sridhar Date: Mon, 30 Sep 2024 22:32:22 -0700 Subject: mm: swap: count successful large folio zswap stores in hugepage zswpout stats Added a new MTHP_STAT_ZSWPOUT entry to the sysfs transparent_hugepage stats so that successful large folio zswap stores can be accounted under the per-order sysfs "zswpout" stats: /sys/kernel/mm/transparent_hugepage/hugepages-*kB/stats/zswpout Other non-zswap swap device swap-out events will be counted under the existing sysfs "swpout" stats: /sys/kernel/mm/transparent_hugepage/hugepages-*kB/stats/swpout Also, added documentation for the newly added sysfs per-order hugepage "zswpout" stats. The documentation clarifies that only non-zswap swapouts will be accounted in the existing "swpout" stats. Link: https://lkml.kernel.org/r/20241001053222.6944-8-kanchana.p.sridhar@intel.com Signed-off-by: Kanchana P Sridhar Reviewed-by: Nhat Pham Cc: Chengming Zhou Cc: "Huang, Ying" Cc: Johannes Weiner Cc: Matthew Wilcox Cc: Ryan Roberts Cc: Shakeel Butt Cc: Usama Arif Cc: Wajdi Feghali Cc: Yosry Ahmed Cc: "Zou, Nanhai" Cc: Barry Song <21cnbao@gmail.com> Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 006f730545c2..c59e5aa9b081 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -119,6 +119,7 @@ enum mthp_stat_item { MTHP_STAT_ANON_FAULT_ALLOC, MTHP_STAT_ANON_FAULT_FALLBACK, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE, + MTHP_STAT_ZSWPOUT, MTHP_STAT_SWPOUT, MTHP_STAT_SWPOUT_FALLBACK, MTHP_STAT_SHMEM_ALLOC, -- cgit v1.2.3 From aaf2914aec0fa67395574f6fa6b726168b049e60 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sat, 26 Oct 2024 21:24:23 +1300 Subject: mm: add per-order mTHP swpin counters This helps profile the sizes of folios being swapped in. Currently, only mTHP swap-out is being counted. The new interface can be found at: /sys/kernel/mm/transparent_hugepage/hugepages-/stats swpin For example, cat /sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/swpin 12809 cat /sys/kernel/mm/transparent_hugepage/hugepages-32kB/stats/swpin 4763 [v-songbaohua@oppo.com: add a blank line in doc] Link: https://lkml.kernel.org/r/20241030233423.80759-1-21cnbao@gmail.com Link: https://lkml.kernel.org/r/20241026082423.26298-1-21cnbao@gmail.com Signed-off-by: Barry Song Reviewed-by: Baolin Wang Acked-by: David Hildenbrand Cc: Chris Li Cc: Yosry Ahmed Cc: "Huang, Ying" Cc: Kairui Song Cc: Ryan Roberts Cc: Kanchana P Sridhar Cc: Usama Arif Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index c59e5aa9b081..b94c2e8ee918 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -120,6 +120,7 @@ enum mthp_stat_item { MTHP_STAT_ANON_FAULT_FALLBACK, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE, MTHP_STAT_ZSWPOUT, + MTHP_STAT_SWPIN, MTHP_STAT_SWPOUT, MTHP_STAT_SWPOUT_FALLBACK, MTHP_STAT_SHMEM_ALLOC, -- cgit v1.2.3 From 5f6170a469cd2c13ad4dffe42714cf777b132451 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 28 Oct 2024 14:13:27 +0000 Subject: mm: pagewalk: add the ability to install PTEs Patch series "implement lightweight guard pages", v4. Userland library functions such as allocators and threading implementations often require regions of memory to act as 'guard pages' - mappings which, when accessed, result in a fatal signal being sent to the accessing process. The current means by which these are implemented is via a PROT_NONE mmap() mapping, which provides the required semantics however incur an overhead of a VMA for each such region. With a great many processes and threads, this can rapidly add up and incur a significant memory penalty. It also has the added problem of preventing merges that might otherwise be permitted. This series takes a different approach - an idea suggested by Vlastimil Babka (and before him David Hildenbrand and Jann Horn - perhaps more - the provenance becomes a little tricky to ascertain after this - please forgive any omissions!) - rather than locating the guard pages at the VMA layer, instead placing them in page tables mapping the required ranges. Early testing of the prototype version of this code suggests a 5 times speed up in memory mapping invocations (in conjunction with use of process_madvise()) and a 13% reduction in VMAs on an entirely idle android system and unoptimised code. We expect with optimisation and a loaded system with a larger number of guard pages this could significantly increase, but in any case these numbers are encouraging. This way, rather than having separate VMAs specifying which parts of a range are guard pages, instead we have a VMA spanning the entire range of memory a user is permitted to access and including ranges which are to be 'guarded'. After mapping this, a user can specify which parts of the range should result in a fatal signal when accessed. By restricting the ability to specify guard pages to memory mapped by existing VMAs, we can rely on the mappings being torn down when the mappings are ultimately unmapped and everything works simply as if the memory were not faulted in, from the point of view of the containing VMAs. This mechanism in effect poisons memory ranges similar to hardware memory poisoning, only it is an entirely software-controlled form of poisoning. The mechanism is implemented via madvise() behaviour - MADV_GUARD_INSTALL which installs page table-level guard page markers - and MADV_GUARD_REMOVE - which clears them. Guard markers can be installed across multiple VMAs and any existing mappings will be cleared, that is zapped, before installing the guard page markers in the page tables. There is no concept of 'nested' guard markers, multiple attempts to install guard markers in a range will, after the first attempt, have no effect. Importantly, removing guard markers over a range that contains both guard markers and ordinary backed memory has no effect on anything but the guard markers (including leaving huge pages un-split), so a user can safely remove guard markers over a range of memory leaving the rest intact. The actual mechanism by which the page table entries are specified makes use of existing logic - PTE markers, which are used for the userfaultfd UFFDIO_POISON mechanism. Unfortunately PTE_MARKER_POISONED is not suited for the guard page mechanism as it results in VM_FAULT_HWPOISON semantics in the fault handler, so we add our own specific PTE_MARKER_GUARD and adapt existing logic to handle it. We also extend the generic page walk mechanism to allow for installation of PTEs (carefully restricted to memory management logic only to prevent unwanted abuse). We ensure that zapping performed by MADV_DONTNEED and MADV_FREE do not remove guard markers, nor does forking (except when VM_WIPEONFORK is specified for a VMA which implies a total removal of memory characteristics). It's important to note that the guard page implementation is emphatically NOT a security feature, so a user can remove the markers if they wish. We simply implement it in such a way as to provide the least surprising behaviour. An extensive set of self-tests are provided which ensure behaviour is as expected and additionally self-documents expected behaviour of guard ranges. This patch (of 5): The existing generic pagewalk logic permits the walking of page tables, invoking callbacks at individual page table levels via user-provided mm_walk_ops callbacks. This is useful for traversing existing page table entries, but precludes the ability to establish new ones. Existing mechanism for performing a walk which also installs page table entries if necessary are heavily duplicated throughout the kernel, each with semantic differences from one another and largely unavailable for use elsewhere. Rather than add yet another implementation, we extend the generic pagewalk logic to enable the installation of page table entries by adding a new install_pte() callback in mm_walk_ops. If this is specified, then upon encountering a missing page table entry, we allocate and install a new one and continue the traversal. If a THP huge page is encountered at either the PMD or PUD level we split it only if there are ops->pte_entry() (or ops->pmd_entry at PUD level), otherwise if there is only an ops->install_pte(), we avoid the unnecessary split. We do not support hugetlb at this stage. If this function returns an error, or an allocation fails during the operation, we abort the operation altogether. It is up to the caller to deal appropriately with partially populated page table ranges. If install_pte() is defined, the semantics of pte_entry() change - this callback is then only invoked if the entry already exists. This is a useful property, as it allows a caller to handle existing PTEs while installing new ones where necessary in the specified range. If install_pte() is not defined, then there is no functional difference to this patch, so all existing logic will work precisely as it did before. As we only permit the installation of PTEs where a mapping does not already exist there is no need for TLB management, however we do invoke update_mmu_cache() for architectures which require manual maintenance of mappings for other CPUs. We explicitly do not allow the existing page walk API to expose this feature as it is dangerous and intended for internal mm use only. Therefore we provide a new walk_page_range_mm() function exposed only to mm/internal.h. We take the opportunity to additionally clean up the page walker logic to be a little easier to follow. Link: https://lkml.kernel.org/r/cover.1730123433.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/51b432ebef013e3fdf9f92101533435de1bffadf.1730123433.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Jann Horn Reviewed-by: Vlastimil Babka Suggested-by: Vlastimil Babka Suggested-by: Jann Horn Suggested-by: David Hildenbrand Cc: Arnd Bergmann Cc: Christian Brauner Cc: Christoph Hellwig Cc: Chris Zankel Cc: Helge Deller Cc: James E.J. Bottomley Cc: Jeff Xu Cc: John Hubbard Cc: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Cc: Matt Turner Cc: Max Filippov Cc: Muchun Song Cc: Paul E. McKenney Cc: Richard Henderson Cc: Shuah Khan Cc: Sidhartha Kumar Cc: Suren Baghdasaryan Cc: Thomas Bogendoerfer Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/pagewalk.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index f5eb5a32aeed..9700a29f8afb 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -25,12 +25,15 @@ enum page_walk_lock { * this handler is required to be able to handle * pmd_trans_huge() pmds. They may simply choose to * split_huge_page() instead of handling it explicitly. - * @pte_entry: if set, called for each PTE (lowest-level) entry, - * including empty ones + * @pte_entry: if set, called for each PTE (lowest-level) entry + * including empty ones, except if @install_pte is set. + * If @install_pte is set, @pte_entry is called only for + * existing PTEs. * @pte_hole: if set, called for each hole at all levels, * depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD. * Any folded depths (where PTRS_PER_P?D is equal to 1) - * are skipped. + * are skipped. If @install_pte is specified, this will + * not trigger for any populated ranges. * @hugetlb_entry: if set, called for each hugetlb entry. This hook * function is called with the vma lock held, in order to * protect against a concurrent freeing of the pte_t* or @@ -51,6 +54,13 @@ enum page_walk_lock { * @pre_vma: if set, called before starting walk on a non-null vma. * @post_vma: if set, called after a walk on a non-null vma, provided * that @pre_vma and the vma walk succeeded. + * @install_pte: if set, missing page table entries are installed and + * thus all levels are always walked in the specified + * range. This callback is then invoked at the PTE level + * (having split any THP pages prior), providing the PTE to + * install. If allocations fail, the walk is aborted. This + * operation is only available for userland memory. Not + * usable for hugetlb ranges. * * p?d_entry callbacks are called even if those levels are folded on a * particular architecture/configuration. @@ -76,6 +86,8 @@ struct mm_walk_ops { int (*pre_vma)(unsigned long start, unsigned long end, struct mm_walk *walk); void (*post_vma)(struct mm_walk *walk); + int (*install_pte)(unsigned long addr, unsigned long next, + pte_t *ptep, struct mm_walk *walk); enum page_walk_lock walk_lock; }; -- cgit v1.2.3 From 7c53dfbdb024915f23f03f972b33744309d4608b Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 28 Oct 2024 14:13:28 +0000 Subject: mm: add PTE_MARKER_GUARD PTE marker Add a new PTE marker that results in any access causing the accessing process to segfault. This is preferable to PTE_MARKER_POISONED, which results in the same handling as hardware poisoned memory, and is thus undesirable for cases where we simply wish to 'soft' poison a range. This is in preparation for implementing the ability to specify guard pages at the page table level, i.e. ranges that, when accessed, should cause process termination. Additionally, rename zap_drop_file_uffd_wp() to zap_drop_markers() - the function checks the ZAP_FLAG_DROP_MARKER flag so naming it for this single purpose was simply incorrect. We then reuse the same logic to determine whether a zap should clear a guard entry - this should only be performed on teardown and never on MADV_DONTNEED or MADV_FREE. We additionally add a WARN_ON_ONCE() in hugetlb logic should a guard marker be encountered there, as we explicitly do not support this operation and this should not occur. Link: https://lkml.kernel.org/r/f47f3d5acca2dcf9bbf655b6d33f3dc713e4a4a0.1730123433.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: Vlastimil Babka Suggested-by: Vlastimil Babka Suggested-by: Jann Horn Suggested-by: David Hildenbrand Cc: Arnd Bergmann Cc: Christian Brauner Cc: Christoph Hellwig Cc: Chris Zankel Cc: Helge Deller Cc: James E.J. Bottomley Cc: Jeff Xu Cc: John Hubbard Cc: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Cc: Matt Turner Cc: Max Filippov Cc: Muchun Song Cc: Paul E. McKenney Cc: Richard Henderson Cc: Shuah Khan Cc: Shuah Khan Cc: Sidhartha Kumar Cc: Suren Baghdasaryan Cc: Thomas Bogendoerfer Signed-off-by: Andrew Morton --- include/linux/mm_inline.h | 2 +- include/linux/swapops.h | 24 +++++++++++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 355cf46a01a6..1b6a917fffa4 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -544,7 +544,7 @@ static inline pte_marker copy_pte_marker( { pte_marker srcm = pte_marker_get(entry); /* Always copy error entries. */ - pte_marker dstm = srcm & PTE_MARKER_POISONED; + pte_marker dstm = srcm & (PTE_MARKER_POISONED | PTE_MARKER_GUARD); /* Only copy PTE markers if UFFD register matches. */ if ((srcm & PTE_MARKER_UFFD_WP) && userfaultfd_wp(dst_vma)) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index cb468e418ea1..96f26e29fefe 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -426,9 +426,19 @@ typedef unsigned long pte_marker; * "Poisoned" here is meant in the very general sense of "future accesses are * invalid", instead of referring very specifically to hardware memory errors. * This marker is meant to represent any of various different causes of this. + * + * Note that, when encountered by the faulting logic, PTEs with this marker will + * result in VM_FAULT_HWPOISON and thus regardless trigger hardware memory error + * logic. */ #define PTE_MARKER_POISONED BIT(1) -#define PTE_MARKER_MASK (BIT(2) - 1) +/* + * Indicates that, on fault, this PTE will case a SIGSEGV signal to be + * sent. This means guard markers behave in effect as if the region were mapped + * PROT_NONE, rather than if they were a memory hole or equivalent. + */ +#define PTE_MARKER_GUARD BIT(2) +#define PTE_MARKER_MASK (BIT(3) - 1) static inline swp_entry_t make_pte_marker_entry(pte_marker marker) { @@ -464,6 +474,18 @@ static inline int is_poisoned_swp_entry(swp_entry_t entry) { return is_pte_marker_entry(entry) && (pte_marker_get(entry) & PTE_MARKER_POISONED); + +} + +static inline swp_entry_t make_guard_swp_entry(void) +{ + return make_pte_marker_entry(PTE_MARKER_GUARD); +} + +static inline int is_guard_swp_entry(swp_entry_t entry) +{ + return is_pte_marker_entry(entry) && + (pte_marker_get(entry) & PTE_MARKER_GUARD); } /* -- cgit v1.2.3 From 662df3e5c37666d6ed75c88098699e070a4b35b5 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 28 Oct 2024 14:13:29 +0000 Subject: mm: madvise: implement lightweight guard page mechanism Implement a new lightweight guard page feature, that is regions of userland virtual memory that, when accessed, cause a fatal signal to arise. Currently users must establish PROT_NONE ranges to achieve this. However this is very costly memory-wise - we need a VMA for each and every one of these regions AND they become unmergeable with surrounding VMAs. In addition repeated mmap() calls require repeated kernel context switches and contention of the mmap lock to install these ranges, potentially also having to unmap memory if installed over existing ranges. The lightweight guard approach eliminates the VMA cost altogether - rather than establishing a PROT_NONE VMA, it operates at the level of page table entries - establishing PTE markers such that accesses to them cause a fault followed by a SIGSGEV signal being raised. This is achieved through the PTE marker mechanism, which we have already extended to provide PTE_MARKER_GUARD, which we installed via the generic page walking logic which we have extended for this purpose. These guard ranges are established with MADV_GUARD_INSTALL. If the range in which they are installed contain any existing mappings, they will be zapped, i.e. free the range and unmap memory (thus mimicking the behaviour of MADV_DONTNEED in this respect). Any existing guard entries will be left untouched. There is therefore no nesting of guarded pages. Guarded ranges are NOT cleared by MADV_DONTNEED nor MADV_FREE (in both instances the memory range may be reused at which point a user would expect guards to still be in place), but they are cleared via MADV_GUARD_REMOVE, process teardown or unmapping of memory ranges. The guard property can be removed from ranges via MADV_GUARD_REMOVE. The ranges over which this is applied, should they contain non-guard entries, will be untouched, with only guard entries being cleared. We permit this operation on anonymous memory only, and only VMAs which are non-special, non-huge and not mlock()'d (if we permitted this we'd have to drop locked pages which would be rather counterintuitive). Racing page faults can cause repeated attempts to install guard pages that are interrupted, result in a zap, and this process can end up being repeated. If this happens more than would be expected in normal operation, we rescind locks and retry the whole thing, which avoids lock contention in this scenario. Link: https://lkml.kernel.org/r/6aafb5821bf209f277dfae0787abb2ef87a37542.1730123433.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Suggested-by: Vlastimil Babka Suggested-by: Jann Horn Suggested-by: David Hildenbrand Suggested-by: Vlastimil Babka Suggested-by: Jann Horn Suggested-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Arnd Bergmann Cc: Christian Brauner Cc: Christoph Hellwig Cc: Chris Zankel Cc: Helge Deller Cc: James E.J. Bottomley Cc: Jeff Xu Cc: John Hubbard Cc: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Cc: Matt Turner Cc: Max Filippov Cc: Muchun Song Cc: Paul E. McKenney Cc: Richard Henderson Cc: Shuah Khan Cc: Shuah Khan Cc: Sidhartha Kumar Cc: Suren Baghdasaryan Cc: Thomas Bogendoerfer Signed-off-by: Andrew Morton --- include/uapi/asm-generic/mman-common.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index 6ce1f1ceb432..1ea2c4c33b86 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -79,6 +79,9 @@ #define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ +#define MADV_GUARD_INSTALL 102 /* fatal signal on access to range */ +#define MADV_GUARD_REMOVE 103 /* unguard range */ + /* compatibility flags */ #define MAP_FILE 0 -- cgit v1.2.3 From ab6e8e74e47362bd9d79dd4394a167b2afe0cc77 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 27 Oct 2024 13:14:42 -0700 Subject: mm: delete the unused put_pages_list() The last user of put_pages_list() converted away from it in 6.10 commit 06c375053cef ("iommu/vt-d: add wrapper functions for page allocations"): delete put_pages_list(). Link: https://lkml.kernel.org/r/d9985d6a-293e-176b-e63d-82fdfd28c139@google.com Signed-off-by: Hugh Dickins Acked-by: Peter Xu Acked-by: David Hildenbrand Reviewed-by: Yang Shi Cc: Matthew Wilcox (Oracle) Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 78848fbefe94..32888a97ab44 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1286,8 +1286,6 @@ static inline struct folio *virt_to_folio(const void *x) void __folio_put(struct folio *folio); -void put_pages_list(struct list_head *pages); - void split_page(struct page *page, unsigned int order); void folio_copy(struct folio *dst, struct folio *src); int folio_mc_copy(struct folio *dst, struct folio *src); -- cgit v1.2.3 From f914ac96ee8828368f5a24553e75216d76da0b42 Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Mon, 28 Oct 2024 19:11:06 -0700 Subject: memcg: add flush tracepoint This tracepoint gives visibility on how often the flushing of memcg stats occurs and contains info on whether it was forced, skipped, and the value of stats updated. It can help with understanding how readers are affected by having to perform the flush, and the effectiveness of the flush by inspecting the number of stats updated. Paired with the recently added tracepoints for tracing rstat updates, it can also help show correlation where stats exceed thresholds frequently. Link: https://lkml.kernel.org/r/20241029021106.25587-3-inwardvessel@gmail.com Signed-off-by: JP Kobryn Reviewed-by: Yosry Ahmed Acked-by: Shakeel Butt Cc: Johannes Weiner Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/trace/events/memcg.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/trace/events/memcg.h b/include/trace/events/memcg.h index 8667e57816d2..dfe2f51019b4 100644 --- a/include/trace/events/memcg.h +++ b/include/trace/events/memcg.h @@ -74,6 +74,31 @@ DEFINE_EVENT(memcg_rstat_events, count_memcg_events, TP_ARGS(memcg, item, val) ); +TRACE_EVENT(memcg_flush_stats, + + TP_PROTO(struct mem_cgroup *memcg, s64 stats_updates, + bool force, bool needs_flush), + + TP_ARGS(memcg, stats_updates, force, needs_flush), + + TP_STRUCT__entry( + __field(u64, id) + __field(s64, stats_updates) + __field(bool, force) + __field(bool, needs_flush) + ), + + TP_fast_assign( + __entry->id = cgroup_id(memcg->css.cgroup); + __entry->stats_updates = stats_updates; + __entry->force = force; + __entry->needs_flush = needs_flush; + ), + + TP_printk("memcg_id=%llu stats_updates=%lld force=%d needs_flush=%d", + __entry->id, __entry->stats_updates, + __entry->force, __entry->needs_flush) +); #endif /* _TRACE_MEMCG_H */ -- cgit v1.2.3 From 48b50624aec454ce0fa8f78ef96e2f43bc0be495 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 Oct 2024 12:33:38 +0200 Subject: backing-file: clean up the API - Pass iocb to ctx->end_write() instead of file + pos - Get rid of ctx->user_file, which is redundant most of the time - Instead pass iocb to backing_file_splice_read and backing_file_splice_write Signed-off-by: Miklos Szeredi Signed-off-by: Amir Goldstein --- include/linux/backing-file.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h index 2eed0ffb5e8f..1476a6ed1bfd 100644 --- a/include/linux/backing-file.h +++ b/include/linux/backing-file.h @@ -14,9 +14,8 @@ struct backing_file_ctx { const struct cred *cred; - struct file *user_file; - void (*accessed)(struct file *); - void (*end_write)(struct file *, loff_t, ssize_t); + void (*accessed)(struct file *file); + void (*end_write)(struct kiocb *iocb, ssize_t); }; struct file *backing_file_open(const struct path *user_path, int flags, @@ -31,13 +30,13 @@ ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter, ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter, struct kiocb *iocb, int flags, struct backing_file_ctx *ctx); -ssize_t backing_file_splice_read(struct file *in, loff_t *ppos, +ssize_t backing_file_splice_read(struct file *in, struct kiocb *iocb, struct pipe_inode_info *pipe, size_t len, unsigned int flags, struct backing_file_ctx *ctx); ssize_t backing_file_splice_write(struct pipe_inode_info *pipe, - struct file *out, loff_t *ppos, size_t len, - unsigned int flags, + struct file *out, struct kiocb *iocb, + size_t len, unsigned int flags, struct backing_file_ctx *ctx); int backing_file_mmap(struct file *file, struct vm_area_struct *vma, struct backing_file_ctx *ctx); -- cgit v1.2.3 From 49dffdfde462c7823de6ed882f71ce233aaeba63 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 6 Nov 2024 16:57:17 -0800 Subject: cred: Add a light version of override/revert_creds() Add a light version of override/revert_creds(), this should only be used when the credentials in question will outlive the critical section and the critical section doesn't change the ->usage of the credentials. Suggested-by: Christian Brauner Signed-off-by: Vinicius Costa Gomes Signed-off-by: Amir Goldstein --- include/linux/cred.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index 2976f534a7a3..e4a3155fe409 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -172,6 +172,24 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) cred->cap_inheritable)); } +/* + * Override creds without bumping reference count. Caller must ensure + * reference remains valid or has taken reference. Almost always not the + * interface you want. Use override_creds()/revert_creds() instead. + */ +static inline const struct cred *override_creds_light(const struct cred *override_cred) +{ + const struct cred *old = current->cred; + + rcu_assign_pointer(current->cred, override_cred); + return old; +} + +static inline void revert_creds_light(const struct cred *revert_cred) +{ + rcu_assign_pointer(current->cred, revert_cred); +} + /** * get_new_cred_many - Get references on a new set of credentials * @cred: The new credentials to reference -- cgit v1.2.3 From 5c2e7736e20d9b348a44cafbfa639fe2653fbc34 Mon Sep 17 00:00:00 2001 From: Eder Zulian Date: Thu, 7 Nov 2024 17:32:23 +0100 Subject: rust: helpers: Avoid raw_spin_lock initialization for PREEMPT_RT When PREEMPT_RT=y, spin locks are mapped to rt_mutex types, so using spinlock_check() + __raw_spin_lock_init() to initialize spin locks is incorrect, and would cause build errors. Introduce __spin_lock_init() to initialize a spin lock with lockdep rquired information for PREEMPT_RT builds, and use it in the Rust helper. Fixes: d2d6422f8bd1 ("x86: Allow to enable PREEMPT_RT.") Closes: https://lore.kernel.org/oe-kbuild-all/202409251238.vetlgXE9-lkp@intel.com/ Reported-by: kernel test robot Signed-off-by: Eder Zulian Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Boqun Feng Tested-by: Boqun Feng Link: https://lore.kernel.org/r/20241107163223.2092690-2-ezulian@redhat.com --- include/linux/spinlock_rt.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h index f9f14e135be7..f6499c37157d 100644 --- a/include/linux/spinlock_rt.h +++ b/include/linux/spinlock_rt.h @@ -16,22 +16,21 @@ static inline void __rt_spin_lock_init(spinlock_t *lock, const char *name, } #endif -#define spin_lock_init(slock) \ +#define __spin_lock_init(slock, name, key, percpu) \ do { \ - static struct lock_class_key __key; \ - \ rt_mutex_base_init(&(slock)->lock); \ - __rt_spin_lock_init(slock, #slock, &__key, false); \ + __rt_spin_lock_init(slock, name, key, percpu); \ } while (0) -#define local_spin_lock_init(slock) \ +#define _spin_lock_init(slock, percpu) \ do { \ static struct lock_class_key __key; \ - \ - rt_mutex_base_init(&(slock)->lock); \ - __rt_spin_lock_init(slock, #slock, &__key, true); \ + __spin_lock_init(slock, #slock, &__key, percpu); \ } while (0) +#define spin_lock_init(slock) _spin_lock_init(slock, false) +#define local_spin_lock_init(slock) _spin_lock_init(slock, true) + extern void rt_spin_lock(spinlock_t *lock) __acquires(lock); extern void rt_spin_lock_nested(spinlock_t *lock, int subclass) __acquires(lock); extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock) __acquires(lock); -- cgit v1.2.3 From c554aa9ca976480839af342204e05bb4ce8367d5 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 1 Nov 2024 22:13:33 +0100 Subject: uprobes: Re-order struct uprobe_task to save some space On x86_64, with allmodconfig, struct uprobe_task is 72 bytes long, with a hole and some padding. /* size: 72, cachelines: 2, members: 7 */ /* sum members: 64, holes: 1, sum holes: 4 */ /* padding: 4 */ /* forced alignments: 1, forced holes: 1, sum forced holes: 4 */ /* last cacheline: 8 bytes */ Reorder the structure to fill the hole and avoid the padding. This way, the whole structure fits in a single cacheline and some memory is saved when it is allocated. /* size: 64, cachelines: 1, members: 7 */ /* forced alignments: 1 */ Signed-off-by: Christophe JAILLET Signed-off-by: Peter Zijlstra (Intel) Acked-by: "Masami Hiramatsu (Google)" Link: https://lore.kernel.org/r/a9f541d0cedf421f765c77a1fb93d6a979778a88.1730495562.git.christophe.jaillet@wanadoo.fr --- include/linux/uprobes.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 7a051b5d2edd..e0a4c2082245 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -121,6 +121,9 @@ struct hprobe { struct uprobe_task { enum uprobe_task_state state; + unsigned int depth; + struct return_instance *return_instances; + union { struct { struct arch_uprobe_task autask; @@ -138,9 +141,6 @@ struct uprobe_task { unsigned long xol_vaddr; struct arch_uprobe *auprobe; - - struct return_instance *return_instances; - unsigned int depth; }; struct return_consumer { -- cgit v1.2.3 From ed76c07c6885b249ce8486dac22fb97151a83185 Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Tue, 5 Nov 2024 16:45:08 -0300 Subject: printk: Introduce FORCE_CON flag Introduce FORCE_CON flag to printk. The new flag will make it possible to create a context where printk messages will never be suppressed. This mechanism will be used in the next patch to create a force_con context on sysrq handling, removing an existing workaround on the loglevel global variable. The workaround existed to make sure that sysrq header messages were sent to all consoles, but this doesn't work with deferred messages because the loglevel might be restored to its original value before a console flushes the messages. Signed-off-by: Marcos Paulo de Souza Reviewed-by: John Ogness Reviewed-by: Petr Mladek Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20241105-printk-loud-con-v2-1-bd3ecdf7b0e4@suse.com Signed-off-by: Petr Mladek --- include/linux/printk.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index eca9bb2ee637..232e5fd06701 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -166,6 +166,9 @@ __printf(1, 2) __cold int _printk_deferred(const char *fmt, ...); extern void __printk_deferred_enter(void); extern void __printk_deferred_exit(void); +extern void printk_force_console_enter(void); +extern void printk_force_console_exit(void); + /* * The printk_deferred_enter/exit macros are available only as a hack for * some code paths that need to defer all printk console printing. Interrupts -- cgit v1.2.3 From c28b97f53be7b598ae5fb47cc9dc80912019d7a8 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 24 Sep 2024 17:12:32 +0100 Subject: btrfs: qgroups: remove bytenr field from struct btrfs_qgroup_extent_record Now that we track qgroup extent records in a xarray we don't need to have a "bytenr" field in struct btrfs_qgroup_extent_record, since we can get it from the index of the record in the xarray. So remove the field and grab the bytenr from either the index key or any other place where it's available (delayed refs). This reduces the size of struct btrfs_qgroup_extent_record from 40 bytes down to 32 bytes, meaning that we now can store 128 instances of this structure instead of 102 per 4K page. Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index af6b3827fb1d..8d2ff32fb3b0 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1706,9 +1706,10 @@ DEFINE_EVENT(btrfs__qgroup_rsv_data, btrfs_qgroup_release_data, DECLARE_EVENT_CLASS(btrfs_qgroup_extent, TP_PROTO(const struct btrfs_fs_info *fs_info, - const struct btrfs_qgroup_extent_record *rec), + const struct btrfs_qgroup_extent_record *rec, + u64 bytenr), - TP_ARGS(fs_info, rec), + TP_ARGS(fs_info, rec, bytenr), TP_STRUCT__entry_btrfs( __field( u64, bytenr ) @@ -1716,7 +1717,7 @@ DECLARE_EVENT_CLASS(btrfs_qgroup_extent, ), TP_fast_assign_btrfs(fs_info, - __entry->bytenr = rec->bytenr; + __entry->bytenr = bytenr; __entry->num_bytes = rec->num_bytes; ), @@ -1727,17 +1728,19 @@ DECLARE_EVENT_CLASS(btrfs_qgroup_extent, DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_account_extents, TP_PROTO(const struct btrfs_fs_info *fs_info, - const struct btrfs_qgroup_extent_record *rec), + const struct btrfs_qgroup_extent_record *rec, + u64 bytenr), - TP_ARGS(fs_info, rec) + TP_ARGS(fs_info, rec, bytenr) ); DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_trace_extent, TP_PROTO(const struct btrfs_fs_info *fs_info, - const struct btrfs_qgroup_extent_record *rec), + const struct btrfs_qgroup_extent_record *rec, + u64 bytenr), - TP_ARGS(fs_info, rec) + TP_ARGS(fs_info, rec, bytenr) ); TRACE_EVENT(qgroup_num_dirty_extents, -- cgit v1.2.3 From b628c139519ae0e5453e5327161a41bae966201d Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 3 Oct 2024 15:27:27 +0100 Subject: btrfs: remove unused btrfs_try_tree_write_lock() btrfs_try_tree_write_lock() has been unused since commit 50b21d7a066f ("btrfs: submit a writeback bio per extent_buffer"). Remove it as we don't need it anymore. Reviewed-by: Christoph Hellwig Reviewed-by: Qu Wenruo Signed-off-by: Dr. David Alan Gilbert Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 8d2ff32fb3b0..bd515415ea8b 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2344,7 +2344,6 @@ DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_unlock_blocking); DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_read); DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_write); DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_read_lock); -DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_write_lock); DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_lock_atomic); DECLARE_EVENT_CLASS(btrfs__space_info_update, -- cgit v1.2.3 From 70a5f9e266cf1f544f33ce56f1eee634d79f3030 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 4 Sep 2024 17:03:43 +0100 Subject: btrfs: simplify tracking progress for the extent map shrinker Now that the extent map shrinker can only be run by a single task (as a work queue item) there is no need to keep the progress of the shrinker protected by a spinlock and passing the progress to trace events as parameters. So remove the lock and simplify the arguments for the trace events. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index bd515415ea8b..879016b4e391 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2555,10 +2555,9 @@ TRACE_EVENT(btrfs_extent_map_shrinker_count, TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter, - TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_to_scan, long nr, - u64 last_root_id, u64 last_ino), + TP_PROTO(const struct btrfs_fs_info *fs_info, long nr), - TP_ARGS(fs_info, nr_to_scan, nr, last_root_id, last_ino), + TP_ARGS(fs_info, nr), TP_STRUCT__entry_btrfs( __field( long, nr_to_scan ) @@ -2568,10 +2567,11 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter, ), TP_fast_assign_btrfs(fs_info, - __entry->nr_to_scan = nr_to_scan; + __entry->nr_to_scan = \ + atomic64_read(&fs_info->extent_map_shrinker_nr_to_scan); __entry->nr = nr; - __entry->last_root_id = last_root_id; - __entry->last_ino = last_ino; + __entry->last_root_id = fs_info->extent_map_shrinker_last_root; + __entry->last_ino = fs_info->extent_map_shrinker_last_ino; ), TP_printk_btrfs("nr_to_scan=%ld nr=%ld last_root=%llu(%s) last_ino=%llu", @@ -2581,10 +2581,9 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter, TRACE_EVENT(btrfs_extent_map_shrinker_scan_exit, - TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_dropped, long nr, - u64 last_root_id, u64 last_ino), + TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_dropped, long nr), - TP_ARGS(fs_info, nr_dropped, nr, last_root_id, last_ino), + TP_ARGS(fs_info, nr_dropped, nr), TP_STRUCT__entry_btrfs( __field( long, nr_dropped ) @@ -2596,8 +2595,8 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_exit, TP_fast_assign_btrfs(fs_info, __entry->nr_dropped = nr_dropped; __entry->nr = nr; - __entry->last_root_id = last_root_id; - __entry->last_ino = last_ino; + __entry->last_root_id = fs_info->extent_map_shrinker_last_root; + __entry->last_ino = fs_info->extent_map_shrinker_last_ino; ), TP_printk_btrfs("nr_dropped=%ld nr=%ld last_root=%llu(%s) last_ino=%llu", -- cgit v1.2.3 From e7fa845010f12bb98251a2bf9dcf39fd601f9424 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 5 Sep 2024 11:31:49 +0100 Subject: btrfs: rename extent map shrinker members from struct btrfs_fs_info The names for the members of struct btrfs_fs_info related to the extent map shrinker are a bit too long, so rename them to be shorter by replacing the "extent_map_" prefix with the "em_" prefix. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 879016b4e391..4df93ca9b7a8 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2568,10 +2568,10 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter, TP_fast_assign_btrfs(fs_info, __entry->nr_to_scan = \ - atomic64_read(&fs_info->extent_map_shrinker_nr_to_scan); + atomic64_read(&fs_info->em_shrinker_nr_to_scan); __entry->nr = nr; - __entry->last_root_id = fs_info->extent_map_shrinker_last_root; - __entry->last_ino = fs_info->extent_map_shrinker_last_ino; + __entry->last_root_id = fs_info->em_shrinker_last_root; + __entry->last_ino = fs_info->em_shrinker_last_ino; ), TP_printk_btrfs("nr_to_scan=%ld nr=%ld last_root=%llu(%s) last_ino=%llu", @@ -2595,8 +2595,8 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_exit, TP_fast_assign_btrfs(fs_info, __entry->nr_dropped = nr_dropped; __entry->nr = nr; - __entry->last_root_id = fs_info->extent_map_shrinker_last_root; - __entry->last_ino = fs_info->extent_map_shrinker_last_ino; + __entry->last_root_id = fs_info->em_shrinker_last_root; + __entry->last_ino = fs_info->em_shrinker_last_ino; ), TP_printk_btrfs("nr_dropped=%ld nr=%ld last_root=%llu(%s) last_ino=%llu", -- cgit v1.2.3 From df3b8ca604f224eb4cd51669416ad4d607682273 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 4 Nov 2024 16:12:04 +0000 Subject: io_uring/cmd: let cmds to know about dying task When the taks that submitted a request is dying, a task work for that request might get run by a kernel thread or even worse by a half dismantled task. We can't just cancel the task work without running the callback as the cmd might need to do some clean up, so pass a flag instead. If set, it's not safe to access any task resources and the callback is expected to cancel the cmd ASAP. Reviewed-by: Jens Axboe Reviewed-by: Ming Lei Signed-off-by: Pavel Begunkov Signed-off-by: David Sterba --- include/linux/io_uring_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 4b9ba523978d..2ee5dc105b58 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -37,6 +37,7 @@ enum io_uring_cmd_flags { /* set when uring wants to cancel a previously issued command */ IO_URING_F_CANCEL = (1 << 11), IO_URING_F_COMPAT = (1 << 12), + IO_URING_F_TASK_DEAD = (1 << 13), }; struct io_wq_work_node { -- cgit v1.2.3 From 6c83d153ed86eb17c46eafe4e78af4ce2071a052 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 21 Oct 2024 21:28:26 +0200 Subject: btrfs: add new ioctl to wait for cleaned subvolumes Add a new unprivileged ioctl that will let the command 'btrfs subvolume sync' work without the (privileged) SEARCH_TREE ioctl. There are several modes of operation, where the most common ones are to wait on a specific subvolume or all currently queued for cleaning. This is utilized e.g. in backup applications that delete subvolumes and wait until they're cleaned to check for remaining space. The other modes are for flexibility, e.g. for monitoring or checkpoints in the queue of deleted subvolumes, again without the need to use SEARCH_TREE. Notes: - waiting is interruptible, the timeout is set to 1 second and is not configurable - repeated calls to the ioctl see a different state, so this is inherently racy when using e.g. the count or peek next/last Use cases: - a subvolume A was deleted, wait for cleaning (WAIT_FOR_ONE) - a bunch of subvolumes were deleted, wait for all (WAIT_FOR_QUEUED or PEEK_LAST + WAIT_FOR_ONE) - count how many are queued (not blocking), for monitoring purposes - report progress (PEEK_NEXT), may miss some if cleaning is quick - own waiting in user space (PEEK_LAST until it's 0) Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index cdf6ad872149..d3b222d7af24 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -1049,6 +1049,29 @@ struct btrfs_ioctl_encoded_io_args { #define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0 #define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1 +/* + * Wait for subvolume cleaning process. This queries the kernel queue and it + * can change between the calls. + * + * - FOR_ONE - specify the subvolid + * - FOR_QUEUED - wait for all currently queued + * - COUNT - count number of queued + * - PEEK_FIRST - read which is the first in the queue (to be cleaned or being + * cleaned already), or 0 if the queue is empty + * - PEEK_LAST - read the last subvolid in the queue, or 0 if the queue is empty + */ +struct btrfs_ioctl_subvol_wait { + __u64 subvolid; + __u32 mode; + __u32 count; +}; + +#define BTRFS_SUBVOL_SYNC_WAIT_FOR_ONE (0) +#define BTRFS_SUBVOL_SYNC_WAIT_FOR_QUEUED (1) +#define BTRFS_SUBVOL_SYNC_COUNT (2) +#define BTRFS_SUBVOL_SYNC_PEEK_FIRST (3) +#define BTRFS_SUBVOL_SYNC_PEEK_LAST (4) + /* Error codes as returned by the kernel */ enum btrfs_err_code { BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1, @@ -1181,6 +1204,8 @@ enum btrfs_err_code { struct btrfs_ioctl_encoded_io_args) #define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \ struct btrfs_ioctl_encoded_io_args) +#define BTRFS_IOC_SUBVOL_SYNC_WAIT _IOW(BTRFS_IOCTL_MAGIC, 65, \ + struct btrfs_ioctl_subvol_wait) #ifdef __cplusplus } -- cgit v1.2.3 From cbe82e140bb76e1aa9f808cc841654a25b70d4e5 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Sun, 3 Nov 2024 15:51:49 +0100 Subject: asm-generic: ticket-lock: Reuse arch_spinlock_t of qspinlock The arch_spinlock_t of qspinlock has contained the atomic_t val, which satisfies the ticket-lock requirement. Thus, unify the arch_spinlock_t into qspinlock_types.h. This is the preparation for the next combo spinlock. Reviewed-by: Leonardo Bras Suggested-by: Arnd Bergmann Link: https://lore.kernel.org/linux-riscv/CAK8P3a2rnz9mQqhN6-e0CGUUv9rntRELFdxt_weiD7FxH7fkfQ@mail.gmail.com/ Signed-off-by: Guo Ren Signed-off-by: Guo Ren Acked-by: Peter Zijlstra (Intel) Reviewed-by: Andrew Jones Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20241103145153.105097-10-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- include/asm-generic/spinlock.h | 14 +++++++------- include/asm-generic/spinlock_types.h | 12 ++---------- 2 files changed, 9 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h index 90803a826ba0..4773334ee638 100644 --- a/include/asm-generic/spinlock.h +++ b/include/asm-generic/spinlock.h @@ -32,7 +32,7 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock) { - u32 val = atomic_fetch_add(1<<16, lock); + u32 val = atomic_fetch_add(1<<16, &lock->val); u16 ticket = val >> 16; if (ticket == (u16)val) @@ -46,31 +46,31 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock) * have no outstanding writes due to the atomic_fetch_add() the extra * orderings are free. */ - atomic_cond_read_acquire(lock, ticket == (u16)VAL); + atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL); smp_mb(); } static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock) { - u32 old = atomic_read(lock); + u32 old = atomic_read(&lock->val); if ((old >> 16) != (old & 0xffff)) return false; - return atomic_try_cmpxchg(lock, &old, old + (1<<16)); /* SC, for RCsc */ + return atomic_try_cmpxchg(&lock->val, &old, old + (1<<16)); /* SC, for RCsc */ } static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) { u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); - u32 val = atomic_read(lock); + u32 val = atomic_read(&lock->val); smp_store_release(ptr, (u16)val + 1); } static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) { - u32 val = lock.counter; + u32 val = lock.val.counter; return ((val >> 16) == (val & 0xffff)); } @@ -84,7 +84,7 @@ static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock) static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock) { - u32 val = atomic_read(lock); + u32 val = atomic_read(&lock->val); return (s16)((val >> 16) - (val & 0xffff)) > 1; } diff --git a/include/asm-generic/spinlock_types.h b/include/asm-generic/spinlock_types.h index 8962bb730945..f534aa5de394 100644 --- a/include/asm-generic/spinlock_types.h +++ b/include/asm-generic/spinlock_types.h @@ -3,15 +3,7 @@ #ifndef __ASM_GENERIC_SPINLOCK_TYPES_H #define __ASM_GENERIC_SPINLOCK_TYPES_H -#include -typedef atomic_t arch_spinlock_t; - -/* - * qrwlock_types depends on arch_spinlock_t, so we must typedef that before the - * include. - */ -#include - -#define __ARCH_SPIN_LOCK_UNLOCKED ATOMIC_INIT(0) +#include +#include #endif /* __ASM_GENERIC_SPINLOCK_TYPES_H */ -- cgit v1.2.3 From 22c33321e260c8b4c1877b2cc0c4e26a0c74c23f Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Sun, 3 Nov 2024 15:51:50 +0100 Subject: asm-generic: ticket-lock: Add separate ticket-lock.h Add a separate ticket-lock.h to include multiple spinlock versions and select one at compile time or runtime. Reviewed-by: Leonardo Bras Suggested-by: Arnd Bergmann Link: https://lore.kernel.org/linux-riscv/CAK8P3a2rnz9mQqhN6-e0CGUUv9rntRELFdxt_weiD7FxH7fkfQ@mail.gmail.com/ Signed-off-by: Guo Ren Signed-off-by: Guo Ren Acked-by: Peter Zijlstra (Intel) Reviewed-by: Andrew Jones Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20241103145153.105097-11-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- include/asm-generic/spinlock.h | 87 +--------------------------- include/asm-generic/ticket_spinlock.h | 103 ++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 86 deletions(-) create mode 100644 include/asm-generic/ticket_spinlock.h (limited to 'include') diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h index 4773334ee638..970590baf61b 100644 --- a/include/asm-generic/spinlock.h +++ b/include/asm-generic/spinlock.h @@ -1,94 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * 'Generic' ticket-lock implementation. - * - * It relies on atomic_fetch_add() having well defined forward progress - * guarantees under contention. If your architecture cannot provide this, stick - * to a test-and-set lock. - * - * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a - * sub-word of the value. This is generally true for anything LL/SC although - * you'd be hard pressed to find anything useful in architecture specifications - * about this. If your architecture cannot do this you might be better off with - * a test-and-set. - * - * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence - * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with - * a full fence after the spin to upgrade the otherwise-RCpc - * atomic_cond_read_acquire(). - * - * The implementation uses smp_cond_load_acquire() to spin, so if the - * architecture has WFE like instructions to sleep instead of poll for word - * modifications be sure to implement that (see ARM64 for example). - * - */ - #ifndef __ASM_GENERIC_SPINLOCK_H #define __ASM_GENERIC_SPINLOCK_H -#include -#include - -static __always_inline void arch_spin_lock(arch_spinlock_t *lock) -{ - u32 val = atomic_fetch_add(1<<16, &lock->val); - u16 ticket = val >> 16; - - if (ticket == (u16)val) - return; - - /* - * atomic_cond_read_acquire() is RCpc, but rather than defining a - * custom cond_read_rcsc() here we just emit a full fence. We only - * need the prior reads before subsequent writes ordering from - * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we - * have no outstanding writes due to the atomic_fetch_add() the extra - * orderings are free. - */ - atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL); - smp_mb(); -} - -static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock) -{ - u32 old = atomic_read(&lock->val); - - if ((old >> 16) != (old & 0xffff)) - return false; - - return atomic_try_cmpxchg(&lock->val, &old, old + (1<<16)); /* SC, for RCsc */ -} - -static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); - u32 val = atomic_read(&lock->val); - - smp_store_release(ptr, (u16)val + 1); -} - -static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) -{ - u32 val = lock.val.counter; - - return ((val >> 16) == (val & 0xffff)); -} - -static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock) -{ - arch_spinlock_t val = READ_ONCE(*lock); - - return !arch_spin_value_unlocked(val); -} - -static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock) -{ - u32 val = atomic_read(&lock->val); - - return (s16)((val >> 16) - (val & 0xffff)) > 1; -} - +#include #include #endif /* __ASM_GENERIC_SPINLOCK_H */ diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h new file mode 100644 index 000000000000..cfcff22b37b3 --- /dev/null +++ b/include/asm-generic/ticket_spinlock.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * 'Generic' ticket-lock implementation. + * + * It relies on atomic_fetch_add() having well defined forward progress + * guarantees under contention. If your architecture cannot provide this, stick + * to a test-and-set lock. + * + * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a + * sub-word of the value. This is generally true for anything LL/SC although + * you'd be hard pressed to find anything useful in architecture specifications + * about this. If your architecture cannot do this you might be better off with + * a test-and-set. + * + * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence + * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with + * a full fence after the spin to upgrade the otherwise-RCpc + * atomic_cond_read_acquire(). + * + * The implementation uses smp_cond_load_acquire() to spin, so if the + * architecture has WFE like instructions to sleep instead of poll for word + * modifications be sure to implement that (see ARM64 for example). + * + */ + +#ifndef __ASM_GENERIC_TICKET_SPINLOCK_H +#define __ASM_GENERIC_TICKET_SPINLOCK_H + +#include +#include + +static __always_inline void ticket_spin_lock(arch_spinlock_t *lock) +{ + u32 val = atomic_fetch_add(1<<16, &lock->val); + u16 ticket = val >> 16; + + if (ticket == (u16)val) + return; + + /* + * atomic_cond_read_acquire() is RCpc, but rather than defining a + * custom cond_read_rcsc() here we just emit a full fence. We only + * need the prior reads before subsequent writes ordering from + * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we + * have no outstanding writes due to the atomic_fetch_add() the extra + * orderings are free. + */ + atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL); + smp_mb(); +} + +static __always_inline bool ticket_spin_trylock(arch_spinlock_t *lock) +{ + u32 old = atomic_read(&lock->val); + + if ((old >> 16) != (old & 0xffff)) + return false; + + return atomic_try_cmpxchg(&lock->val, &old, old + (1<<16)); /* SC, for RCsc */ +} + +static __always_inline void ticket_spin_unlock(arch_spinlock_t *lock) +{ + u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); + u32 val = atomic_read(&lock->val); + + smp_store_release(ptr, (u16)val + 1); +} + +static __always_inline int ticket_spin_value_unlocked(arch_spinlock_t lock) +{ + u32 val = lock.val.counter; + + return ((val >> 16) == (val & 0xffff)); +} + +static __always_inline int ticket_spin_is_locked(arch_spinlock_t *lock) +{ + arch_spinlock_t val = READ_ONCE(*lock); + + return !ticket_spin_value_unlocked(val); +} + +static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock) +{ + u32 val = atomic_read(&lock->val); + + return (s16)((val >> 16) - (val & 0xffff)) > 1; +} + +/* + * Remapping spinlock architecture specific functions to the corresponding + * ticket spinlock functions. + */ +#define arch_spin_is_locked(l) ticket_spin_is_locked(l) +#define arch_spin_is_contended(l) ticket_spin_is_contended(l) +#define arch_spin_value_unlocked(l) ticket_spin_value_unlocked(l) +#define arch_spin_lock(l) ticket_spin_lock(l) +#define arch_spin_trylock(l) ticket_spin_trylock(l) +#define arch_spin_unlock(l) ticket_spin_unlock(l) + +#endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */ -- cgit v1.2.3 From ab83647fadae2f1f723119dc066b39a461d6d288 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Sun, 3 Nov 2024 15:51:53 +0100 Subject: riscv: Add qspinlock support In order to produce a generic kernel, a user can select CONFIG_COMBO_SPINLOCKS which will fallback at runtime to the ticket spinlock implementation if Zabha or Ziccrse are not present. Note that we can't use alternatives here because the discovery of extensions is done too late and we need to start with the qspinlock implementation because the ticket spinlock implementation would pollute the spinlock value, so let's use static keys. This is largely based on Guo's work and Leonardo reviews at [1]. Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1] Signed-off-by: Guo Ren Signed-off-by: Alexandre Ghiti Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20241103145153.105097-14-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- include/asm-generic/qspinlock.h | 2 ++ include/asm-generic/ticket_spinlock.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index 0655aa5b57b2..bf47cca2c375 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock) } #endif +#ifndef __no_arch_spinlock_redefine /* * Remapping spinlock architecture specific functions to the corresponding * queued spinlock functions. @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock) #define arch_spin_lock(l) queued_spin_lock(l) #define arch_spin_trylock(l) queued_spin_trylock(l) #define arch_spin_unlock(l) queued_spin_unlock(l) +#endif #endif /* __ASM_GENERIC_QSPINLOCK_H */ diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h index cfcff22b37b3..325779970d8a 100644 --- a/include/asm-generic/ticket_spinlock.h +++ b/include/asm-generic/ticket_spinlock.h @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock) return (s16)((val >> 16) - (val & 0xffff)) > 1; } +#ifndef __no_arch_spinlock_redefine /* * Remapping spinlock architecture specific functions to the corresponding * ticket spinlock functions. @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock) #define arch_spin_lock(l) ticket_spin_lock(l) #define arch_spin_trylock(l) ticket_spin_trylock(l) #define arch_spin_unlock(l) ticket_spin_unlock(l) +#endif #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */ -- cgit v1.2.3 From 0ef2b9e698dbf9ba78f67952a747f35eb7060470 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Nov 2024 07:26:31 +0100 Subject: block: lift bio_is_zone_append to bio.h Make bio_is_zone_append globally available, because file systems need to use to check for a zone append bio in their end_io handlers to deal with the block layer emulation. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20241104062647.91160-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 4a1bf43ca53d..60830a6a5939 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -675,6 +675,23 @@ static inline void bio_clear_polled(struct bio *bio) bio->bi_opf &= ~REQ_POLLED; } +/** + * bio_is_zone_append - is this a zone append bio? + * @bio: bio to check + * + * Check if @bio is a zone append operation. Core block layer code and end_io + * handlers must use this instead of an open coded REQ_OP_ZONE_APPEND check + * because the block layer can rewrite REQ_OP_ZONE_APPEND to REQ_OP_WRITE if + * it is not natively supported. + */ +static inline bool bio_is_zone_append(struct bio *bio) +{ + if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) + return false; + return bio_op(bio) == REQ_OP_ZONE_APPEND || + bio_flagged(bio, BIO_EMULATES_ZONE_APPEND); +} + struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev, unsigned int nr_pages, blk_opf_t opf, gfp_t gfp); struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new); -- cgit v1.2.3 From d920179b3d4842a0e27cae54fdddbe5ef3977e73 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Nov 2024 14:45:34 +0100 Subject: bpf: Add support for uprobe multi session attach Adding support to attach BPF program for entry and return probe of the same function. This is common use case which at the moment requires to create two uprobe multi links. Adding new BPF_TRACE_UPROBE_SESSION attach type that instructs kernel to attach single link program to both entry and exit probe. It's possible to control execution of the BPF program on return probe simply by returning zero or non zero from the entry BPF program execution to execute or not the BPF program on return probe respectively. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241108134544.480660-4-jolsa@kernel.org --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f28b6527e815..4162afc6b5d0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1116,6 +1116,7 @@ enum bpf_attach_type { BPF_NETKIT_PRIMARY, BPF_NETKIT_PEER, BPF_TRACE_KPROBE_SESSION, + BPF_TRACE_UPROBE_SESSION, __MAX_BPF_ATTACH_TYPE }; -- cgit v1.2.3 From f6b9a69a9e56b2083aca8a925fc1a28eb698e3ed Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 9 Nov 2024 15:14:29 -0800 Subject: bpf: Refactor active lock management When bpf_spin_lock was introduced originally, there was deliberation on whether to use an array of lock IDs, but since bpf_spin_lock is limited to holding a single lock at any given time, we've been using a single ID to identify the held lock. In preparation for introducing spin locks that can be taken multiple times, introduce support for acquiring multiple lock IDs. For this purpose, reuse the acquired_refs array and store both lock and pointer references. We tag the entry with REF_TYPE_PTR or REF_TYPE_LOCK to disambiguate and find the relevant entry. The ptr field is used to track the map_ptr or btf (for bpf_obj_new allocations) to ensure locks can be matched with protected fields within the same "allocation", i.e. bpf_obj_new object or map value. The struct active_lock is changed to an int as the state is part of the acquired_refs array, and we only need active_lock as a cheap way of detecting lock presence. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241109231430.2475236-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko --- include/linux/bpf_verifier.h | 53 +++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 4513372c5bc8..d84beed92ae4 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -48,22 +48,6 @@ enum bpf_reg_liveness { REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */ }; -/* For every reg representing a map value or allocated object pointer, - * we consider the tuple of (ptr, id) for them to be unique in verifier - * context and conside them to not alias each other for the purposes of - * tracking lock state. - */ -struct bpf_active_lock { - /* This can either be reg->map_ptr or reg->btf. If ptr is NULL, - * there's no active lock held, and other fields have no - * meaning. If non-NULL, it indicates that a lock is held and - * id member has the reg->id of the register which can be >= 0. - */ - void *ptr; - /* This will be reg->id */ - u32 id; -}; - #define ITER_PREFIX "bpf_iter_" enum bpf_iter_state { @@ -266,6 +250,13 @@ struct bpf_stack_state { }; struct bpf_reference_state { + /* Each reference object has a type. Ensure REF_TYPE_PTR is zero to + * default to pointer reference on zero initialization of a state. + */ + enum ref_state_type { + REF_TYPE_PTR = 0, + REF_TYPE_LOCK, + } type; /* Track each reference created with a unique id, even if the same * instruction creates the reference multiple times (eg, via CALL). */ @@ -274,17 +265,23 @@ struct bpf_reference_state { * is used purely to inform the user of a reference leak. */ int insn_idx; - /* There can be a case like: - * main (frame 0) - * cb (frame 1) - * func (frame 3) - * cb (frame 4) - * Hence for frame 4, if callback_ref just stored boolean, it would be - * impossible to distinguish nested callback refs. Hence store the - * frameno and compare that to callback_ref in check_reference_leak when - * exiting a callback function. - */ - int callback_ref; + union { + /* There can be a case like: + * main (frame 0) + * cb (frame 1) + * func (frame 3) + * cb (frame 4) + * Hence for frame 4, if callback_ref just stored boolean, it would be + * impossible to distinguish nested callback refs. Hence store the + * frameno and compare that to callback_ref in check_reference_leak when + * exiting a callback function. + */ + int callback_ref; + /* Use to keep track of the source object of a lock, to ensure + * it matches on unlock. + */ + void *ptr; + }; }; struct bpf_retval_range { @@ -332,6 +329,7 @@ struct bpf_func_state { /* The following fields should be last. See copy_func_state() */ int acquired_refs; + int active_locks; struct bpf_reference_state *refs; /* The state of the stack. Each element of the array describes BPF_REG_SIZE * (i.e. 8) bytes worth of stack memory. @@ -434,7 +432,6 @@ struct bpf_verifier_state { u32 insn_idx; u32 curframe; - struct bpf_active_lock active_lock; bool speculative; bool active_rcu_lock; u32 active_preempt_lock; -- cgit v1.2.3 From ae6e3a273f590a2b64f14a9fab3546c3a8f44ed4 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 9 Nov 2024 15:14:30 -0800 Subject: bpf: Drop special callback reference handling Logic to prevent callbacks from acquiring new references for the program (i.e. leaving acquired references), and releasing caller references (i.e. those acquired in parent frames) was introduced in commit 9d9d00ac29d0 ("bpf: Fix reference state management for synchronous callbacks"). This was necessary because back then, the verifier simulated each callback once (that could potentially be executed N times, where N can be zero). This meant that callbacks that left lingering resources or cleared caller resources could do it more than once, operating on undefined state or leaking memory. With the fixes to callback verification in commit ab5cfac139ab ("bpf: verify callbacks as if they are called unknown number of times"), all of this extra logic is no longer necessary. Hence, drop it as part of this commit. Cc: Eduard Zingerman Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241109231430.2475236-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko --- include/linux/bpf_verifier.h | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index d84beed92ae4..3a74033d49c4 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -265,23 +265,10 @@ struct bpf_reference_state { * is used purely to inform the user of a reference leak. */ int insn_idx; - union { - /* There can be a case like: - * main (frame 0) - * cb (frame 1) - * func (frame 3) - * cb (frame 4) - * Hence for frame 4, if callback_ref just stored boolean, it would be - * impossible to distinguish nested callback refs. Hence store the - * frameno and compare that to callback_ref in check_reference_leak when - * exiting a callback function. - */ - int callback_ref; - /* Use to keep track of the source object of a lock, to ensure - * it matches on unlock. - */ - void *ptr; - }; + /* Use to keep track of the source object of a lock, to ensure + * it matches on unlock. + */ + void *ptr; }; struct bpf_retval_range { -- cgit v1.2.3 From 559218d43ec9dde3d2847c7aa127e88d6ab1c9ed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 Nov 2024 16:46:51 +0100 Subject: block: pre-calculate max_zone_append_sectors max_zone_append_sectors differs from all other queue limits in that the final value used is not stored in the queue_limits but needs to be obtained using queue_limits_max_zone_append_sectors helper. This not only adds (tiny) extra overhead to the I/O path, but also can be easily forgotten in file system code. Add a new max_hw_zone_append_sectors value to queue_limits which is set by the driver, and calculate max_zone_append_sectors from that and the other inputs in blk_validate_zoned_limits, similar to how max_sectors is calculated to fix this. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20241104073955.112324-3-hch@lst.de Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20241108154657.845768-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1b51a7c92e9b..65f37ae70712 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -375,6 +375,7 @@ struct queue_limits { unsigned int max_user_discard_sectors; unsigned int max_secure_erase_sectors; unsigned int max_write_zeroes_sectors; + unsigned int max_hw_zone_append_sectors; unsigned int max_zone_append_sectors; unsigned int discard_granularity; unsigned int discard_alignment; @@ -1208,25 +1209,9 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q) return q->limits.max_segment_size; } -static inline unsigned int -queue_limits_max_zone_append_sectors(const struct queue_limits *l) -{ - unsigned int max_sectors = min(l->chunk_sectors, l->max_hw_sectors); - - return min_not_zero(l->max_zone_append_sectors, max_sectors); -} - -static inline unsigned int queue_max_zone_append_sectors(struct request_queue *q) -{ - if (!blk_queue_is_zoned(q)) - return 0; - - return queue_limits_max_zone_append_sectors(&q->limits); -} - static inline bool queue_emulates_zone_append(struct request_queue *q) { - return blk_queue_is_zoned(q) && !q->limits.max_zone_append_sectors; + return blk_queue_is_zoned(q) && !q->limits.max_hw_zone_append_sectors; } static inline bool bdev_emulates_zone_append(struct block_device *bdev) @@ -1237,7 +1222,7 @@ static inline bool bdev_emulates_zone_append(struct block_device *bdev) static inline unsigned int bdev_max_zone_append_sectors(struct block_device *bdev) { - return queue_max_zone_append_sectors(bdev_get_queue(bdev)); + return bdev_limits(bdev)->max_zone_append_sectors; } static inline unsigned int bdev_max_segments(struct block_device *bdev) -- cgit v1.2.3 From 377dda2cff59825079aee3906aa4904779747b0b Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Fri, 25 Oct 2024 11:23:29 +0800 Subject: drm/fourcc: add AMD_FMT_MOD_TILE_GFX9_4K_D_X MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is used when radeonsi export small texture's modifier to user with eglExportDMABUFImageQueryMESA(). mesa changes is available here: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31658 Reviewed-by: Marek Olšák Signed-off-by: Qiang Yu Signed-off-by: Alex Deucher --- include/uapi/drm/drm_fourcc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 78abd819fd62..70f3b00b0681 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -1516,6 +1516,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) * 64K_D_2D on GFX12 is identical to 64K_D on GFX11. */ #define AMD_FMT_MOD_TILE_GFX9_64K_D 10 +#define AMD_FMT_MOD_TILE_GFX9_4K_D_X 22 #define AMD_FMT_MOD_TILE_GFX9_64K_S_X 25 #define AMD_FMT_MOD_TILE_GFX9_64K_D_X 26 #define AMD_FMT_MOD_TILE_GFX9_64K_R_X 27 -- cgit v1.2.3 From fd0a5afb5455b4561bfc6dfb0c4b2d8226f9ccfe Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 4 Nov 2024 20:16:30 +0300 Subject: kunit: skb: use "gfp" variable instead of hardcoding GFP_KERNEL The intent here was clearly to use the gfp variable flags instead of hardcoding GFP_KERNEL. All the callers pass GFP_KERNEL as the gfp flags so this doesn't affect runtime. Fixes: b3231d353a51 ("kunit: add a convenience allocation wrapper for SKBs") Signed-off-by: Dan Carpenter Reviewed-by: David Gow Reviewed-by: Kuan-Wei Chiu Signed-off-by: Shuah Khan --- include/kunit/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/kunit/skbuff.h b/include/kunit/skbuff.h index 44d12370939a..345e1e8f0312 100644 --- a/include/kunit/skbuff.h +++ b/include/kunit/skbuff.h @@ -29,7 +29,7 @@ static void kunit_action_kfree_skb(void *p) static inline struct sk_buff *kunit_zalloc_skb(struct kunit *test, int len, gfp_t gfp) { - struct sk_buff *res = alloc_skb(len, GFP_KERNEL); + struct sk_buff *res = alloc_skb(len, gfp); if (!res || skb_pad(res, len)) return NULL; -- cgit v1.2.3 From 5a47c2080a7316f184107464e4f76737c0c05186 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Wed, 6 Nov 2024 15:34:46 +0800 Subject: nvmet: support reservation feature This patch implements the reservation feature, including: 1. reservation register(register, unregister and replace). 2. reservation acquire(acquire, preempt, preempt and abort). 3. reservation release(release and clear). 4. reservation report. 5. set feature and get feature of reservation notify mask. 6. get log page of reservation event. Not supported: 1. persistent reservation through power loss. Test cases: Use nvme-cli and fio to test all implemented sub features: 1. use nvme resv-register to register host a registrant or unregister or replace a new key. 2. use nvme resv-acquire to set host to the holder, and use fio to send read and write io in all reservation type. And also test preempt and "preempt and abort". 3. use nvme resv-report to show all registrants and reservation status. 4. use nvme resv-release to release all registrants. 5. use nvme get-log to get events generated by the preceding operations. In addition, make reservation configurable, one can set ns to support reservation before enable ns. The default of resv_enable is false. Signed-off-by: Guixin Liu Reviewed-by: Dmitry Bogdanov Reviewed-by: Christoph Hellwig Tested-by: Chaitanya Kulkarni Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 44d048d68503..0179bb6d502d 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -2045,7 +2045,7 @@ enum { NVME_PR_LOG_EMPTY_LOG_PAGE = 0x00, NVME_PR_LOG_REGISTRATION_PREEMPTED = 0x01, NVME_PR_LOG_RESERVATION_RELEASED = 0x02, - NVME_PR_LOG_RESERVATOPM_PREEMPTED = 0x03, + NVME_PR_LOG_RESERVATOIN_PREEMPTED = 0x03, }; enum { -- cgit v1.2.3 From 61c9967cd63448292a64f9ee9aeb6e2053e3a624 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 4 Nov 2024 13:24:36 -0800 Subject: nvmet: implement active command set ns list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is required for nvme 2.1 for targets that support multiple command sets. We support NVM and ZNS, so are required to support this identification. Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Matias Bjørling Signed-off-by: Keith Busch --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 0179bb6d502d..26de7c5c12be 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -522,6 +522,7 @@ enum { NVME_ID_CNS_NS_DESC_LIST = 0x03, NVME_ID_CNS_CS_NS = 0x05, NVME_ID_CNS_CS_CTRL = 0x06, + NVME_ID_CNS_NS_ACTIVE_LIST_CS = 0x07, NVME_ID_CNS_NS_CS_INDEP = 0x08, NVME_ID_CNS_NS_PRESENT_LIST = 0x10, NVME_ID_CNS_NS_PRESENT = 0x11, -- cgit v1.2.3 From 83acb24e6de7bbb5cb0df1ba0f47a92da9112061 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 4 Nov 2024 14:00:14 -0800 Subject: nvmet: implement supported log pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This log is required for nvme 2.1. Reviewed-by: Matias Bjørling Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 26de7c5c12be..e9e508bca60f 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1245,6 +1245,7 @@ enum { NVME_FEAT_WRITE_PROTECT = 0x84, NVME_FEAT_VENDOR_START = 0xC0, NVME_FEAT_VENDOR_END = 0xFF, + NVME_LOG_SUPPORTED = 0x00, NVME_LOG_ERROR = 0x01, NVME_LOG_SMART = 0x02, NVME_LOG_FW_SLOT = 0x03, @@ -1262,6 +1263,14 @@ enum { NVME_FWACT_ACTV = (2 << 3), }; +struct nvme_supported_log { + __le32 lids[256]; +}; + +enum { + NVME_LIDS_LSUPP = 1 << 0, +}; + /* NVMe Namespace Write Protect State */ enum { NVME_NS_NO_WRITE_PROTECT = 0, -- cgit v1.2.3 From e973c91727d49bb128c95210b3aa1960b9421d18 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 4 Nov 2024 14:07:42 -0800 Subject: nvmet: implement supported features log MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This log is required for nvme 2.1. Reviewed-by: Matias Bjørling Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index e9e508bca60f..31d7ec6d8b93 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1256,6 +1256,7 @@ enum { NVME_LOG_TELEMETRY_CTRL = 0x08, NVME_LOG_ENDURANCE_GROUP = 0x09, NVME_LOG_ANA = 0x0c, + NVME_LOG_FEATURES = 0x12, NVME_LOG_DISC = 0x70, NVME_LOG_RESERVATION = 0x80, NVME_FWACT_REPL = (0 << 3), @@ -1271,6 +1272,16 @@ enum { NVME_LIDS_LSUPP = 1 << 0, }; +struct nvme_supported_features_log { + __le32 fis[256]; +}; + +enum { + NVME_FIS_FSUPP = 1 << 0, + NVME_FIS_NSCPE = 1 << 20, + NVME_FIS_CSCPE = 1 << 21, +}; + /* NVMe Namespace Write Protect State */ enum { NVME_NS_NO_WRITE_PROTECT = 0, -- cgit v1.2.3 From 266b652c65b44fb2ccfa17cdb54ce2ef723deb0a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 1 Nov 2024 14:46:01 -0700 Subject: nvmet: implement endurance groups Most of the returned information is just stubbed data. The target must support these in order to report rotational media. Since this driver doesn't know any better, each namespace is its own endurance group with the engid value matching the nsid. Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 31d7ec6d8b93..6d5b4299a1b2 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -327,7 +327,8 @@ struct nvme_id_ctrl { __le32 sanicap; __le32 hmminds; __le16 hmmaxd; - __u8 rsvd338[4]; + __le16 nvmsetidmax; + __le16 endgidmax; __u8 anatt; __u8 anacap; __le32 anagrpmax; @@ -531,6 +532,7 @@ enum { NVME_ID_CNS_SCNDRY_CTRL_LIST = 0x15, NVME_ID_CNS_NS_GRANULARITY = 0x16, NVME_ID_CNS_UUID_LIST = 0x17, + NVME_ID_CNS_ENDGRP_LIST = 0x19, }; enum { @@ -618,6 +620,28 @@ enum { NVME_NIDT_CSI = 0x04, }; +struct nvme_endurance_group_log { + __u8 egcw; + __u8 egfeat; + __u8 rsvd2; + __u8 avsp; + __u8 avspt; + __u8 pused; + __le16 did; + __u8 rsvd8[24]; + __u8 ee[16]; + __u8 dur[16]; + __u8 duw[16]; + __u8 muw[16]; + __u8 hrc[16]; + __u8 hwc[16]; + __u8 mdie[16]; + __u8 neile[16]; + __u8 tegcap[16]; + __u8 uegcap[16]; + __u8 rsvd192[320]; +}; + struct nvme_smart_log { __u8 critical_warning; __u8 temperature[2]; @@ -1302,7 +1326,8 @@ struct nvme_identify { __u8 cns; __u8 rsvd3; __le16 ctrlid; - __u8 rsvd11[3]; + __le16 cnssid; + __u8 rsvd11; __u8 csi; __u32 rsvd12[4]; }; -- cgit v1.2.3 From 5fd075cdaf3649000677d960fd9e45c08081b7e0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 1 Nov 2024 13:48:47 -0700 Subject: nvmet: implement rotational media information log Most of the information is stubbed. Supporting these commands is a requirement for supporting rotational media. Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 6d5b4299a1b2..99cf0ee73714 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -642,6 +642,18 @@ struct nvme_endurance_group_log { __u8 rsvd192[320]; }; +struct nvme_rotational_media_log { + __le16 endgid; + __le16 numa; + __le16 nrs; + __u8 rsvd6[2]; + __le32 spinc; + __le32 fspinc; + __le32 ldc; + __le32 fldc; + __u8 rsvd24[488]; +}; + struct nvme_smart_log { __u8 critical_warning; __u8 temperature[2]; @@ -1281,6 +1293,7 @@ enum { NVME_LOG_ENDURANCE_GROUP = 0x09, NVME_LOG_ANA = 0x0c, NVME_LOG_FEATURES = 0x12, + NVME_LOG_RMI = 0x16, NVME_LOG_DISC = 0x70, NVME_LOG_RESERVATION = 0x80, NVME_FWACT_REPL = (0 << 3), @@ -1435,7 +1448,7 @@ struct nvme_get_log_page_command { __u8 lsp; /* upper 4 bits reserved */ __le16 numdl; __le16 numdu; - __u16 rsvd11; + __le16 lsi; union { struct { __le32 lpol; -- cgit v1.2.3 From e2758c76a0ab2032a0d11abc1c2ff08661fdf428 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 1 Nov 2024 12:29:40 -0700 Subject: nvmet: support for csi identify ns Implements reporting the I/O Command Set Independent Identify Namespace command. Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 99cf0ee73714..c136d64c7d73 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -563,6 +563,7 @@ enum { NVME_NS_FLBAS_LBA_SHIFT = 1, NVME_NS_FLBAS_META_EXT = 0x10, NVME_NS_NMIC_SHARED = 1 << 0, + NVME_NS_ROTATIONAL = 1 << 4, NVME_LBAF_RP_BEST = 0, NVME_LBAF_RP_BETTER = 1, NVME_LBAF_RP_GOOD = 2, -- cgit v1.2.3 From 8a825d22a70915bd80c811fa93538cf2540af29d Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Mon, 4 Nov 2024 16:55:00 +0800 Subject: nvme: check ns's volatile write cache not present When the VWC of a namespace does not exist, the BLK_FEAT_WRITE_CACHE flag should not be set when registering the block device, regardless of whether the controller supports VWC. Signed-off-by: Guixin Liu Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index c136d64c7d73..0a6e22038ce3 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -564,6 +564,7 @@ enum { NVME_NS_FLBAS_META_EXT = 0x10, NVME_NS_NMIC_SHARED = 1 << 0, NVME_NS_ROTATIONAL = 1 << 4, + NVME_NS_VWC_NOT_PRESENT = 1 << 5, NVME_LBAF_RP_BEST = 0, NVME_LBAF_RP_BETTER = 1, NVME_LBAF_RP_GOOD = 2, -- cgit v1.2.3 From 1af75b2ad08bd5977c51c2d0fc11741a4c0a48d9 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Sun, 10 Nov 2024 09:33:40 -0800 Subject: firmware: qcom: scm: Introduce CP_SMMU_APERTURE_ID The QCOM_SCM_SVC_MP service provides QCOM_SCM_MP_CP_SMMU_APERTURE_ID, which is used to trigger the mapping of register banks into the SMMU context for per-processes page tables to function (in case this isn't statically setup by firmware). This is necessary on e.g. QCS6490 Rb3Gen2, in order to avoid "CP | AHB bus error"-errors from the GPU. Introduce a function to allow the msm driver to invoke this call. Signed-off-by: Bjorn Andersson Reviewed-by: Rob Clark Link: https://lore.kernel.org/r/20241110-adreno-smmu-aparture-v2-1-9b1fb2ee41d4@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- include/linux/firmware/qcom/qcom_scm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index 9f14976399ab..4621aec0328c 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -85,6 +85,8 @@ int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); bool qcom_scm_restore_sec_cfg_available(void); int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); +int qcom_scm_set_gpu_smmu_aperture(unsigned int context_bank); +bool qcom_scm_set_gpu_smmu_aperture_is_available(void); int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size); -- cgit v1.2.3 From 7f4b3960e54faec72132a71da4a84a8e2a0b9037 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 8 Nov 2024 11:41:44 +0100 Subject: net: netlink: add nla_get_*_default() accessors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are quite a number of places that use patterns such as if (attr) val = nla_get_u16(attr); else val = DEFAULT; Add nla_get_u16_default() and friends like that to not have to type this out all the time. Acked-by: Toke Høiland-Jørgensen Acked-by: Jakub Kicinski Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20241108114145.acd2aadb03ac.I3df6aac71d38a5baa1c0a03d0c7e82d4395c030e@changeid Signed-off-by: Jakub Kicinski --- include/net/netlink.h | 262 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 262 insertions(+) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index 2dc671c977ff..39eaa6be6ca8 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -142,6 +142,8 @@ * nla_get_flag(nla) return 1 if flag is true * nla_get_msecs(nla) get payload for a msecs attribute * + * The same functions also exist with _default(). + * * Attribute Misc: * nla_memcpy(dest, nla, count) copy attribute into memory * nla_memcmp(nla, data, size) compare attribute with memory area @@ -1695,6 +1697,20 @@ static inline u32 nla_get_u32(const struct nlattr *nla) return *(u32 *) nla_data(nla); } +/** + * nla_get_u32_default - return payload of u32 attribute or default + * @nla: u32 netlink attribute, may be %NULL + * @defvalue: default value to use if @nla is %NULL + * + * Return: the value of the attribute, or the default value if not present + */ +static inline u32 nla_get_u32_default(const struct nlattr *nla, u32 defvalue) +{ + if (!nla) + return defvalue; + return nla_get_u32(nla); +} + /** * nla_get_be32 - return payload of __be32 attribute * @nla: __be32 netlink attribute @@ -1704,6 +1720,21 @@ static inline __be32 nla_get_be32(const struct nlattr *nla) return *(__be32 *) nla_data(nla); } +/** + * nla_get_be32_default - return payload of be32 attribute or default + * @nla: __be32 netlink attribute, may be %NULL + * @defvalue: default value to use if @nla is %NULL + * + * Return: the value of the attribute, or the default value if not present + */ +static inline __be32 nla_get_be32_default(const struct nlattr *nla, + __be32 defvalue) +{ + if (!nla) + return defvalue; + return nla_get_be32(nla); +} + /** * nla_get_le32 - return payload of __le32 attribute * @nla: __le32 netlink attribute @@ -1713,6 +1744,21 @@ static inline __le32 nla_get_le32(const struct nlattr *nla) return *(__le32 *) nla_data(nla); } +/** + * nla_get_le32_default - return payload of le32 attribute or default + * @nla: __le32 netlink attribute, may be %NULL + * @defvalue: default value to use if @nla is %NULL + * + * Return: the value of the attribute, or the default value if not present + */ +static inline __le32 nla_get_le32_default(const struct nlattr *nla, + __le32 defvalue) +{ + if (!nla) + return defvalue; + return nla_get_le32(nla); +} + /** * nla_get_u16 - return payload of u16 attribute * @nla: u16 netlink attribute @@ -1722,6 +1768,20 @@ static inline u16 nla_get_u16(const struct nlattr *nla) return *(u16 *) nla_data(nla); } +/** + * nla_get_u16_default - return payload of u16 attribute or default + * @nla: u16 netlink attribute, may be %NULL + * @defvalue: default value to use if @nla is %NULL + * + * Return: the value of the attribute, or the default value if not present + */ +static inline u16 nla_get_u16_default(const struct nlattr *nla, u16 defvalue) +{ + if (!nla) + return defvalue; + return nla_get_u16(nla); +} + /** * nla_get_be16 - return payload of __be16 attribute * @nla: __be16 netlink attribute @@ -1731,6 +1791,21 @@ static inline __be16 nla_get_be16(const struct nlattr *nla) return *(__be16 *) nla_data(nla); } +/** + * nla_get_be16_default - return payload of be16 attribute or default + * @nla: __be16 netlink attribute, may be %NULL + * @defvalue: default value to use if @nla is %NULL + * + * Return: the value of the attribute, or the default value if not present + */ +static inline __be16 nla_get_be16_default(const struct nlattr *nla, + __be16 defvalue) +{ + if (!nla) + return defvalue; + return nla_get_be16(nla); +} + /** * nla_get_le16 - return payload of __le16 attribute * @nla: __le16 netlink attribute @@ -1740,6 +1815,21 @@ static inline __le16 nla_get_le16(const struct nlattr *nla) return *(__le16 *) nla_data(nla); } +/** + * nla_get_le16_default - return payload of le16 attribute or default + * @nla: __le16 netlink attribute, may be %NULL + * @defvalue: default value to use if @nla is %NULL + * + * Return: the value of the attribute, or the default value if not present + */ +static inline __le16 nla_get_le16_default(const struct nlattr *nla, + __le16 defvalue) +{ + if (!nla) + return defvalue; + return nla_get_le16(nla); +} + /** * nla_get_u8 - return payload of u8 attribute * @nla: u8 netlink attribute @@ -1749,6 +1839,20 @@ static inline u8 nla_get_u8(const struct nlattr *nla) return *(u8 *) nla_data(nla); } +/** + * nla_get_u8_default - return payload of u8 attribute or default + * @nla: u8 netlink attribute, may be %NULL + * @defvalue: default value to use if @nla is %NULL + * + * Return: the value of the attribute, or the default value if not present + */ +static inline u8 nla_get_u8_default(const struct nlattr *nla, u8 defvalue) +{ + if (!nla) + return defvalue; + return nla_get_u8(nla); +} + /** * nla_get_u64 - return payload of u64 attribute * @nla: u64 netlink attribute @@ -1762,6 +1866,20 @@ static inline u64 nla_get_u64(const struct nlattr *nla) return tmp; } +/** + * nla_get_u64_default - return payload of u64 attribute or default + * @nla: u64 netlink attribute, may be %NULL + * @defvalue: default value to use if @nla is %NULL + * + * Return: the value of the attribute, or the default value if not present + */ +static inline u64 nla_get_u64_default(const struct nlattr *nla, u64 defvalue) +{ + if (!nla) + return defvalue; + return nla_get_u64(nla); +} + /** * nla_get_uint - return payload of uint attribute * @nla: uint netlink attribute @@ -1773,6 +1891,20 @@ static inline u64 nla_get_uint(const struct nlattr *nla) return nla_get_u64(nla); } +/** + * nla_get_uint_default - return payload of uint attribute or default + * @nla: uint netlink attribute, may be %NULL + * @defvalue: default value to use if @nla is %NULL + * + * Return: the value of the attribute, or the default value if not present + */ +static inline u64 nla_get_uint_default(const struct nlattr *nla, u64 defvalue) +{ + if (!nla) + return defvalue; + return nla_get_uint(nla); +} + /** * nla_get_be64 - return payload of __be64 attribute * @nla: __be64 netlink attribute @@ -1786,6 +1918,21 @@ static inline __be64 nla_get_be64(const struct nlattr *nla) return tmp; } +/** + * nla_get_be64_default - return payload of be64 attribute or default + * @nla: __be64 netlink attribute, may be %NULL + * @defvalue: default value to use if @nla is %NULL + * + * Return: the value of the attribute, or the default value if not present + */ +static inline __be64 nla_get_be64_default(const struct nlattr *nla, + __be64 defvalue) +{ + if (!nla) + return defvalue; + return nla_get_be64(nla); +} + /** * nla_get_le64 - return payload of __le64 attribute * @nla: __le64 netlink attribute @@ -1795,6 +1942,21 @@ static inline __le64 nla_get_le64(const struct nlattr *nla) return *(__le64 *) nla_data(nla); } +/** + * nla_get_le64_default - return payload of le64 attribute or default + * @nla: __le64 netlink attribute, may be %NULL + * @defvalue: default value to use if @nla is %NULL + * + * Return: the value of the attribute, or the default value if not present + */ +static inline __le64 nla_get_le64_default(const struct nlattr *nla, + __le64 defvalue) +{ + if (!nla) + return defvalue; + return nla_get_le64(nla); +} + /** * nla_get_s32 - return payload of s32 attribute * @nla: s32 netlink attribute @@ -1804,6 +1966,20 @@ static inline s32 nla_get_s32(const struct nlattr *nla) return *(s32 *) nla_data(nla); } +/** + * nla_get_s32_default - return payload of s32 attribute or default + * @nla: s32 netlink attribute, may be %NULL + * @defvalue: default value to use if @nla is %NULL + * + * Return: the value of the attribute, or the default value if not present + */ +static inline s32 nla_get_s32_default(const struct nlattr *nla, s32 defvalue) +{ + if (!nla) + return defvalue; + return nla_get_s32(nla); +} + /** * nla_get_s16 - return payload of s16 attribute * @nla: s16 netlink attribute @@ -1813,6 +1989,20 @@ static inline s16 nla_get_s16(const struct nlattr *nla) return *(s16 *) nla_data(nla); } +/** + * nla_get_s16_default - return payload of s16 attribute or default + * @nla: s16 netlink attribute, may be %NULL + * @defvalue: default value to use if @nla is %NULL + * + * Return: the value of the attribute, or the default value if not present + */ +static inline s16 nla_get_s16_default(const struct nlattr *nla, s16 defvalue) +{ + if (!nla) + return defvalue; + return nla_get_s16(nla); +} + /** * nla_get_s8 - return payload of s8 attribute * @nla: s8 netlink attribute @@ -1822,6 +2012,20 @@ static inline s8 nla_get_s8(const struct nlattr *nla) return *(s8 *) nla_data(nla); } +/** + * nla_get_s8_default - return payload of s8 attribute or default + * @nla: s8 netlink attribute, may be %NULL + * @defvalue: default value to use if @nla is %NULL + * + * Return: the value of the attribute, or the default value if not present + */ +static inline s8 nla_get_s8_default(const struct nlattr *nla, s8 defvalue) +{ + if (!nla) + return defvalue; + return nla_get_s8(nla); +} + /** * nla_get_s64 - return payload of s64 attribute * @nla: s64 netlink attribute @@ -1835,6 +2039,20 @@ static inline s64 nla_get_s64(const struct nlattr *nla) return tmp; } +/** + * nla_get_s64_default - return payload of s64 attribute or default + * @nla: s64 netlink attribute, may be %NULL + * @defvalue: default value to use if @nla is %NULL + * + * Return: the value of the attribute, or the default value if not present + */ +static inline s64 nla_get_s64_default(const struct nlattr *nla, s64 defvalue) +{ + if (!nla) + return defvalue; + return nla_get_s64(nla); +} + /** * nla_get_sint - return payload of uint attribute * @nla: uint netlink attribute @@ -1846,6 +2064,20 @@ static inline s64 nla_get_sint(const struct nlattr *nla) return nla_get_s64(nla); } +/** + * nla_get_sint_default - return payload of sint attribute or default + * @nla: sint netlink attribute, may be %NULL + * @defvalue: default value to use if @nla is %NULL + * + * Return: the value of the attribute, or the default value if not present + */ +static inline s64 nla_get_sint_default(const struct nlattr *nla, s64 defvalue) +{ + if (!nla) + return defvalue; + return nla_get_sint(nla); +} + /** * nla_get_flag - return payload of flag attribute * @nla: flag netlink attribute @@ -1868,6 +2100,21 @@ static inline unsigned long nla_get_msecs(const struct nlattr *nla) return msecs_to_jiffies((unsigned long) msecs); } +/** + * nla_get_msecs_default - return payload of msecs attribute or default + * @nla: msecs netlink attribute, may be %NULL + * @defvalue: default value to use if @nla is %NULL + * + * Return: the value of the attribute, or the default value if not present + */ +static inline unsigned long nla_get_msecs_default(const struct nlattr *nla, + unsigned long defvalue) +{ + if (!nla) + return defvalue; + return nla_get_msecs(nla); +} + /** * nla_get_in_addr - return payload of IPv4 address attribute * @nla: IPv4 address netlink attribute @@ -1877,6 +2124,21 @@ static inline __be32 nla_get_in_addr(const struct nlattr *nla) return *(__be32 *) nla_data(nla); } +/** + * nla_get_in_addr_default - return payload of be32 attribute or default + * @nla: IPv4 address netlink attribute, may be %NULL + * @defvalue: default value to use if @nla is %NULL + * + * Return: the value of the attribute, or the default value if not present + */ +static inline __be32 nla_get_in_addr_default(const struct nlattr *nla, + __be32 defvalue) +{ + if (!nla) + return defvalue; + return nla_get_in_addr(nla); +} + /** * nla_get_in6_addr - return payload of IPv6 address attribute * @nla: IPv6 address netlink attribute -- cgit v1.2.3 From b376d519bd142c65ba9bba35db12b6be95b46893 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 29 Sep 2024 20:50:16 -0400 Subject: xdrgen: Implement big-endian enums Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5f775e104f9a..a2ab813a9800 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -680,6 +680,27 @@ xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr) return 0; } +/** + * xdr_stream_decode_be32 - Decode a big-endian 32-bit integer + * @xdr: pointer to xdr_stream + * @ptr: location to store integer + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_be32(struct xdr_stream *xdr, __be32 *ptr) +{ + const size_t count = sizeof(*ptr); + __be32 *p = xdr_inline_decode(xdr, count); + + if (unlikely(!p)) + return -EBADMSG; + *ptr = *p; + return 0; +} + /** * xdr_stream_decode_u64 - Decode a 64-bit integer * @xdr: pointer to xdr_stream -- cgit v1.2.3 From 631c2925bae41c11dcf3915a2ab5f3be9af54277 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Oct 2024 14:54:33 -0400 Subject: xdrgen: Keep track of on-the-wire data type widths The generic parts of the RPC layer need to know the widths (in XDR_UNIT increments) of the XDR data types defined for each protocol. As a first step, add dictionaries to keep track of the symbolic and actual maximum XDR width of XDR types. This makes it straightforward to look up the width of a type by its name. The built-in dictionaries are pre-loaded with the widths of the built-in XDR types as defined in RFC 4506. Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdrgen/_defs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/xdrgen/_defs.h b/include/linux/sunrpc/xdrgen/_defs.h index be9e62371758..20c7270aa64d 100644 --- a/include/linux/sunrpc/xdrgen/_defs.h +++ b/include/linux/sunrpc/xdrgen/_defs.h @@ -23,4 +23,13 @@ typedef struct { u8 *data; } opaque; +#define XDR_void (0) +#define XDR_bool (1) +#define XDR_int (1) +#define XDR_unsigned_int (1) +#define XDR_long (1) +#define XDR_unsigned_long (1) +#define XDR_hyper (2) +#define XDR_unsigned_hyper (2) + #endif /* _SUNRPC_XDRGEN__DEFS_H_ */ -- cgit v1.2.3 From 65941f10caf2c04781a7defa4ec0ab119dbd235a Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Mon, 28 Oct 2024 19:53:37 +0800 Subject: mm: move the page fragment allocator from page_alloc into its own file Inspired by [1], move the page fragment allocator from page_alloc into its own c file and header file, as we are about to make more change for it to replace another page_frag implementation in sock.c As this patchset is going to replace 'struct page_frag' with 'struct page_frag_cache' in sched.h, including page_frag_cache.h in sched.h has a compiler error caused by interdependence between mm_types.h and mm.h for asm-offsets.c, see [2]. So avoid the compiler error by moving 'struct page_frag_cache' to mm_types_task.h as suggested by Alexander, see [3]. 1. https://lore.kernel.org/all/20230411160902.4134381-3-dhowells@redhat.com/ 2. https://lore.kernel.org/all/15623dac-9358-4597-b3ee-3694a5956920@gmail.com/ 3. https://lore.kernel.org/all/CAKgT0UdH1yD=LSCXFJ=YM_aiA4OomD-2wXykO42bizaWMt_HOA@mail.gmail.com/ CC: David Howells CC: Linux-MM Signed-off-by: Yunsheng Lin Acked-by: Andrew Morton Reviewed-by: Alexander Duyck Link: https://patch.msgid.link/20241028115343.3405838-3-linyunsheng@huawei.com Signed-off-by: Jakub Kicinski --- include/linux/gfp.h | 22 ---------------------- include/linux/mm_types.h | 18 ------------------ include/linux/mm_types_task.h | 18 ++++++++++++++++++ include/linux/page_frag_cache.h | 31 +++++++++++++++++++++++++++++++ include/linux/skbuff.h | 1 + 5 files changed, 50 insertions(+), 40 deletions(-) create mode 100644 include/linux/page_frag_cache.h (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index a951de920e20..a0a6d25f883f 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -371,28 +371,6 @@ __meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mas extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); -struct page_frag_cache; -void page_frag_cache_drain(struct page_frag_cache *nc); -extern void __page_frag_cache_drain(struct page *page, unsigned int count); -void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, - gfp_t gfp_mask, unsigned int align_mask); - -static inline void *page_frag_alloc_align(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask, - unsigned int align) -{ - WARN_ON_ONCE(!is_power_of_2(align)); - return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align); -} - -static inline void *page_frag_alloc(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask) -{ - return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); -} - -extern void page_frag_free(void *addr); - #define __free_page(page) __free_pages((page), 0) #define free_page(addr) free_pages((addr), 0) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6e3bdf8e38bc..92314ef2d978 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -521,9 +521,6 @@ static_assert(sizeof(struct ptdesc) <= sizeof(struct page)); */ #define STRUCT_PAGE_MAX_SHIFT (order_base_2(sizeof(struct page))) -#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) -#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) - /* * page_private can be used on tail pages. However, PagePrivate is only * checked by the VM on the head page. So page_private on the tail pages @@ -542,21 +539,6 @@ static inline void *folio_get_private(struct folio *folio) return folio->private; } -struct page_frag_cache { - void * va; -#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) - __u16 offset; - __u16 size; -#else - __u32 offset; -#endif - /* we maintain a pagecount bias, so that we dont dirty cache line - * containing page->_refcount every time we allocate a fragment. - */ - unsigned int pagecnt_bias; - bool pfmemalloc; -}; - typedef unsigned long vm_flags_t; /* diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index bff5706b76e1..0ac6daebdd5c 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -8,6 +8,7 @@ * (These are defined separately to decouple sched.h from mm_types.h as much as possible.) */ +#include #include #include @@ -43,6 +44,23 @@ struct page_frag { #endif }; +#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) +#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) +struct page_frag_cache { + void *va; +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + __u16 offset; + __u16 size; +#else + __u32 offset; +#endif + /* we maintain a pagecount bias, so that we dont dirty cache line + * containing page->_refcount every time we allocate a fragment. + */ + unsigned int pagecnt_bias; + bool pfmemalloc; +}; + /* Track pages that require TLB flushes */ struct tlbflush_unmap_batch { #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h new file mode 100644 index 000000000000..67ac8626ed9b --- /dev/null +++ b/include/linux/page_frag_cache.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_PAGE_FRAG_CACHE_H +#define _LINUX_PAGE_FRAG_CACHE_H + +#include +#include +#include + +void page_frag_cache_drain(struct page_frag_cache *nc); +void __page_frag_cache_drain(struct page *page, unsigned int count); +void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, + gfp_t gfp_mask, unsigned int align_mask); + +static inline void *page_frag_alloc_align(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask, + unsigned int align) +{ + WARN_ON_ONCE(!is_power_of_2(align)); + return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align); +} + +static inline void *page_frag_alloc(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask) +{ + return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); +} + +void page_frag_free(void *addr); + +#endif diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e5095d75abba..60535c706851 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include -- cgit v1.2.3 From 3d18dfe69ce46f106af327736d2261d7e3ee81c0 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Mon, 28 Oct 2024 19:53:39 +0800 Subject: mm: page_frag: avoid caller accessing 'page_frag_cache' directly Use appropriate frag_page API instead of caller accessing 'page_frag_cache' directly. CC: Andrew Morton CC: Linux-MM Signed-off-by: Yunsheng Lin Reviewed-by: Alexander Duyck Acked-by: Chuck Lever Link: https://patch.msgid.link/20241028115343.3405838-5-linyunsheng@huawei.com Signed-off-by: Jakub Kicinski --- include/linux/page_frag_cache.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h index 67ac8626ed9b..0a52f7a179c8 100644 --- a/include/linux/page_frag_cache.h +++ b/include/linux/page_frag_cache.h @@ -7,6 +7,16 @@ #include #include +static inline void page_frag_cache_init(struct page_frag_cache *nc) +{ + nc->va = NULL; +} + +static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc) +{ + return !!nc->pfmemalloc; +} + void page_frag_cache_drain(struct page_frag_cache *nc); void __page_frag_cache_drain(struct page *page, unsigned int count); void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, -- cgit v1.2.3 From 0c3ce2f50261cd2f654d931eeb933c370a3a7d7a Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Mon, 28 Oct 2024 19:53:41 +0800 Subject: mm: page_frag: reuse existing space for 'size' and 'pfmemalloc' Currently there is one 'struct page_frag' for every 'struct sock' and 'struct task_struct', we are about to replace the 'struct page_frag' with 'struct page_frag_cache' for them. Before begin the replacing, we need to ensure the size of 'struct page_frag_cache' is not bigger than the size of 'struct page_frag', as there may be tens of thousands of 'struct sock' and 'struct task_struct' instances in the system. By or'ing the page order & pfmemalloc with lower bits of 'va' instead of using 'u16' or 'u32' for page size and 'u8' for pfmemalloc, we are able to avoid 3 or 5 bytes space waste. And page address & pfmemalloc & order is unchanged for the same page in the same 'page_frag_cache' instance, it makes sense to fit them together. After this patch, the size of 'struct page_frag_cache' should be the same as the size of 'struct page_frag'. CC: Andrew Morton CC: Linux-MM Signed-off-by: Yunsheng Lin Reviewed-by: Alexander Duyck Link: https://patch.msgid.link/20241028115343.3405838-7-linyunsheng@huawei.com Signed-off-by: Jakub Kicinski --- include/linux/mm_types_task.h | 19 +++++++++++-------- include/linux/page_frag_cache.h | 24 ++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index 0ac6daebdd5c..a82aa80c0ba4 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -47,18 +47,21 @@ struct page_frag { #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) #define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) struct page_frag_cache { - void *va; -#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + /* encoded_page consists of the virtual address, pfmemalloc bit and + * order of a page. + */ + unsigned long encoded_page; + + /* we maintain a pagecount bias, so that we dont dirty cache line + * containing page->_refcount every time we allocate a fragment. + */ +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32) __u16 offset; - __u16 size; + __u16 pagecnt_bias; #else __u32 offset; + __u32 pagecnt_bias; #endif - /* we maintain a pagecount bias, so that we dont dirty cache line - * containing page->_refcount every time we allocate a fragment. - */ - unsigned int pagecnt_bias; - bool pfmemalloc; }; /* Track pages that require TLB flushes */ diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h index 0a52f7a179c8..41a91df82631 100644 --- a/include/linux/page_frag_cache.h +++ b/include/linux/page_frag_cache.h @@ -3,18 +3,38 @@ #ifndef _LINUX_PAGE_FRAG_CACHE_H #define _LINUX_PAGE_FRAG_CACHE_H +#include #include #include #include +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) +/* Use a full byte here to enable assembler optimization as the shift + * operation is usually expecting a byte. + */ +#define PAGE_FRAG_CACHE_ORDER_MASK GENMASK(7, 0) +#else +/* Compiler should be able to figure out we don't read things as any value + * ANDed with 0 is 0. + */ +#define PAGE_FRAG_CACHE_ORDER_MASK 0 +#endif + +#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT (PAGE_FRAG_CACHE_ORDER_MASK + 1) + +static inline bool encoded_page_decode_pfmemalloc(unsigned long encoded_page) +{ + return !!(encoded_page & PAGE_FRAG_CACHE_PFMEMALLOC_BIT); +} + static inline void page_frag_cache_init(struct page_frag_cache *nc) { - nc->va = NULL; + nc->encoded_page = 0; } static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc) { - return !!nc->pfmemalloc; + return encoded_page_decode_pfmemalloc(nc->encoded_page); } void page_frag_cache_drain(struct page_frag_cache *nc); -- cgit v1.2.3 From d2bd39c0456b75be9dfc7d774b8d021355c26ae3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 18 Oct 2024 17:47:49 +0300 Subject: PCI: Store all PCIe Supported Link Speeds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCIe bandwidth controller added by a subsequent commit will require selecting PCIe Link Speeds that are lower than the Maximum Link Speed. The struct pci_bus only stores max_bus_speed. Even if PCIe r6.1 sec 8.2.1 currently disallows gaps in supported Link Speeds, the Implementation Note in PCIe r6.1 sec 7.5.3.18, recommends determining supported Link Speeds using the Supported Link Speeds Vector in the Link Capabilities 2 Register (when available) to "avoid software being confused if a future specification defines Links that do not require support for all slower speeds." Reuse code in pcie_get_speed_cap() to add pcie_get_supported_speeds() to query the Supported Link Speeds Vector of a PCIe device. The value is taken directly from the Supported Link Speeds Vector or synthesized from the Max Link Speed in the Link Capabilities Register when the Link Capabilities 2 Register is not available. The Supported Link Speeds Vector in the Link Capabilities Register 2 corresponds to the bus below on Root Ports and Downstream Ports, whereas it corresponds to the bus above on Upstream Ports and Endpoints (PCIe r6.1 sec 7.5.3.18): Supported Link Speeds Vector - This field indicates the supported Link speed(s) of the associated Port. Add supported_speeds into the struct pci_dev that caches the Supported Link Speeds Vector. supported_speeds contains a set of Link Speeds only in the case where PCIe Link Speed can be determined. Root Complex Integrated Endpoints do not have a well-defined Link Speed because they do not implement either of the Link Capabilities Registers, which is allowed by PCIe r6.1 sec 7.5.3 (the same limitation applies to determining cur_bus_speed and max_bus_speed that are PCI_SPEED_UNKNOWN in such case). This is of no concern from PCIe bandwidth controller point of view because such devices are not attached into a PCIe Root Port that could be controlled. The supported_speeds field keeps the extra reserved zero at the least significant bit to match the Link Capabilities 2 Register layout. An attempt was made to store supported_speeds field into the struct pci_bus as an intersection of both ends of the Link, however, the subordinate struct pci_bus is not available early enough. The Target Speed quirk (in pcie_failed_link_retrain()) can run either during initial scan or later, requiring it to use the API provided by the PCIe bandwidth controller to set the Target Link Speed in order to co-exist with the bandwidth controller. When the Target Speed quirk is calling the bandwidth controller during initial scan, the struct pci_bus is not yet initialized. As such, storing supported_speeds into the struct pci_bus is not viable. Suggested-by: Lukas Wunner Link: https://lore.kernel.org/r/20241018144755.7875-4-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen [bhelgaas: move pcie_get_supported_speeds() decl to drivers/pci/pci.h] Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- include/linux/pci.h | 10 +++++++++- include/uapi/linux/pci_regs.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index be5ed534c39c..99c6fa30d25b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -318,7 +318,14 @@ struct pci_sriov; struct pci_p2pdma; struct rcec_ea; -/* The pci_dev structure describes PCI devices */ +/* struct pci_dev - describes a PCI device + * + * @supported_speeds: PCIe Supported Link Speeds Vector (+ reserved 0 at + * LSB). 0 when the supported speeds cannot be + * determined (e.g., for Root Complex Integrated + * Endpoints without the relevant Capability + * Registers). + */ struct pci_dev { struct list_head bus_list; /* Node in per-bus list */ struct pci_bus *bus; /* Bus this device is on */ @@ -522,6 +529,7 @@ struct pci_dev { struct npem *npem; /* Native PCIe Enclosure Management */ #endif u16 acs_cap; /* ACS Capability offset */ + u8 supported_speeds; /* Supported Link Speeds Vector */ phys_addr_t rom; /* Physical address if not from BAR */ size_t romlen; /* Length if not from BAR */ /* diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 12323b3334a9..f3c9de0a497c 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -678,6 +678,7 @@ #define PCI_EXP_DEVSTA2 0x2a /* Device Status 2 */ #define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 0x2c /* end of v2 EPs w/o link */ #define PCI_EXP_LNKCAP2 0x2c /* Link Capabilities 2 */ +#define PCI_EXP_LNKCAP2_SLS 0x000000fe /* Supported Link Speeds Vector */ #define PCI_EXP_LNKCAP2_SLS_2_5GB 0x00000002 /* Supported Speed 2.5GT/s */ #define PCI_EXP_LNKCAP2_SLS_5_0GB 0x00000004 /* Supported Speed 5GT/s */ #define PCI_EXP_LNKCAP2_SLS_8_0GB 0x00000008 /* Supported Speed 8GT/s */ -- cgit v1.2.3 From 38a18dfe9035d5a02a53271824de1854129c61dc Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 22 Oct 2024 15:48:51 -0700 Subject: PCI: Unexport pci_walk_bus_locked() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's only one user of pci_walk_bus_locked(), and it's internal to the PCI core. Unexport it and make it private to drivers/pci/. Link: https://lore.kernel.org/r/20241022224851.340648-6-kbusch@meta.com Signed-off-by: Keith Busch [bhelgaas: move decl to drivers/pci/pci.h] Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Davidlohr Bueso Reviewed-by: Ilpo Järvinen --- include/linux/pci.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..056290377273 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1623,8 +1623,6 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata); -void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), - void *userdata); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); void pci_setup_bridge(struct pci_bus *bus); -- cgit v1.2.3 From fb56007c9bc38550755a53f7afd71a287340b724 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 Nov 2024 06:10:19 +0000 Subject: vt_buffer.h: get rid of dead code in default scr_...() instances Only 4 architectures define VT_BUF_HAVE_RW (alpha, mips, powerpc, sparc) and all of them define VT_BUF_HAVE_MEM{SET,CPY,MOVE}W. In other words, the code under #ifdef VT_BUF_HAVE_RW in default scr_mem...w() instances won't be compiled anyway. Signed-off-by: Al Viro Signed-off-by: Arnd Bergmann --- include/linux/vt_buffer.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include') diff --git a/include/linux/vt_buffer.h b/include/linux/vt_buffer.h index 919d999a8c1d..b6eeb8cb6070 100644 --- a/include/linux/vt_buffer.h +++ b/include/linux/vt_buffer.h @@ -28,45 +28,21 @@ #ifndef VT_BUF_HAVE_MEMSETW static inline void scr_memsetw(u16 *s, u16 c, unsigned int count) { -#ifdef VT_BUF_HAVE_RW - count /= 2; - while (count--) - scr_writew(c, s++); -#else memset16(s, c, count / 2); -#endif } #endif #ifndef VT_BUF_HAVE_MEMCPYW static inline void scr_memcpyw(u16 *d, const u16 *s, unsigned int count) { -#ifdef VT_BUF_HAVE_RW - count /= 2; - while (count--) - scr_writew(scr_readw(s++), d++); -#else memcpy(d, s, count); -#endif } #endif #ifndef VT_BUF_HAVE_MEMMOVEW static inline void scr_memmovew(u16 *d, const u16 *s, unsigned int count) { -#ifdef VT_BUF_HAVE_RW - if (d < s) - scr_memcpyw(d, s, count); - else { - count /= 2; - d += count; - s += count; - while (count--) - scr_writew(scr_readw(--s), --d); - } -#else memmove(d, s, count); -#endif } #endif -- cgit v1.2.3 From 0af8e32343f8d0db31f593464fc140eaef25a281 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 Nov 2024 06:11:47 +0000 Subject: empty include/asm-generic/vga.h all places that use anything defined in it (vgacon, mdacon and vga16fb) are built only on architectures that have all that stuff in their native asm/vga.h allows to kill stub asm/vga.h on sh, while we are at it... Signed-off-by: Al Viro Signed-off-by: Arnd Bergmann --- include/asm-generic/vga.h | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vga.h b/include/asm-generic/vga.h index adf91a783b5c..5dcaf4ae904a 100644 --- a/include/asm-generic/vga.h +++ b/include/asm-generic/vga.h @@ -1,25 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * Access to VGA videoram - * - * (c) 1998 Martin Mares - */ #ifndef __ASM_GENERIC_VGA_H #define __ASM_GENERIC_VGA_H - -/* - * On most architectures that support VGA, we can just - * recalculate addresses and then access the videoram - * directly without any black magic. - * - * Everyone else needs to ioremap the address and use - * proper I/O accesses. - */ -#ifndef VGA_MAP_MEM -#define VGA_MAP_MEM(x, s) (unsigned long)phys_to_virt(x) -#endif - -#define vga_readb(x) (*(x)) -#define vga_writeb(x, y) (*(y) = (x)) - -#endif /* _ASM_GENERIC_VGA_H */ +#endif /* __ASM_GENERIC_VGA_H */ -- cgit v1.2.3 From 82ee16cfb290ae259d1cd6658a6988b430258e94 Mon Sep 17 00:00:00 2001 From: Karel Balej Date: Sat, 12 Oct 2024 21:31:39 +0200 Subject: rtc: add driver for Marvell 88PM886 PMIC RTC RTC lives on the chip's base register page. Add the relevant register definitions and implement a basic set/read time functionality. Tested with the samsung,coreprimevelte smartphone which contains this PMIC and whose vendor kernel tree has also served as the sole reference for this. Signed-off-by: Karel Balej Acked-by: Lee Jones Link: https://lore.kernel.org/r/20241012193345.18594-2-balejk@matfyz.cz Signed-off-by: Alexandre Belloni --- include/linux/mfd/88pm886.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/88pm886.h b/include/linux/mfd/88pm886.h index 133aa302e492..85eca44f39ab 100644 --- a/include/linux/mfd/88pm886.h +++ b/include/linux/mfd/88pm886.h @@ -31,6 +31,15 @@ #define PM886_INT_WC BIT(1) #define PM886_INT_MASK_MODE BIT(2) +#define PM886_REG_RTC_CNT1 0xd1 +#define PM886_REG_RTC_CNT2 0xd2 +#define PM886_REG_RTC_CNT3 0xd3 +#define PM886_REG_RTC_CNT4 0xd4 +#define PM886_REG_RTC_SPARE1 0xea +#define PM886_REG_RTC_SPARE2 0xeb +#define PM886_REG_RTC_SPARE3 0xec +#define PM886_REG_RTC_SPARE4 0xed +#define PM886_REG_RTC_SPARE5 0xee #define PM886_REG_RTC_SPARE6 0xef #define PM886_REG_BUCK_EN 0x08 -- cgit v1.2.3 From 8b9a7bd4d6c83300e50bb1d7071c6032a07e2fed Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 6 Nov 2024 13:00:45 +0000 Subject: rxrpc: Add a tracepoint for aborts being proposed Add a tracepoint to rxrpc to trace the proposal of an abort. The abort is performed asynchronously by the I/O thread. Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/726356.1730898045@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index cc22596c7250..d03e0bd8c028 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -773,6 +773,31 @@ TRACE_EVENT(rxrpc_rx_done, TP_printk("r=%d a=%d", __entry->result, __entry->abort_code) ); +TRACE_EVENT(rxrpc_abort_call, + TP_PROTO(const struct rxrpc_call *call, int abort_code), + + TP_ARGS(call, abort_code), + + TP_STRUCT__entry( + __field(unsigned int, call_nr) + __field(enum rxrpc_abort_reason, why) + __field(int, abort_code) + __field(int, error) + ), + + TP_fast_assign( + __entry->call_nr = call->debug_id; + __entry->why = call->send_abort_why; + __entry->abort_code = abort_code; + __entry->error = call->send_abort_err; + ), + + TP_printk("c=%08x a=%d e=%d %s", + __entry->call_nr, + __entry->abort_code, __entry->error, + __print_symbolic(__entry->why, rxrpc_abort_reasons)) + ); + TRACE_EVENT(rxrpc_abort, TP_PROTO(unsigned int call_nr, enum rxrpc_abort_reason why, u32 cid, u32 call_id, rxrpc_seq_t seq, int abort_code, int error), -- cgit v1.2.3 From a7306f3c283bfe03611229bb6280987aae2af8f9 Mon Sep 17 00:00:00 2001 From: Nataniel Farzan Date: Mon, 4 Nov 2024 19:22:32 -0800 Subject: Improve consistency of '#error' directive messages Remove the use of contractions and use proper punctuation in #error directive messages that discourage the direct inclusion of header files. Link: https://lkml.kernel.org/r/20241105032231.28833-1-natanielfarzan@gmail.com Signed-off-by: Nataniel Farzan Signed-off-by: Andrew Morton --- include/acpi/platform/aclinux.h | 2 +- include/linux/compiler-clang.h | 2 +- include/linux/compiler-gcc.h | 2 +- include/linux/pm_wakeup.h | 2 +- include/linux/rwlock.h | 2 +- include/linux/rwlock_api_smp.h | 2 +- include/linux/spinlock_api_smp.h | 2 +- include/linux/spinlock_types_up.h | 2 +- include/linux/spinlock_up.h | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index 565341c826e3..f3249b7df5cb 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -15,7 +15,7 @@ /* ACPICA external files should not include ACPICA headers directly. */ #if !defined(BUILDING_ACPICA) && !defined(_LINUX_ACPI_H) -#error "Please don't include directly, include instead." +#error "Please do not include directly, include instead." #endif #endif diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 4c1a39dcb624..2e7c2c282f3a 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_COMPILER_TYPES_H -#error "Please don't include directly, include instead." +#error "Please do not include directly, include instead." #endif /* Compiler specific definitions for Clang compiler */ diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index cd6f9aae311f..d0ed9583743f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_COMPILER_TYPES_H -#error "Please don't include directly, include instead." +#error "Please do not include directly, include instead." #endif /* diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 76cd1f9f1365..222f7530806c 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -10,7 +10,7 @@ #define _LINUX_PM_WAKEUP_H #ifndef _DEVICE_H_ -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif #include diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index c0ef596f340b..5b87c6f4a243 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -2,7 +2,7 @@ #define __LINUX_RWLOCK_H #ifndef __LINUX_INSIDE_SPINLOCK_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif /* diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h index dceb0a59b692..31d3d1116323 100644 --- a/include/linux/rwlock_api_smp.h +++ b/include/linux/rwlock_api_smp.h @@ -2,7 +2,7 @@ #define __LINUX_RWLOCK_API_SMP_H #ifndef __LINUX_SPINLOCK_API_SMP_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif /* diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 89eb6f4c659c..9ecb0ab504e3 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -2,7 +2,7 @@ #define __LINUX_SPINLOCK_API_SMP_H #ifndef __LINUX_INSIDE_SPINLOCK_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif /* diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h index 7f86a2016ac5..fc4e2d017c20 100644 --- a/include/linux/spinlock_types_up.h +++ b/include/linux/spinlock_types_up.h @@ -2,7 +2,7 @@ #define __LINUX_SPINLOCK_TYPES_UP_H #ifndef __LINUX_SPINLOCK_TYPES_RAW_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif /* diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index c87204247592..1e84e71ca495 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -2,7 +2,7 @@ #define __LINUX_SPINLOCK_UP_H #ifndef __LINUX_INSIDE_SPINLOCK_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif #include /* for cpu_relax() */ -- cgit v1.2.3 From bc73b4186736341ab5cd2c199da82db6e1134e13 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Tue, 5 Nov 2024 16:54:05 +0200 Subject: util_macros.h: fix/rework find_closest() macros A bug was found in the find_closest() (find_closest_descending() is also affected after some testing), where for certain values with small progressions, the rounding (done by averaging 2 values) causes an incorrect index to be returned. The rounding issues occur for progressions of 1, 2 and 3. It goes away when the progression/interval between two values is 4 or larger. It's particularly bad for progressions of 1. For example if there's an array of 'a = { 1, 2, 3 }', using 'find_closest(2, a ...)' would return 0 (the index of '1'), rather than returning 1 (the index of '2'). This means that for exact values (with a progression of 1), find_closest() will misbehave and return the index of the value smaller than the one we're searching for. For progressions of 2 and 3, the exact values are obtained correctly; but values aren't approximated correctly (as one would expect). Starting with progressions of 4, all seems to be good (one gets what one would expect). While one could argue that 'find_closest()' should not be used for arrays with progressions of 1 (i.e. '{1, 2, 3, ...}', the macro should still behave correctly. The bug was found while testing the 'drivers/iio/adc/ad7606.c', specifically the oversampling feature. For reference, the oversampling values are listed as: static const unsigned int ad7606_oversampling_avail[7] = { 1, 2, 4, 8, 16, 32, 64, }; When doing: 1. $ echo 1 > /sys/bus/iio/devices/iio\:device0/oversampling_ratio $ cat /sys/bus/iio/devices/iio\:device0/oversampling_ratio 1 # this is fine 2. $ echo 2 > /sys/bus/iio/devices/iio\:device0/oversampling_ratio $ cat /sys/bus/iio/devices/iio\:device0/oversampling_ratio 1 # this is wrong; 2 should be returned here 3. $ echo 3 > /sys/bus/iio/devices/iio\:device0/oversampling_ratio $ cat /sys/bus/iio/devices/iio\:device0/oversampling_ratio 2 # this is fine 4. $ echo 4 > /sys/bus/iio/devices/iio\:device0/oversampling_ratio $ cat /sys/bus/iio/devices/iio\:device0/oversampling_ratio 4 # this is fine And from here-on, the values are as correct (one gets what one would expect.) While writing a kunit test for this bug, a peculiar issue was found for the array in the 'drivers/hwmon/ina2xx.c' & 'drivers/iio/adc/ina2xx-adc.c' drivers. While running the kunit test (for 'ina226_avg_tab' from these drivers): * idx = find_closest([-1 to 2], ina226_avg_tab, ARRAY_SIZE(ina226_avg_tab)); This returns idx == 0, so value. * idx = find_closest(3, ina226_avg_tab, ARRAY_SIZE(ina226_avg_tab)); This returns idx == 0, value 1; and now one could argue whether 3 is closer to 4 or to 1. This quirk only appears for value '3' in this array, but it seems to be a another rounding issue. * And from 4 onwards the 'find_closest'() works fine (one gets what one would expect). This change reworks the find_closest() macros to also check the difference between the left and right elements when 'x'. If the distance to the right is smaller (than the distance to the left), the index is incremented by 1. This also makes redundant the need for using the DIV_ROUND_CLOSEST() macro. In order to accommodate for any mix of negative + positive values, the internal variables '__fc_x', '__fc_mid_x', '__fc_left' & '__fc_right' are forced to 'long' type. This also addresses any potential bugs/issues with 'x' being of an unsigned type. In those situations any comparison between signed & unsigned would be promoted to a comparison between 2 unsigned numbers; this is especially annoying when '__fc_left' & '__fc_right' underflow. The find_closest_descending() macro was also reworked and duplicated from the find_closest(), and it is being iterated in reverse. The main reason for this is to get the same indices as 'find_closest()' (but in reverse). The comparison for '__fc_right < __fc_left' favors going the array in ascending order. For example for array '{ 1024, 512, 256, 128, 64, 16, 4, 1 }' and x = 3, we get: __fc_mid_x = 2 __fc_left = -1 __fc_right = -2 Then '__fc_right < __fc_left' evaluates to true and '__fc_i++' becomes 7 which is not quite incorrect, but 3 is closer to 4 than to 1. This change has been validated with the kunit from the next patch. Link: https://lkml.kernel.org/r/20241105145406.554365-1-aardelean@baylibre.com Fixes: 95d119528b0b ("util_macros.h: add find_closest() macro") Signed-off-by: Alexandru Ardelean Cc: Bartosz Golaszewski Cc: Greg Kroah-Hartman Cc: Signed-off-by: Andrew Morton --- include/linux/util_macros.h | 56 ++++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h index 6bb460c3e818..825487fb66fa 100644 --- a/include/linux/util_macros.h +++ b/include/linux/util_macros.h @@ -4,19 +4,6 @@ #include -#define __find_closest(x, a, as, op) \ -({ \ - typeof(as) __fc_i, __fc_as = (as) - 1; \ - typeof(x) __fc_x = (x); \ - typeof(*a) const *__fc_a = (a); \ - for (__fc_i = 0; __fc_i < __fc_as; __fc_i++) { \ - if (__fc_x op DIV_ROUND_CLOSEST(__fc_a[__fc_i] + \ - __fc_a[__fc_i + 1], 2)) \ - break; \ - } \ - (__fc_i); \ -}) - /** * find_closest - locate the closest element in a sorted array * @x: The reference value. @@ -25,8 +12,27 @@ * @as: Size of 'a'. * * Returns the index of the element closest to 'x'. + * Note: If using an array of negative numbers (or mixed positive numbers), + * then be sure that 'x' is of a signed-type to get good results. */ -#define find_closest(x, a, as) __find_closest(x, a, as, <=) +#define find_closest(x, a, as) \ +({ \ + typeof(as) __fc_i, __fc_as = (as) - 1; \ + long __fc_mid_x, __fc_x = (x); \ + long __fc_left, __fc_right; \ + typeof(*a) const *__fc_a = (a); \ + for (__fc_i = 0; __fc_i < __fc_as; __fc_i++) { \ + __fc_mid_x = (__fc_a[__fc_i] + __fc_a[__fc_i + 1]) / 2; \ + if (__fc_x <= __fc_mid_x) { \ + __fc_left = __fc_x - __fc_a[__fc_i]; \ + __fc_right = __fc_a[__fc_i + 1] - __fc_x; \ + if (__fc_right < __fc_left) \ + __fc_i++; \ + break; \ + } \ + } \ + (__fc_i); \ +}) /** * find_closest_descending - locate the closest element in a sorted array @@ -36,9 +42,27 @@ * @as: Size of 'a'. * * Similar to find_closest() but 'a' is expected to be sorted in descending - * order. + * order. The iteration is done in reverse order, so that the comparison + * of '__fc_right' & '__fc_left' also works for unsigned numbers. */ -#define find_closest_descending(x, a, as) __find_closest(x, a, as, >=) +#define find_closest_descending(x, a, as) \ +({ \ + typeof(as) __fc_i, __fc_as = (as) - 1; \ + long __fc_mid_x, __fc_x = (x); \ + long __fc_left, __fc_right; \ + typeof(*a) const *__fc_a = (a); \ + for (__fc_i = __fc_as; __fc_i >= 1; __fc_i--) { \ + __fc_mid_x = (__fc_a[__fc_i] + __fc_a[__fc_i - 1]) / 2; \ + if (__fc_x <= __fc_mid_x) { \ + __fc_left = __fc_x - __fc_a[__fc_i]; \ + __fc_right = __fc_a[__fc_i - 1] - __fc_x; \ + if (__fc_right < __fc_left) \ + __fc_i--; \ + break; \ + } \ + } \ + (__fc_i); \ +}) /** * is_insidevar - check if the @ptr points inside the @var memory range. -- cgit v1.2.3 From 3f28bbe56c7b77e73f1dd0515cad009cfdd64962 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Tue, 5 Nov 2024 01:52:52 +0800 Subject: mm/list_lru: don't pass unnecessary key parameters Patch series "mm/list_lru: Split list_lru lock into per-cgroup scope". When LOCKDEP is not enabled, lock_class_key is an empty struct that is never used. But the list_lru initialization function still takes a placeholder pointer as parameter, and the compiler cannot optimize it because the function is not static and exported. Remove this parameter and move it inside the list_lru struct. Only use it when LOCKDEP is enabled. Kernel builds with LOCKDEP will be slightly larger, while !LOCKDEP builds without it will be slightly smaller (the common case). Link: https://lkml.kernel.org/r/20241104175257.60853-1-ryncsn@gmail.com Link: https://lkml.kernel.org/r/20241104175257.60853-2-ryncsn@gmail.com Signed-off-by: Kairui Song Acked-by: Shakeel Butt Cc: Chengming Zhou Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Muchun Song Cc: Qi Zheng Cc: Roman Gushchin Cc: Waiman Long Signed-off-by: Andrew Morton --- include/linux/list_lru.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index 5099a8ccd5f4..eba93f6511f3 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -56,16 +56,28 @@ struct list_lru { bool memcg_aware; struct xarray xa; #endif +#ifdef CONFIG_LOCKDEP + struct lock_class_key *key; +#endif }; void list_lru_destroy(struct list_lru *lru); int __list_lru_init(struct list_lru *lru, bool memcg_aware, - struct lock_class_key *key, struct shrinker *shrinker); + struct shrinker *shrinker); #define list_lru_init(lru) \ - __list_lru_init((lru), false, NULL, NULL) + __list_lru_init((lru), false, NULL) #define list_lru_init_memcg(lru, shrinker) \ - __list_lru_init((lru), true, NULL, shrinker) + __list_lru_init((lru), true, shrinker) + +static inline int list_lru_init_memcg_key(struct list_lru *lru, struct shrinker *shrinker, + struct lock_class_key *key) +{ +#ifdef CONFIG_LOCKDEP + lru->key = key; +#endif + return list_lru_init_memcg(lru, shrinker); +} int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru, gfp_t gfp); -- cgit v1.2.3 From fb56fdf8b9a2f7397f8a83dce50189f3f0cf71af Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Tue, 5 Nov 2024 01:52:56 +0800 Subject: mm/list_lru: split the lock to per-cgroup scope Currently, every list_lru has a per-node lock that protects adding, deletion, isolation, and reparenting of all list_lru_one instances belonging to this list_lru on this node. This lock contention is heavy when multiple cgroups modify the same list_lru. This lock can be split into per-cgroup scope to reduce contention. To achieve this, we need a stable list_lru_one for every cgroup. This commit adds a lock to each list_lru_one and introduced a helper function lock_list_lru_of_memcg, making it possible to pin the list_lru of a memcg. Then reworked the reparenting process. Reparenting will switch the list_lru_one instances one by one. By locking each instance and marking it dead using the nr_items counter, reparenting ensures that all items in the corresponding cgroup (on-list or not, because items have a stable cgroup, see below) will see the list_lru_one switch synchronously. Objcg reparent is also moved after list_lru reparent so items will have a stable mem cgroup until all list_lru_one instances are drained. The only caller that doesn't work the *_obj interfaces are direct calls to list_lru_{add,del}. But it's only used by zswap and that's also based on objcg, so it's fine. This also changes the bahaviour of the isolation function when LRU_RETRY or LRU_REMOVED_RETRY is returned, because now releasing the lock could unblock reparenting and free the list_lru_one, isolation function will have to return withoug re-lock the lru. prepare() { mkdir /tmp/test-fs modprobe brd rd_nr=1 rd_size=33554432 mkfs.xfs -f /dev/ram0 mount -t xfs /dev/ram0 /tmp/test-fs for i in $(seq 1 512); do mkdir "/tmp/test-fs/$i" for j in $(seq 1 10240); do echo TEST-CONTENT > "/tmp/test-fs/$i/$j" done & done; wait } do_test() { read_worker() { sleep 1 tar -cv "$1" &>/dev/null } read_in_all() { cd "/tmp/test-fs" && ls for i in $(seq 1 512); do (exec sh -c 'echo "$PPID"') > "/sys/fs/cgroup/benchmark/$i/cgroup.procs" read_worker "$i" & done; wait } for i in $(seq 1 512); do mkdir -p "/sys/fs/cgroup/benchmark/$i" done echo +memory > /sys/fs/cgroup/benchmark/cgroup.subtree_control echo 512M > /sys/fs/cgroup/benchmark/memory.max echo 3 > /proc/sys/vm/drop_caches time read_in_all } Above script simulates compression of small files in multiple cgroups with memory pressure. Run prepare() then do_test for 6 times: Before: real 0m7.762s user 0m11.340s sys 3m11.224s real 0m8.123s user 0m11.548s sys 3m2.549s real 0m7.736s user 0m11.515s sys 3m11.171s real 0m8.539s user 0m11.508s sys 3m7.618s real 0m7.928s user 0m11.349s sys 3m13.063s real 0m8.105s user 0m11.128s sys 3m14.313s After this commit (about ~15% faster): real 0m6.953s user 0m11.327s sys 2m42.912s real 0m7.453s user 0m11.343s sys 2m51.942s real 0m6.916s user 0m11.269s sys 2m43.957s real 0m6.894s user 0m11.528s sys 2m45.346s real 0m6.911s user 0m11.095s sys 2m43.168s real 0m6.773s user 0m11.518s sys 2m40.774s Link: https://lkml.kernel.org/r/20241104175257.60853-6-ryncsn@gmail.com Signed-off-by: Kairui Song Cc: Chengming Zhou Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Muchun Song Cc: Qi Zheng Cc: Roman Gushchin Cc: Shakeel Butt Cc: Waiman Long Signed-off-by: Andrew Morton --- include/linux/list_lru.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index eba93f6511f3..10ba9a54d42c 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -32,6 +32,8 @@ struct list_lru_one { struct list_head list; /* may become negative during memcg reparenting */ long nr_items; + /* protects all fields above */ + spinlock_t lock; }; struct list_lru_memcg { @@ -41,11 +43,9 @@ struct list_lru_memcg { }; struct list_lru_node { - /* protects all lists on the node, including per cgroup */ - spinlock_t lock; /* global list, used for the root cgroup in cgroup aware lrus */ struct list_lru_one lru; - long nr_items; + atomic_long_t nr_items; } ____cacheline_aligned_in_smp; struct list_lru { -- cgit v1.2.3 From da0c02516c501b43bd39ad4aca5779c86153d143 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Tue, 5 Nov 2024 01:52:57 +0800 Subject: mm/list_lru: simplify the list_lru walk callback function Now isolation no longer takes the list_lru global node lock, only use the per-cgroup lock instead. And this lock is inside the list_lru_one being walked, no longer needed to pass the lock explicitly. Link: https://lkml.kernel.org/r/20241104175257.60853-7-ryncsn@gmail.com Signed-off-by: Kairui Song Cc: Chengming Zhou Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Muchun Song Cc: Qi Zheng Cc: Roman Gushchin Cc: Shakeel Butt Cc: Waiman Long Signed-off-by: Andrew Morton --- include/linux/list_lru.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index 10ba9a54d42c..05c166811f6b 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -184,7 +184,7 @@ void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item, struct list_head *head); typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item, - struct list_lru_one *list, spinlock_t *lock, void *cb_arg); + struct list_lru_one *list, void *cb_arg); /** * list_lru_walk_one: walk a @lru, isolating and disposing freeable items. -- cgit v1.2.3 From 7591c127f3b17d5879f18819cad7058bf3a2e276 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 4 Nov 2024 11:19:44 +0000 Subject: kmemleak: iommu/iova: fix transient kmemleak false positive The introduction of iova_depot_pop() in 911aa1245da8 ("iommu/iova: Make the rcache depot scale better") confused kmemleak by moving a struct iova_magazine object from a singly linked list to rcache->depot and resetting the 'next' pointer referencing it. Unlike doubly linked lists, the content of the object being referred is never changed on removal from a singly linked list and the kmemleak checksum heuristics do not detect such scenario. This leads to false positives like: unreferenced object 0xffff8881a5301000 (size 1024): comm "softirq", pid 0, jiffies 4306297099 (age 462.991s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 e7 7d 05 00 00 00 00 00 .........}...... 0f b4 05 00 00 00 00 00 b4 96 05 00 00 00 00 00 ................ backtrace: [] __kmem_cache_alloc_node+0x1e8/0x320 [] kmalloc_trace+0x2a/0x60 [] free_iova_fast+0x28e/0x4e0 [] fq_ring_free_locked+0x1b0/0x310 [] fq_flush_timeout+0x19d/0x2e0 [] call_timer_fn+0x19a/0x5c0 [] __run_timers+0x78b/0xb80 [] run_timer_softirq+0x5d/0xd0 [] __do_softirq+0x205/0x8b5 Introduce kmemleak_transient_leak() which resets the object checksum requiring another scan pass before it is reported (if still unreferenced). Call this new API in iova_depot_pop(). Link: https://lkml.kernel.org/r/20241104111944.2207155-1-catalin.marinas@arm.com Link: https://lore.kernel.org/r/ZY1osaGLyT-sdKE8@shredder/ Signed-off-by: Catalin Marinas Reported-by: Ido Schimmel Tested-by: Ido Schimmel Acked-by: Robin Murphy Cc: Joerg Roedel Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/kmemleak.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 6a3cd1bf4680..93a73c076d16 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -26,6 +26,7 @@ extern void kmemleak_free_part(const void *ptr, size_t size) __ref; extern void kmemleak_free_percpu(const void __percpu *ptr) __ref; extern void kmemleak_update_trace(const void *ptr) __ref; extern void kmemleak_not_leak(const void *ptr) __ref; +extern void kmemleak_transient_leak(const void *ptr) __ref; extern void kmemleak_ignore(const void *ptr) __ref; extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref; extern void kmemleak_no_scan(const void *ptr) __ref; @@ -93,6 +94,9 @@ static inline void kmemleak_update_trace(const void *ptr) static inline void kmemleak_not_leak(const void *ptr) { } +static inline void kmemleak_transient_leak(const void *ptr) +{ +} static inline void kmemleak_ignore(const void *ptr) { } -- cgit v1.2.3 From 7269ed4af344184ab9bdf318fe8864cf64849735 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Mon, 4 Nov 2024 15:07:12 +0800 Subject: mm: define general function pXd_init() pud_init(), pmd_init() and kernel_pte_init() are duplicated defined in file kasan.c and sparse-vmemmap.c as weak functions. Move them to generic header file pgtable.h, architecture can redefine them. Link: https://lkml.kernel.org/r/20241104070712.52902-1-maobibo@loongson.cn Signed-off-by: Bibo Mao Reviewed-by: Huacai Chen Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Thomas Bogendoerfer Cc: Vincenzo Frascino Cc: WANG Xuerui Signed-off-by: Andrew Morton --- include/linux/mm.h | 3 --- include/linux/pgtable.h | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 32888a97ab44..5d6cd523c7c0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3819,9 +3819,6 @@ void *sparse_buffer_alloc(unsigned long size); struct page * __populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap, struct dev_pagemap *pgmap); -void pud_init(void *addr); -void pmd_init(void *addr); -void kernel_pte_init(void *addr); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 23aeffd89a4e..adef9d6e9b1b 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -90,6 +90,27 @@ static inline unsigned long pud_index(unsigned long address) #define pgd_index(a) (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) #endif +#ifndef kernel_pte_init +static inline void kernel_pte_init(void *addr) +{ +} +#define kernel_pte_init kernel_pte_init +#endif + +#ifndef pmd_init +static inline void pmd_init(void *addr) +{ +} +#define pmd_init pmd_init +#endif + +#ifndef pud_init +static inline void pud_init(void *addr) +{ +} +#define pud_init pud_init +#endif + #ifndef pte_offset_kernel static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) { -- cgit v1.2.3 From 9b5c87d47949292ff21ee2fadd1e83820662a430 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 5 Nov 2024 12:34:57 +0100 Subject: mm: mmap_lock: check trace_mmap_lock_$type_enabled() instead of regcount Since 7d6be67cfdd4 ("mm: mmap_lock: replace get_memcg_path_buf() with on-stack buffer") we use trace_mmap_lock_reg()/unreg() only to maintain an atomic reg_refcount which is checked to avoid performing get_mm_memcg_path() in case none of the tracepoints using it is enabled. This can be achieved directly by putting all the work needed for the tracepoint behind the trace_mmap_lock_##type##_enabled(), as suggested by Documentation/trace/tracepoints.rst and with the following advantages: - uses the tracepoint's static key instead of evaluating a branch - the check tracepoint specific, not shared by all of them - we can get rid of trace_mmap_lock_reg()/unreg() completely Thus use the trace_..._enabled() check and remove unnecessary code. Link: https://lkml.kernel.org/r/20241105113456.95066-2-vbabka@suse.cz Signed-off-by: Vlastimil Babka Reviewed-by: Axel Rasmussen Cc: Tetsuo Handa Cc: Steven Rostedt Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Signed-off-by: Andrew Morton --- include/trace/events/mmap_lock.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/trace/events/mmap_lock.h b/include/trace/events/mmap_lock.h index f2827f98a44f..bc2e3ad787b3 100644 --- a/include/trace/events/mmap_lock.h +++ b/include/trace/events/mmap_lock.h @@ -10,9 +10,6 @@ struct mm_struct; -extern int trace_mmap_lock_reg(void); -extern void trace_mmap_lock_unreg(void); - DECLARE_EVENT_CLASS(mmap_lock, TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write), @@ -40,16 +37,15 @@ DECLARE_EVENT_CLASS(mmap_lock, ); #define DEFINE_MMAP_LOCK_EVENT(name) \ - DEFINE_EVENT_FN(mmap_lock, name, \ + DEFINE_EVENT(mmap_lock, name, \ TP_PROTO(struct mm_struct *mm, const char *memcg_path, \ bool write), \ - TP_ARGS(mm, memcg_path, write), \ - trace_mmap_lock_reg, trace_mmap_lock_unreg) + TP_ARGS(mm, memcg_path, write)) DEFINE_MMAP_LOCK_EVENT(mmap_lock_start_locking); DEFINE_MMAP_LOCK_EVENT(mmap_lock_released); -TRACE_EVENT_FN(mmap_lock_acquire_returned, +TRACE_EVENT(mmap_lock_acquire_returned, TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write, bool success), @@ -76,9 +72,7 @@ TRACE_EVENT_FN(mmap_lock_acquire_returned, __get_str(memcg_path), __entry->write ? "true" : "false", __entry->success ? "true" : "false" - ), - - trace_mmap_lock_reg, trace_mmap_lock_unreg + ) ); #endif /* _TRACE_MMAP_LOCK_H */ -- cgit v1.2.3 From d5ec8d91f82ef78405b506737952dec8af95a95b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 7 Nov 2024 16:48:14 -0800 Subject: rtnetlink: Remove __rtnl_link_unregister(). rtnl_link_unregister() holds RTNL and calls __rtnl_link_unregister(), where we call synchronize_srcu() to wait inflight RTM_NEWLINK requests for per-netns RTNL. We put synchronize_srcu() in __rtnl_link_unregister() due to ifb.ko and dummy.ko. However, rtnl_newlink() will acquire SRCU before RTNL later in this series. Then, lockdep will detect the deadlock: rtnl_link_unregister() rtnl_newlink() ---- ---- lock(rtnl_mutex); lock(&ops->srcu); lock(rtnl_mutex); sync(&ops->srcu); To avoid the problem, we must call synchronize_srcu() before RTNL in rtnl_link_unregister(). As a preparation, let's remove __rtnl_link_unregister(). Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241108004823.29419-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/rtnetlink.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index b260c0cc9671..3ebfcc6e56fd 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -165,7 +165,6 @@ struct rtnl_link_ops { }; int __rtnl_link_register(struct rtnl_link_ops *ops); -void __rtnl_link_unregister(struct rtnl_link_ops *ops); int rtnl_link_register(struct rtnl_link_ops *ops); void rtnl_link_unregister(struct rtnl_link_ops *ops); -- cgit v1.2.3 From 6b57ff21a3109b1dba2d286ff415463e6fb1fca3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 7 Nov 2024 16:48:15 -0800 Subject: rtnetlink: Protect link_ops by mutex. rtnl_link_unregister() holds RTNL and calls synchronize_srcu(), but rtnl_newlink() will acquire SRCU frist and then RTNL. Then, we need to unlink ops and call synchronize_srcu() outside of RTNL to avoid the deadlock. rtnl_link_unregister() rtnl_newlink() ---- ---- lock(rtnl_mutex); lock(&ops->srcu); lock(rtnl_mutex); sync(&ops->srcu); Let's move as such and add a mutex to protect link_ops. Now, link_ops is protected by its dedicated mutex and rtnl_link_register() no longer needs to hold RTNL. While at it, we move the initialisation of ops->dellink and ops->srcu out of the mutex scope. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241108004823.29419-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/rtnetlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 3ebfcc6e56fd..7559020f760c 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -71,7 +71,7 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh) /** * struct rtnl_link_ops - rtnetlink link operations * - * @list: Used internally, protected by RTNL and SRCU + * @list: Used internally, protected by link_ops_mutex and SRCU * @srcu: Used internally * @kind: Identifier * @netns_refund: Physical device, move to init_net on netns exit -- cgit v1.2.3 From 68297dbb967f87c3c92af9d2f652270f57c547c7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 7 Nov 2024 16:48:16 -0800 Subject: rtnetlink: Remove __rtnl_link_register() link_ops is protected by link_ops_mutex and no longer needs RTNL, so we have no reason to have __rtnl_link_register() separately. Let's remove it and call rtnl_link_register() from ifb.ko and dummy.ko. Note that both modules' init() work on init_net only, so we need not export pernet_ops_rwsem and can use rtnl_net_lock() there. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241108004823.29419-4-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/rtnetlink.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 7559020f760c..ef7c11f0d74c 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -164,8 +164,6 @@ struct rtnl_link_ops { int *prividx, int attr); }; -int __rtnl_link_register(struct rtnl_link_ops *ops); - int rtnl_link_register(struct rtnl_link_ops *ops); void rtnl_link_unregister(struct rtnl_link_ops *ops); -- cgit v1.2.3 From 28690e5361c05fd4ef0ca3a17d1c667cba790554 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 7 Nov 2024 16:48:18 -0800 Subject: rtnetlink: Add peer_type in struct rtnl_link_ops. In ops->newlink(), veth, vxcan, and netkit call rtnl_link_get_net() with a net pointer, which is the first argument of ->newlink(). rtnl_link_get_net() could return another netns based on IFLA_NET_NS_PID and IFLA_NET_NS_FD in the peer device's attributes. We want to get it and fill rtnl_nets->nets[] in advance in rtnl_newlink() for per-netns RTNL. All of the three get the peer netns in the same way: 1. Call rtnl_nla_parse_ifinfomsg() 2. Call ops->validate() (vxcan doesn't have) 3. Call rtnl_link_get_net_tb() Let's add a new field peer_type to struct rtnl_link_ops and prefetch netns in the peer ifla to add it to rtnl_nets in rtnl_newlink(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20241108004823.29419-6-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/rtnetlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index ef7c11f0d74c..bef76abcff8d 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -75,6 +75,7 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh) * @srcu: Used internally * @kind: Identifier * @netns_refund: Physical device, move to init_net on netns exit + * @peer_type: Peer device specific netlink attribute number (e.g. VETH_INFO_PEER) * @maxtype: Highest device specific netlink attribute number * @policy: Netlink policy for device specific attribute validation * @validate: Optional validation function for netlink/changelink parameters @@ -116,6 +117,7 @@ struct rtnl_link_ops { void (*setup)(struct net_device *dev); bool netns_refund; + const u16 peer_type; unsigned int maxtype; const struct nla_policy *policy; int (*validate)(struct nlattr *tb[], -- cgit v1.2.3 From 636af13f213bf9b28a34254327934bc72a797754 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 7 Nov 2024 16:48:23 -0800 Subject: rtnetlink: Register rtnl_dellink() and rtnl_setlink() with RTNL_FLAG_DOIT_PERNET_WIP. Currently, rtnl_setlink() and rtnl_dellink() cannot be fully converted to per-netns RTNL due to a lack of handling peer/lower/upper devices in different netns. For example, when we change a device in rtnl_setlink() and need to propagate that to its upper devices, we want to avoid acquiring all netns locks, for which we do not know the upper limit. The same situation happens when we remove a device. rtnl_dellink() could be transformed to remove a single device in the requested netns and delegate other devices to per-netns work, and rtnl_setlink() might be ? Until we come up with a better idea, let's use a new flag RTNL_FLAG_DOIT_PERNET_WIP for rtnl_dellink() and rtnl_setlink(). This will unblock converting RTNL users where such devices are not related. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Nikolay Aleksandrov Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241108004823.29419-11-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index bef76abcff8d..bc0069a8b6ea 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -13,6 +13,7 @@ typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); enum rtnl_link_flags { RTNL_FLAG_DOIT_UNLOCKED = BIT(0), #define RTNL_FLAG_DOIT_PERNET RTNL_FLAG_DOIT_UNLOCKED +#define RTNL_FLAG_DOIT_PERNET_WIP RTNL_FLAG_DOIT_UNLOCKED RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1), RTNL_FLAG_DUMP_UNLOCKED = BIT(2), RTNL_FLAG_DUMP_SPLIT_NLM_DONE = BIT(3), /* legacy behavior */ -- cgit v1.2.3 From 5dc51ec86df6e2214d8398079c1e31736593ab53 Mon Sep 17 00:00:00 2001 From: Martin Karsten Date: Sat, 9 Nov 2024 05:02:31 +0000 Subject: net: Add napi_struct parameter irq_suspend_timeout Add a per-NAPI IRQ suspension parameter, which can be get/set with netdev-genl. This patch doesn't change any behavior but prepares the code for other changes in the following commits which use irq_suspend_timeout as a timeout for IRQ suspension. Signed-off-by: Martin Karsten Co-developed-by: Joe Damato Signed-off-by: Joe Damato Tested-by: Joe Damato Tested-by: Martin Karsten Acked-by: Stanislav Fomichev Reviewed-by: Sridhar Samudrala Link: https://patch.msgid.link/20241109050245.191288-2-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 ++ include/uapi/linux/netdev.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index df4483598628..0aae346d919e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -348,6 +348,7 @@ struct gro_list { */ struct napi_config { u64 gro_flush_timeout; + u64 irq_suspend_timeout; u32 defer_hard_irqs; unsigned int napi_id; }; @@ -384,6 +385,7 @@ struct napi_struct { struct hrtimer timer; struct task_struct *thread; unsigned long gro_flush_timeout; + unsigned long irq_suspend_timeout; u32 defer_hard_irqs; /* control-path-only fields follow */ struct list_head dev_list; diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index e3ebb49f60d2..e4be227d3ad6 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -124,6 +124,7 @@ enum { NETDEV_A_NAPI_PID, NETDEV_A_NAPI_DEFER_HARD_IRQS, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, + NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) -- cgit v1.2.3 From 3fcbecbdeb048dfd1bea824f4276717fed02d10e Mon Sep 17 00:00:00 2001 From: Martin Karsten Date: Sat, 9 Nov 2024 05:02:32 +0000 Subject: net: Add control functions for irq suspension The napi_suspend_irqs routine bootstraps irq suspension by elongating the defer timeout to irq_suspend_timeout. The napi_resume_irqs routine effectively cancels irq suspension by forcing the napi to be scheduled immediately. Signed-off-by: Martin Karsten Co-developed-by: Joe Damato Signed-off-by: Joe Damato Tested-by: Joe Damato Tested-by: Martin Karsten Acked-by: Stanislav Fomichev Reviewed-by: Sridhar Samudrala Link: https://patch.msgid.link/20241109050245.191288-3-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- include/net/busy_poll.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index f03040baaefd..c858270141bc 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -52,6 +52,9 @@ void napi_busy_loop_rcu(unsigned int napi_id, bool (*loop_end)(void *, unsigned long), void *loop_end_arg, bool prefer_busy_poll, u16 budget); +void napi_suspend_irqs(unsigned int napi_id); +void napi_resume_irqs(unsigned int napi_id); + #else /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long net_busy_loop_on(void) { -- cgit v1.2.3 From 213a695297e1f0c2ed814488757d496b0d7f7267 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 11 Nov 2024 20:49:11 +0800 Subject: bpf: Replace the document for PTR_TO_BTF_ID_OR_NULL Commit c25b2ae13603 ("bpf: Replace PTR_TO_XXX_OR_NULL with PTR_TO_XXX | PTR_MAYBE_NULL") moved the fields around and misplaced the documentation for "PTR_TO_BTF_ID_OR_NULL". So, let's replace it in the proper place. Signed-off-by: Menglong Dong Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241111124911.1436911-1-dongml2@chinatelecom.cn --- include/linux/bpf.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1b84613b10ac..7da41ae2eac8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -932,10 +932,6 @@ enum bpf_reg_type { * additional context, assume the value is non-null. */ PTR_TO_BTF_ID, - /* PTR_TO_BTF_ID_OR_NULL points to a kernel struct that has not - * been checked for null. Used primarily to inform the verifier - * an explicit null check is required for this struct. - */ PTR_TO_MEM, /* reg points to valid memory region */ PTR_TO_ARENA, PTR_TO_BUF, /* reg points to a read/write buffer */ @@ -948,6 +944,10 @@ enum bpf_reg_type { PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCKET, PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON, PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | PTR_TO_TCP_SOCK, + /* PTR_TO_BTF_ID_OR_NULL points to a kernel struct that has not + * been checked for null. Used primarily to inform the verifier + * an explicit null check is required for this struct. + */ PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is -- cgit v1.2.3 From db0fc586edde83ff7ff65fea56c4f72dae511764 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 28 Oct 2024 16:31:32 -0700 Subject: drm/i915/gsc: ARL-H and ARL-U need a newer GSC FW. All MTL and ARL SKUs share the same GSC FW, but the newer platforms are only supported in newer blobs. In particular, ARL-S is supported starting from 102.0.10.1878 (which is already the minimum required version for ARL in the code), while ARL-H and ARL-U are supported from 102.1.15.1926. Therefore, the driver needs to check which specific ARL subplatform its running on when verifying that the GSC FW is new enough for it. Fixes: 2955ae8186c8 ("drm/i915: ARL requires a newer GSC firmware") Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20241028233132.149745-1-daniele.ceraolospurio@intel.com (cherry picked from commit 3c1d5ced18db8a67251c8436cf9bdc061f972bdb) Signed-off-by: Joonas Lahtinen --- include/drm/intel/i915_pciids.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h index 2bf03ebfcf73..f35534522d33 100644 --- a/include/drm/intel/i915_pciids.h +++ b/include/drm/intel/i915_pciids.h @@ -771,13 +771,24 @@ INTEL_ATS_M150_IDS(MACRO__, ## __VA_ARGS__), \ INTEL_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) -/* MTL */ -#define INTEL_ARL_IDS(MACRO__, ...) \ - MACRO__(0x7D41, ## __VA_ARGS__), \ +/* ARL */ +#define INTEL_ARL_H_IDS(MACRO__, ...) \ MACRO__(0x7D51, ## __VA_ARGS__), \ - MACRO__(0x7D67, ## __VA_ARGS__), \ MACRO__(0x7DD1, ## __VA_ARGS__) +#define INTEL_ARL_U_IDS(MACRO__, ...) \ + MACRO__(0x7D41, ## __VA_ARGS__) \ + +#define INTEL_ARL_S_IDS(MACRO__, ...) \ + MACRO__(0x7D67, ## __VA_ARGS__), \ + MACRO__(0xB640, ## __VA_ARGS__) + +#define INTEL_ARL_IDS(MACRO__, ...) \ + INTEL_ARL_H_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_ARL_U_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_ARL_S_IDS(MACRO__, ## __VA_ARGS__) + +/* MTL */ #define INTEL_MTL_IDS(MACRO__, ...) \ INTEL_ARL_IDS(MACRO__, ## __VA_ARGS__), \ MACRO__(0x7D40, ## __VA_ARGS__), \ -- cgit v1.2.3 From 37653a0b8a6f5d6ab23daa8e585c5ed24a0fc500 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Thu, 7 Nov 2024 20:55:53 +0800 Subject: net: ip: make fib_validate_source() support drop reasons In this commit, we make fib_validate_source() and __fib_validate_source() return -reason instead of errno on error. The return value of fib_validate_source can be -errno, 0, and 1. It's hard to make fib_validate_source() return drop reasons directly. The fib_validate_source() will return 1 if the scope of the source(revert) route is HOST. And the __mkroute_input() will mark the skb with IPSKB_DOREDIRECT in this case (combine with some other conditions). And then, a REDIRECT ICMP will be sent in ip_forward() if this flag exists. We can't pass this information to __mkroute_input if we make fib_validate_source() return drop reasons. Therefore, we introduce the wrapper fib_validate_source_reason() for fib_validate_source(), which will return the drop reasons on error. In the origin logic, LINUX_MIB_IPRPFILTER will be counted if fib_validate_source() return -EXDEV. And now, we need to adjust it by checking "reason == SKB_DROP_REASON_IP_RPFILTER". However, this will take effect only after the patch "net: ip: make ip_route_input_noref() return drop reasons", as we can't pass the drop reasons from fib_validate_source() to ip_rcv_finish_core() in this patch. Following new drop reasons are added in this patch: SKB_DROP_REASON_IP_LOCAL_SOURCE SKB_DROP_REASON_IP_INVALID_SOURCE Signed-off-by: Menglong Dong Signed-off-by: Paolo Abeni --- include/net/dropreason-core.h | 10 ++++++++++ include/net/ip_fib.h | 12 ++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index d59bb96c5a02..62a60be1db84 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -76,6 +76,8 @@ FN(INVALID_PROTO) \ FN(IP_INADDRERRORS) \ FN(IP_INNOROUTES) \ + FN(IP_LOCAL_SOURCE) \ + FN(IP_INVALID_SOURCE) \ FN(PKT_TOO_BIG) \ FN(DUP_FRAG) \ FN(FRAG_REASM_TIMEOUT) \ @@ -373,6 +375,14 @@ enum skb_drop_reason { * IPSTATS_MIB_INADDRERRORS */ SKB_DROP_REASON_IP_INNOROUTES, + /** @SKB_DROP_REASON_IP_LOCAL_SOURCE: the source ip is local */ + SKB_DROP_REASON_IP_LOCAL_SOURCE, + /** + * @SKB_DROP_REASON_IP_INVALID_SOURCE: the source ip is invalid: + * 1) source ip is multicast or limited broadcast + * 2) source ip is zero and not IGMP + */ + SKB_DROP_REASON_IP_INVALID_SOURCE, /** * @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (maybe exceed the * MTU) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index b6e44f4eaa4c..a113c11ab56b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -452,6 +452,18 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, dscp_t dscp, int oif, struct net_device *dev, struct in_device *idev, u32 *itag); +static inline enum skb_drop_reason +fib_validate_source_reason(struct sk_buff *skb, __be32 src, __be32 dst, + dscp_t dscp, int oif, struct net_device *dev, + struct in_device *idev, u32 *itag) +{ + int err = fib_validate_source(skb, src, dst, dscp, oif, dev, idev, + itag); + if (err < 0) + return -err; + return SKB_NOT_DROPPED_YET; +} + #ifdef CONFIG_IP_ROUTE_CLASSID static inline int fib_num_tclassid_users(struct net *net) { -- cgit v1.2.3 From d46f827016d891dbc234cb05c406180f77fb3b2d Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Thu, 7 Nov 2024 20:55:55 +0800 Subject: net: ip: make ip_mc_validate_source() return drop reason Make ip_mc_validate_source() return drop reason, and adjust the call of it in ip_route_input_mc(). Another caller of it is ip_rcv_finish_core->udp_v4_early_demux, and the errno is not checked in detail, so we don't do more adjustment for it. The drop reason "SKB_DROP_REASON_IP_LOCALNET" is added in this commit. Signed-off-by: Menglong Dong Signed-off-by: Paolo Abeni --- include/net/dropreason-core.h | 3 +++ include/net/route.h | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 62a60be1db84..a2a1fb90e0e5 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -78,6 +78,7 @@ FN(IP_INNOROUTES) \ FN(IP_LOCAL_SOURCE) \ FN(IP_INVALID_SOURCE) \ + FN(IP_LOCALNET) \ FN(PKT_TOO_BIG) \ FN(DUP_FRAG) \ FN(FRAG_REASM_TIMEOUT) \ @@ -383,6 +384,8 @@ enum skb_drop_reason { * 2) source ip is zero and not IGMP */ SKB_DROP_REASON_IP_INVALID_SOURCE, + /** @SKB_DROP_REASON_IP_LOCALNET: source or dest ip is local net */ + SKB_DROP_REASON_IP_LOCALNET, /** * @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (maybe exceed the * MTU) diff --git a/include/net/route.h b/include/net/route.h index 0a690adfdff5..e2e1922c58fb 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -199,9 +199,10 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 return ip_route_output_key(net, fl4); } -int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, - dscp_t dscp, struct net_device *dev, - struct in_device *in_dev, u32 *itag); +enum skb_drop_reason +ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev, + struct in_device *in_dev, u32 *itag); int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev); int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, -- cgit v1.2.3 From 5b92112acd8e2ed84a4df653fc20575f4da6fa49 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Thu, 7 Nov 2024 20:55:56 +0800 Subject: net: ip: make ip_route_input_slow() return drop reasons In this commit, we make ip_route_input_slow() return skb drop reasons, and following new skb drop reasons are added: SKB_DROP_REASON_IP_INVALID_DEST The only caller of ip_route_input_slow() is ip_route_input_rcu(), and we adjust it by making it return -EINVAL on error. Signed-off-by: Menglong Dong Signed-off-by: Paolo Abeni --- include/net/dropreason-core.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index a2a1fb90e0e5..74624d369d48 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -79,6 +79,7 @@ FN(IP_LOCAL_SOURCE) \ FN(IP_INVALID_SOURCE) \ FN(IP_LOCALNET) \ + FN(IP_INVALID_DEST) \ FN(PKT_TOO_BIG) \ FN(DUP_FRAG) \ FN(FRAG_REASM_TIMEOUT) \ @@ -386,6 +387,11 @@ enum skb_drop_reason { SKB_DROP_REASON_IP_INVALID_SOURCE, /** @SKB_DROP_REASON_IP_LOCALNET: source or dest ip is local net */ SKB_DROP_REASON_IP_LOCALNET, + /** + * @SKB_DROP_REASON_IP_INVALID_DEST: the dest ip is invalid: + * 1) dest ip is 0 + */ + SKB_DROP_REASON_IP_INVALID_DEST, /** * @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (maybe exceed the * MTU) -- cgit v1.2.3 From 82d9983ebeb871cb5abd27c12a950c14c68772e1 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Thu, 7 Nov 2024 20:55:58 +0800 Subject: net: ip: make ip_route_input_noref() return drop reasons In this commit, we make ip_route_input_noref() return drop reasons, which come from ip_route_input_rcu(). We need adjust the callers of ip_route_input_noref() to make sure the return value of ip_route_input_noref() is used properly. The errno that ip_route_input_noref() returns comes from ip_route_input and bpf_lwt_input_reroute in the origin logic, and we make them return -EINVAL on error instead. In the following patch, we will make ip_route_input() returns drop reasons too. Signed-off-by: Menglong Dong Signed-off-by: Paolo Abeni --- include/net/route.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index e2e1922c58fb..b85ffa3e042b 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -203,8 +203,9 @@ enum skb_drop_reason ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev, struct in_device *in_dev, u32 *itag); -int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, - dscp_t dscp, struct net_device *dev); +enum skb_drop_reason +ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev); int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev, const struct sk_buff *hint); @@ -212,18 +213,18 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, dscp_t dscp, struct net_device *devin) { - int err; + enum skb_drop_reason reason; rcu_read_lock(); - err = ip_route_input_noref(skb, dst, src, dscp, devin); - if (!err) { + reason = ip_route_input_noref(skb, dst, src, dscp, devin); + if (!reason) { skb_dst_force(skb); if (!skb_dst(skb)) - err = -EINVAL; + reason = SKB_DROP_REASON_NOT_SPECIFIED; } rcu_read_unlock(); - return err; + return reason ? -EINVAL : 0; } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, -- cgit v1.2.3 From 50038bf38e6577a15d52b890d82c197cf3b163a0 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Thu, 7 Nov 2024 20:55:59 +0800 Subject: net: ip: make ip_route_input() return drop reasons In this commit, we make ip_route_input() return skb drop reasons that come from ip_route_input_noref(). Meanwhile, adjust all the call to it. Signed-off-by: Menglong Dong Signed-off-by: Paolo Abeni --- include/net/route.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index b85ffa3e042b..fb3433dc9c72 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -210,8 +210,9 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev, const struct sk_buff *hint); -static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, - dscp_t dscp, struct net_device *devin) +static inline enum skb_drop_reason +ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, dscp_t dscp, + struct net_device *devin) { enum skb_drop_reason reason; @@ -224,7 +225,7 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, } rcu_read_unlock(); - return reason ? -EINVAL : 0; + return reason; } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, -- cgit v1.2.3 From d9340d1e02779dbf83d53b0deb4068c7768b8261 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Thu, 7 Nov 2024 20:56:00 +0800 Subject: net: ip: make ip_mkroute_input/__mkroute_input return drop reasons In this commit, we make ip_mkroute_input() and __mkroute_input() return drop reasons. The drop reason "SKB_DROP_REASON_ARP_PVLAN_DISABLE" is introduced for the case: the packet which is not IP is forwarded to the in_dev, and the proxy_arp_pvlan is not enabled. Signed-off-by: Menglong Dong Signed-off-by: Paolo Abeni --- include/net/dropreason-core.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 74624d369d48..6c5a1ea209a2 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -104,6 +104,7 @@ FN(IP_TUNNEL_ECN) \ FN(TUNNEL_TXINFO) \ FN(LOCAL_MAC) \ + FN(ARP_PVLAN_DISABLE) \ FNe(MAX) /** @@ -477,6 +478,12 @@ enum skb_drop_reason { * the MAC address of the local netdev. */ SKB_DROP_REASON_LOCAL_MAC, + /** + * @SKB_DROP_REASON_ARP_PVLAN_DISABLE: packet which is not IP is + * forwarded to the in_dev, and the proxy_arp_pvlan is not + * enabled. + */ + SKB_DROP_REASON_ARP_PVLAN_DISABLE, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen -- cgit v1.2.3 From 479aed04e84a5d66caa3b25bfc651292c153ef70 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Thu, 7 Nov 2024 20:56:01 +0800 Subject: net: ip: make ip_route_use_hint() return drop reasons In this commit, we make ip_route_use_hint() return drop reasons. The drop reasons that we return are similar to what we do in ip_route_input_slow(), and no drop reasons are added in this commit. Signed-off-by: Menglong Dong Signed-off-by: Paolo Abeni --- include/net/route.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index fb3433dc9c72..84cb1e04f5cd 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -206,9 +206,10 @@ ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, enum skb_drop_reason ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev); -int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, - dscp_t dscp, struct net_device *dev, - const struct sk_buff *hint); +enum skb_drop_reason +ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev, + const struct sk_buff *hint); static inline enum skb_drop_reason ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, dscp_t dscp, -- cgit v1.2.3 From 12079a59ce52e72a342c49cfacf0281213fd6f32 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 7 Nov 2024 08:11:44 -0800 Subject: net: Implement fault injection forcing skb reallocation Introduce a fault injection mechanism to force skb reallocation. The primary goal is to catch bugs related to pointer invalidation after potential skb reallocation. The fault injection mechanism aims to identify scenarios where callers retain pointers to various headers in the skb but fail to reload these pointers after calling a function that may reallocate the data. This type of bug can lead to memory corruption or crashes if the old, now-invalid pointers are used. By forcing reallocation through fault injection, we can stress-test code paths and ensure proper pointer management after potential skb reallocations. Add a hook for fault injection in the following functions: * pskb_trim_rcsum() * pskb_may_pull_reason() * pskb_trim() As the other fault injection mechanism, protect it under a debug Kconfig called CONFIG_FAIL_SKB_REALLOC. This patch was *heavily* inspired by Jakub's proposal from: https://lore.kernel.org/all/20240719174140.47a868e6@kernel.org/ CC: Akinobu Mita Suggested-by: Jakub Kicinski Signed-off-by: Breno Leitao Reviewed-by: Akinobu Mita Acked-by: Paolo Abeni Acked-by: Guillaume Nault Link: https://patch.msgid.link/20241107-fault_v6-v6-1-1b82cb6ecacd@debian.org Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 60535c706851..58009fa66102 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2682,6 +2682,12 @@ static inline void skb_assert_len(struct sk_buff *skb) #endif /* CONFIG_DEBUG_NET */ } +#if defined(CONFIG_FAIL_SKB_REALLOC) +void skb_might_realloc(struct sk_buff *skb); +#else +static inline void skb_might_realloc(struct sk_buff *skb) {} +#endif + /* * Add data to an sk_buff */ @@ -2782,6 +2788,7 @@ static inline enum skb_drop_reason pskb_may_pull_reason(struct sk_buff *skb, unsigned int len) { DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb_might_realloc(skb); if (likely(len <= skb_headlen(skb))) return SKB_NOT_DROPPED_YET; @@ -3240,6 +3247,7 @@ static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) static inline int pskb_trim(struct sk_buff *skb, unsigned int len) { + skb_might_realloc(skb); return (len < skb->len) ? __pskb_trim(skb, len) : 0; } @@ -3994,6 +4002,7 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len); static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) { + skb_might_realloc(skb); if (likely(len >= skb->len)) return 0; return pskb_trim_rcsum_slow(skb, len); -- cgit v1.2.3 From 406c5548c661df0bff6bb6ee79bf9d49faf23e31 Mon Sep 17 00:00:00 2001 From: MeiChia Chiu Date: Tue, 12 Nov 2024 16:38:46 +0800 Subject: wifi: mac80211: Support EHT 1024 aggregation size in TX Support EHT 1024 aggregation size in TX The 1024 agg size for RX is supported but not for TX. This patch adds this support and refactors common parsing logics for addbaext in both process_addba_resp and process_addba_req into a function. Reviewed-by: Shayne Chen Reviewed-by: Money Wang Co-developed-by: Peter Chiu Signed-off-by: Peter Chiu Signed-off-by: MeiChia Chiu Link: https://patch.msgid.link/20241112083846.32063-1-MeiChia.Chiu@mediatek.com [pass elems/len instead of mgmt/len/is_req] Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 456bca45ff05..05dedc45505c 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1445,6 +1445,8 @@ struct ieee80211_mgmt { __le16 status; __le16 capab; __le16 timeout; + /* followed by BA Extension */ + u8 variable[]; } __packed addba_resp; struct{ u8 action_code; -- cgit v1.2.3 From ed9d95f691c29748f21bc019de9566b698fdfab7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Nov 2024 10:09:56 -0500 Subject: fs: add the ability for statmount() to report the fs_subtype /proc/self/mountinfo prints out the sb->s_subtype after the type. This is particularly useful for disambiguating FUSE mounts (at least when the userland driver bothers to set it). Add STATMOUNT_FS_SUBTYPE and claim one of the __spare2 fields to point to the offset into the str[] array. Reviewed-by: Jan Kara Reviewed-by: Ian Kent Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20241111-statmount-v4-2-2eaf35d07a80@kernel.org Acked-by: Miklos Szeredi Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 225bc366ffcb..2e939dddf9cb 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -173,7 +173,9 @@ struct statmount { __u32 mnt_root; /* [str] Root of mount relative to root of fs */ __u32 mnt_point; /* [str] Mountpoint relative to current root */ __u64 mnt_ns_id; /* ID of the mount namespace */ - __u64 __spare2[49]; + __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ + __u32 __spare1[1]; + __u64 __spare2[48]; char str[]; /* Variable size part containing strings */ }; @@ -207,6 +209,7 @@ struct mnt_id_req { #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ #define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ #define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ +#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ /* * Special @mnt_id values that can be passed to listmount -- cgit v1.2.3 From 8182a8b39aa227f5b99b8d4d18f296b82ce4b94c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Nov 2024 06:43:55 +0100 Subject: writeback: wbc_attach_fdatawrite_inode out of line This allows exporting this high-level interface only while keeping wbc_attach_and_unlock_inode private in fs-writeback.c and unexporting __inode_attach_wb. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20241112054403.1470586-3-hch@lst.de Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/writeback.h | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d6db822e4bb3..aee3e1b4c50f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -213,9 +213,6 @@ static inline void wait_on_inode(struct inode *inode) #include void __inode_attach_wb(struct inode *inode, struct folio *folio); -void wbc_attach_and_unlock_inode(struct writeback_control *wbc, - struct inode *inode) - __releases(&inode->i_lock); void wbc_detach_inode(struct writeback_control *wbc); void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, size_t bytes); @@ -254,22 +251,8 @@ static inline void inode_detach_wb(struct inode *inode) } } -/** - * wbc_attach_fdatawrite_inode - associate wbc and inode for fdatawrite - * @wbc: writeback_control of interest - * @inode: target inode - * - * This function is to be used by __filemap_fdatawrite_range(), which is an - * alternative entry point into writeback code, and first ensures @inode is - * associated with a bdi_writeback and attaches it to @wbc. - */ -static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, - struct inode *inode) -{ - spin_lock(&inode->i_lock); - inode_attach_wb(inode, NULL); - wbc_attach_and_unlock_inode(wbc, inode); -} +void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, + struct inode *inode); /** * wbc_init_bio - writeback specific initializtion of bio @@ -303,13 +286,6 @@ static inline void inode_detach_wb(struct inode *inode) { } -static inline void wbc_attach_and_unlock_inode(struct writeback_control *wbc, - struct inode *inode) - __releases(&inode->i_lock) -{ - spin_unlock(&inode->i_lock); -} - static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, struct inode *inode) { -- cgit v1.2.3 From 365f34483be33a9d0151c06ac39627d7927210d9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 15 Oct 2024 09:11:00 -0700 Subject: srcu: Renaming in preparation for additional reader flavor Currently, there are only two flavors of readers, normal and NMI-safe. A number of fields, functions, and types reflect this restriction. This renaming-only commit prepares for the addition of light-weight (as in memory-barrier-free) readers. OK, OK, there is also a drive-by white-space fixeup! Signed-off-by: Paul E. McKenney Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Peter Zijlstra Cc: Kent Overstreet Cc: Reviewed-by: Neeraj Upadhyay Signed-off-by: Frederic Weisbecker --- include/linux/srcu.h | 21 ++++++++++----------- include/linux/srcutree.h | 2 +- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 835bbb2d1f88..06728ef6f32a 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -181,10 +181,9 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) #define SRCU_NMI_SAFE 0x2 #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU) -void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe); +void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor); #else -static inline void srcu_check_nmi_safety(struct srcu_struct *ssp, - bool nmi_safe) { } +static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor) { } #endif @@ -245,7 +244,7 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) { int retval; - srcu_check_nmi_safety(ssp, false); + srcu_check_read_flavor(ssp, false); retval = __srcu_read_lock(ssp); srcu_lock_acquire(&ssp->dep_map); return retval; @@ -262,7 +261,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp { int retval; - srcu_check_nmi_safety(ssp, true); + srcu_check_read_flavor(ssp, true); retval = __srcu_read_lock_nmisafe(ssp); rcu_try_lock_acquire(&ssp->dep_map); return retval; @@ -274,7 +273,7 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) { int retval; - srcu_check_nmi_safety(ssp, false); + srcu_check_read_flavor(ssp, false); retval = __srcu_read_lock(ssp); return retval; } @@ -303,7 +302,7 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp) { WARN_ON_ONCE(in_nmi()); - srcu_check_nmi_safety(ssp, false); + srcu_check_read_flavor(ssp, false); return __srcu_read_lock(ssp); } @@ -318,7 +317,7 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp) { WARN_ON_ONCE(idx & ~0x1); - srcu_check_nmi_safety(ssp, false); + srcu_check_read_flavor(ssp, false); srcu_lock_release(&ssp->dep_map); __srcu_read_unlock(ssp, idx); } @@ -334,7 +333,7 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp) { WARN_ON_ONCE(idx & ~0x1); - srcu_check_nmi_safety(ssp, true); + srcu_check_read_flavor(ssp, true); rcu_lock_release(&ssp->dep_map); __srcu_read_unlock_nmisafe(ssp, idx); } @@ -343,7 +342,7 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) static inline notrace void srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp) { - srcu_check_nmi_safety(ssp, false); + srcu_check_read_flavor(ssp, false); __srcu_read_unlock(ssp, idx); } @@ -360,7 +359,7 @@ static inline void srcu_up_read(struct srcu_struct *ssp, int idx) { WARN_ON_ONCE(idx & ~0x1); WARN_ON_ONCE(in_nmi()); - srcu_check_nmi_safety(ssp, false); + srcu_check_read_flavor(ssp, false); __srcu_read_unlock(ssp, idx); } diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index ed57598394de..ab7d8d215b84 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -25,7 +25,7 @@ struct srcu_data { /* Read-side state. */ atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */ atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */ - int srcu_nmi_safety; /* NMI-safe srcu_struct structure? */ + int srcu_reader_flavor; /* Reader flavor for srcu_struct structure? */ /* Update-side state. */ spinlock_t __private lock ____cacheline_internodealigned_in_smp; -- cgit v1.2.3 From c071b8e5351453c2cb2d12f425d928f3a24ed2d3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 15 Oct 2024 09:11:03 -0700 Subject: srcu: Improve srcu_read_lock{,_nmisafe}() comments This commit adds some additional usage constraints to the kernel-doc headers of srcu_read_lock() and srcu_read_lock_nmi_safe(). Suggested-by: Andrii Nakryiko Signed-off-by: Paul E. McKenney Reviewed-by: Neeraj Upadhyay Signed-off-by: Frederic Weisbecker --- include/linux/srcu.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 06728ef6f32a..46fd06b212ba 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -235,10 +235,13 @@ static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flav * a mutex that is held elsewhere while calling synchronize_srcu() or * synchronize_srcu_expedited(). * - * Note that srcu_read_lock() and the matching srcu_read_unlock() must - * occur in the same context, for example, it is illegal to invoke - * srcu_read_unlock() in an irq handler if the matching srcu_read_lock() - * was invoked in process context. + * The return value from srcu_read_lock() must be passed unaltered + * to the matching srcu_read_unlock(). Note that srcu_read_lock() and + * the matching srcu_read_unlock() must occur in the same context, for + * example, it is illegal to invoke srcu_read_unlock() in an irq handler + * if the matching srcu_read_lock() was invoked in process context. Or, + * for that matter to invoke srcu_read_unlock() from one task and the + * matching srcu_read_lock() from another. */ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) { @@ -256,6 +259,10 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) * * Enter an SRCU read-side critical section, but in an NMI-safe manner. * See srcu_read_lock() for more information. + * + * If srcu_read_lock_nmisafe() is ever used on an srcu_struct structure, + * then none of the other flavors may be used, whether before, during, + * or after. */ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp) { -- cgit v1.2.3 From 05829be27fe6f64e0675dc3be3a12d43b52492e1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 15 Oct 2024 09:11:04 -0700 Subject: srcu: Create CPP macros for normal and NMI-safe SRCU readers This commit creates SRCU_READ_FLAVOR_NORMAL and SRCU_READ_FLAVOR_NMI C-preprocessor macros for srcu_read_lock() and srcu_read_lock_nmisafe(), respectively. These replace the old true/false values that were previously passed to srcu_check_read_flavor(). In addition, the srcu_check_read_flavor() function itself requires a bit of rework to handle bitmasks instead of true/false values. Signed-off-by: Paul E. McKenney Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Peter Zijlstra Cc: Kent Overstreet Cc: Signed-off-by: Paul E. McKenney Reviewed-by: Neeraj Upadhyay Signed-off-by: Frederic Weisbecker --- include/linux/srcu.h | 20 ++++++++------------ include/linux/srcutree.h | 4 ++++ 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 46fd06b212ba..84daaa33ea0a 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -176,10 +176,6 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -#define SRCU_NMI_UNKNOWN 0x0 -#define SRCU_NMI_UNSAFE 0x1 -#define SRCU_NMI_SAFE 0x2 - #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU) void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor); #else @@ -247,7 +243,7 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) { int retval; - srcu_check_read_flavor(ssp, false); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL); retval = __srcu_read_lock(ssp); srcu_lock_acquire(&ssp->dep_map); return retval; @@ -268,7 +264,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp { int retval; - srcu_check_read_flavor(ssp, true); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NMI); retval = __srcu_read_lock_nmisafe(ssp); rcu_try_lock_acquire(&ssp->dep_map); return retval; @@ -280,7 +276,7 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) { int retval; - srcu_check_read_flavor(ssp, false); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL); retval = __srcu_read_lock(ssp); return retval; } @@ -309,7 +305,7 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp) { WARN_ON_ONCE(in_nmi()); - srcu_check_read_flavor(ssp, false); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL); return __srcu_read_lock(ssp); } @@ -324,7 +320,7 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp) { WARN_ON_ONCE(idx & ~0x1); - srcu_check_read_flavor(ssp, false); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL); srcu_lock_release(&ssp->dep_map); __srcu_read_unlock(ssp, idx); } @@ -340,7 +336,7 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp) { WARN_ON_ONCE(idx & ~0x1); - srcu_check_read_flavor(ssp, true); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NMI); rcu_lock_release(&ssp->dep_map); __srcu_read_unlock_nmisafe(ssp, idx); } @@ -349,7 +345,7 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) static inline notrace void srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp) { - srcu_check_read_flavor(ssp, false); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL); __srcu_read_unlock(ssp, idx); } @@ -366,7 +362,7 @@ static inline void srcu_up_read(struct srcu_struct *ssp, int idx) { WARN_ON_ONCE(idx & ~0x1); WARN_ON_ONCE(in_nmi()); - srcu_check_read_flavor(ssp, false); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL); __srcu_read_unlock(ssp, idx); } diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index ab7d8d215b84..79ad809c7f03 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -43,6 +43,10 @@ struct srcu_data { struct srcu_struct *ssp; }; +/* Values for ->srcu_reader_flavor. */ +#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). +#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). + /* * Node in SRCU combining tree, similar in function to rcu_data. */ -- cgit v1.2.3 From 6364dd8191d27230176ac4b1b4daaecaf4807399 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 15 Oct 2024 09:11:05 -0700 Subject: srcu: Add srcu_read_lock_lite() and srcu_read_unlock_lite() This patch adds srcu_read_lock_lite() and srcu_read_unlock_lite(), which dispense with the read-side smp_mb() but also are restricted to code regions that RCU is watching. If a given srcu_struct structure uses srcu_read_lock_lite() and srcu_read_unlock_lite(), it is not permitted to use any other SRCU read-side marker, before, during, or after. Another price of light-weight readers is heavier weight grace periods. Such readers mean that SRCU grace periods on srcu_struct structures used by light-weight readers will incur at least two calls to synchronize_rcu(). In addition, normal SRCU grace periods for light-weight-reader srcu_struct structures never auto-expedite. Note that expedited SRCU grace periods for light-weight-reader srcu_struct structures still invoke synchronize_rcu(), not synchronize_srcu_expedited(). Something about wishing to keep the IPIs down to a dull roar. The srcu_read_lock_lite() and srcu_read_unlock_lite() functions may not (repeat, *not*) be used from NMI handlers, but if this is needed, an additional flavor of SRCU reader can be added by some future commit. [ paulmck: Apply Alexei Starovoitov expediting feedback. ] [ paulmck: Apply kernel test robot feedback. ] Signed-off-by: Paul E. McKenney Tested-by: kernel test robot Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Peter Zijlstra Cc: Kent Overstreet Cc: Reviewed-by: Neeraj Upadhyay Signed-off-by: Frederic Weisbecker --- include/linux/srcu.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++- include/linux/srcutree.h | 1 + 2 files changed, 51 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 84daaa33ea0a..4ba96e2cfa40 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -56,6 +56,13 @@ void call_srcu(struct srcu_struct *ssp, struct rcu_head *head, void cleanup_srcu_struct(struct srcu_struct *ssp); int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); +#ifdef CONFIG_TINY_SRCU +#define __srcu_read_lock_lite __srcu_read_lock +#define __srcu_read_unlock_lite __srcu_read_unlock +#else // #ifdef CONFIG_TINY_SRCU +int __srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp); +void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) __releases(ssp); +#endif // #else // #ifdef CONFIG_TINY_SRCU void synchronize_srcu(struct srcu_struct *ssp); #define SRCU_GET_STATE_COMPLETED 0x1 @@ -179,7 +186,7 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU) void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor); #else -static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor) { } +#define srcu_check_read_flavor(ssp, read_flavor) do { } while (0) #endif @@ -249,6 +256,32 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) return retval; } +/** + * srcu_read_lock_lite - register a new reader for an SRCU-protected structure. + * @ssp: srcu_struct in which to register the new reader. + * + * Enter an SRCU read-side critical section, but for a light-weight + * smp_mb()-free reader. See srcu_read_lock() for more information. + * + * If srcu_read_lock_lite() is ever used on an srcu_struct structure, + * then none of the other flavors may be used, whether before, during, + * or after. Note that grace-period auto-expediting is disabled for _lite + * srcu_struct structures because auto-expedited grace periods invoke + * synchronize_rcu_expedited(), IPIs and all. + * + * Note that srcu_read_lock_lite() can be invoked only from those contexts + * where RCU is watching. Otherwise, lockdep will complain. + */ +static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp) +{ + int retval; + + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE); + retval = __srcu_read_lock_lite(ssp); + rcu_try_lock_acquire(&ssp->dep_map); + return retval; +} + /** * srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure. * @ssp: srcu_struct in which to register the new reader. @@ -325,6 +358,22 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) __srcu_read_unlock(ssp, idx); } +/** + * srcu_read_unlock_lite - unregister a old reader from an SRCU-protected structure. + * @ssp: srcu_struct in which to unregister the old reader. + * @idx: return value from corresponding srcu_read_lock(). + * + * Exit a light-weight SRCU read-side critical section. + */ +static inline void srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) + __releases(ssp) +{ + WARN_ON_ONCE(idx & ~0x1); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE); + srcu_lock_release(&ssp->dep_map); + __srcu_read_unlock(ssp, idx); +} + /** * srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure. * @ssp: srcu_struct in which to unregister the old reader. diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 79ad809c7f03..8074138cbd62 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -46,6 +46,7 @@ struct srcu_data { /* Values for ->srcu_reader_flavor. */ #define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). #define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). +#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite(). /* * Node in SRCU combining tree, similar in function to rcu_data. -- cgit v1.2.3 From bb94b12e4503bdce003a74e95ee4214eba923f86 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 15 Oct 2024 09:11:06 -0700 Subject: srcu: Allow inlining of __srcu_read_{,un}lock_lite() This commit moves __srcu_read_lock_lite() and __srcu_read_unlock_lite() into include/linux/srcu.h and marks them "static inline" so that they can be inlined into srcu_read_lock_lite() and srcu_read_unlock_lite(), respectively. They are not hand-inlined due to Tree SRCU and Tiny SRCU having different implementations. The earlier removal of smp_mb() combined with the inlining produce significant single-percentage performance wins. Link: https://lore.kernel.org/all/CAEf4BzYgiNmSb=ZKQ65tm6nJDi1UX2Gq26cdHSH1mPwXJYZj5g@mail.gmail.com/ Reported-by: Alexei Starovoitov Signed-off-by: Paul E. McKenney Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Peter Zijlstra Cc: Kent Overstreet Cc: Reviewed-by: Neeraj Upadhyay Signed-off-by: Frederic Weisbecker --- include/linux/srcutree.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 8074138cbd62..778eb61542e1 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -209,4 +209,43 @@ void synchronize_srcu_expedited(struct srcu_struct *ssp); void srcu_barrier(struct srcu_struct *ssp); void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf); +/* + * Counts the new reader in the appropriate per-CPU element of the + * srcu_struct. Returns an index that must be passed to the matching + * srcu_read_unlock_lite(). + * + * Note that this_cpu_inc() is an RCU read-side critical section either + * because it disables interrupts, because it is a single instruction, + * or because it is a read-modify-write atomic operation, depending on + * the whims of the architecture. + */ +static inline int __srcu_read_lock_lite(struct srcu_struct *ssp) +{ + int idx; + + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite()."); + idx = READ_ONCE(ssp->srcu_idx) & 0x1; + this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter); /* Y */ + barrier(); /* Avoid leaking the critical section. */ + return idx; +} + +/* + * Removes the count for the old reader from the appropriate + * per-CPU element of the srcu_struct. Note that this may well be a + * different CPU than that which was incremented by the corresponding + * srcu_read_lock_lite(), but it must be within the same task. + * + * Note that this_cpu_inc() is an RCU read-side critical section either + * because it disables interrupts, because it is a single instruction, + * or because it is a read-modify-write atomic operation, depending on + * the whims of the architecture. + */ +static inline void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) +{ + barrier(); /* Avoid leaking the critical section. */ + this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter); /* Z */ + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite()."); +} + #endif -- cgit v1.2.3 From 768b1f87098a4a586353898b074989808b1b27ad Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 15 Oct 2024 09:11:12 -0700 Subject: srcu: Improve srcu_read_lock_lite() kernel-doc comment Where RCU is watching is where it is OK to invoke rcu_read_lock(). Reported-by: Andrii Nakryiko Signed-off-by: Paul E. McKenney Acked-by: Andrii Nakryiko Reviewed-by: Neeraj Upadhyay Signed-off-by: Frederic Weisbecker --- include/linux/srcu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4ba96e2cfa40..bab1dae3f69e 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -270,7 +270,8 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) * synchronize_rcu_expedited(), IPIs and all. * * Note that srcu_read_lock_lite() can be invoked only from those contexts - * where RCU is watching. Otherwise, lockdep will complain. + * where RCU is watching, that is, from contexts where it would be legal + * to invoke rcu_read_lock(). Otherwise, lockdep will complain. */ static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp) { -- cgit v1.2.3 From 7d4f46c2372d5a850e28f63001013de50843b6e6 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:04:18 -0800 Subject: iommufd: Move _iommufd_object_alloc helper to a sharable file The following patch will add a new vIOMMU allocator that will require this _iommufd_object_alloc to be sharable with IOMMU drivers (and iommufd too). Add a new driver.c file that will be built with CONFIG_IOMMUFD_DRIVER_CORE selected by CONFIG_IOMMUFD, and put the CONFIG_DRIVER under that remaining to be selectable for drivers to build the existing iova_bitmap.c file. Link: https://patch.msgid.link/r/2f4f6e116dc49ffb67ff6c5e8a7a8e789ab9e98e.1730836219.git.nicolinc@nvidia.com Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 22948dd03d67..94522d4029ca 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -135,4 +135,17 @@ static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) return -EOPNOTSUPP; } #endif /* CONFIG_IOMMUFD */ + +#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER_CORE) +struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, + size_t size, + enum iommufd_object_type type); +#else /* !CONFIG_IOMMUFD_DRIVER_CORE */ +static inline struct iommufd_object * +_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, + enum iommufd_object_type type) +{ + return ERR_PTR(-EOPNOTSUPP); +} +#endif /* CONFIG_IOMMUFD_DRIVER_CORE */ #endif -- cgit v1.2.3 From 6b22d562fcd6e3d1cc1c265b0596840946d16a09 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:04:19 -0800 Subject: iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct Add a new IOMMUFD_OBJ_VIOMMU with an iommufd_viommu structure to represent a slice of physical IOMMU device passed to or shared with a user space VM. This slice, now a vIOMMU object, is a group of virtualization resources of a physical IOMMU's, such as: - Security namespace for guest owned ID, e.g. guest-controlled cache tags - Non-device-affiliated event reporting, e.g. invalidation queue errors - Access to a sharable nesting parent pagetable across physical IOMMUs - Virtualization of various platforms IDs, e.g. RIDs and others - Delivery of paravirtualized invalidation - Direct assigned invalidation queues - Direct assigned interrupts Add a new viommu_alloc op in iommu_ops, for drivers to allocate their own vIOMMU structures. And this allocation also needs a free(), so add struct iommufd_viommu_ops. To simplify a vIOMMU allocation, provide a iommufd_viommu_alloc() helper. It's suggested that a driver should embed a core-level viommu structure in its driver-level viommu struct and call the iommufd_viommu_alloc() helper, meanwhile the driver can also implement a viommu ops: struct my_driver_viommu { struct iommufd_viommu core; /* driver-owned properties/features */ .... }; static const struct iommufd_viommu_ops my_driver_viommu_ops = { .free = my_driver_viommu_free, /* future ops for virtualization features */ .... }; static struct iommufd_viommu my_driver_viommu_alloc(...) { struct my_driver_viommu *my_viommu = iommufd_viommu_alloc(ictx, my_driver_viommu, core, my_driver_viommu_ops); /* Init my_viommu and related HW feature */ .... return &my_viommu->core; } static struct iommu_domain_ops my_driver_domain_ops = { .... .viommu_alloc = my_driver_viommu_alloc, }; Link: https://patch.msgid.link/r/64685e2b79dea0f1dc56f6ede04809b72d578935.1730836219.git.nicolinc@nvidia.com Suggested-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 14 ++++++++++++++ include/linux/iommufd.h | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bd722f473635..2574fc8abaf2 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -42,6 +42,8 @@ struct notifier_block; struct iommu_sva; struct iommu_dma_cookie; struct iommu_fault_param; +struct iommufd_ctx; +struct iommufd_viommu; #define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ #define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ @@ -542,6 +544,14 @@ static inline int __iommu_copy_struct_from_user_array( * @remove_dev_pasid: Remove any translation configurations of a specific * pasid, so that any DMA transactions with this pasid * will be blocked by the hardware. + * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind + * the @dev, as the set of virtualization resources shared/passed + * to user space IOMMU instance. And associate it with a nesting + * @parent_domain. The @viommu_type must be defined in the header + * include/uapi/linux/iommufd.h + * It is required to call iommufd_viommu_alloc() helper for + * a bundled allocation of the core and the driver structures, + * using the given @ictx pointer. * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops * @identity_domain: An always available, always attachable identity @@ -591,6 +601,10 @@ struct iommu_ops { void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid, struct iommu_domain *domain); + struct iommufd_viommu *(*viommu_alloc)( + struct device *dev, struct iommu_domain *parent_domain, + struct iommufd_ctx *ictx, unsigned int viommu_type); + const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; struct module *owner; diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 94522d4029ca..4fc2ce332f10 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -17,6 +17,7 @@ struct iommu_group; struct iommufd_access; struct iommufd_ctx; struct iommufd_device; +struct iommufd_viommu_ops; struct page; enum iommufd_object_type { @@ -28,6 +29,7 @@ enum iommufd_object_type { IOMMUFD_OBJ_IOAS, IOMMUFD_OBJ_ACCESS, IOMMUFD_OBJ_FAULT, + IOMMUFD_OBJ_VIOMMU, #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif @@ -78,6 +80,26 @@ void iommufd_access_detach(struct iommufd_access *access); void iommufd_ctx_get(struct iommufd_ctx *ictx); +struct iommufd_viommu { + struct iommufd_object obj; + struct iommufd_ctx *ictx; + struct iommu_device *iommu_dev; + struct iommufd_hwpt_paging *hwpt; + + const struct iommufd_viommu_ops *ops; + + unsigned int type; +}; + +/** + * struct iommufd_viommu_ops - vIOMMU specific operations + * @destroy: Clean up all driver-specific parts of an iommufd_viommu. The memory + * of the vIOMMU will be free-ed by iommufd core after calling this op + */ +struct iommufd_viommu_ops { + void (*destroy)(struct iommufd_viommu *viommu); +}; + #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_ctx *iommufd_ctx_from_file(struct file *file); struct iommufd_ctx *iommufd_ctx_from_fd(int fd); @@ -148,4 +170,22 @@ _iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, return ERR_PTR(-EOPNOTSUPP); } #endif /* CONFIG_IOMMUFD_DRIVER_CORE */ + +/* + * Helpers for IOMMU driver to allocate driver structures that will be freed by + * the iommufd core. The free op will be called prior to freeing the memory. + */ +#define iommufd_viommu_alloc(ictx, drv_struct, member, viommu_ops) \ + ({ \ + drv_struct *ret; \ + \ + static_assert(__same_type(struct iommufd_viommu, \ + ((drv_struct *)NULL)->member)); \ + static_assert(offsetof(drv_struct, member.obj) == 0); \ + ret = (drv_struct *)_iommufd_object_alloc( \ + ictx, sizeof(drv_struct), IOMMUFD_OBJ_VIOMMU); \ + if (!IS_ERR(ret)) \ + ret->member.ops = viommu_ops; \ + ret; \ + }) #endif -- cgit v1.2.3 From 4db97c21ed07a7d4081ed9820599fa36857083d6 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:04:21 -0800 Subject: iommufd/viommu: Add IOMMU_VIOMMU_ALLOC ioctl Add a new ioctl for user space to do a vIOMMU allocation. It must be based on a nesting parent HWPT, so take its refcount. IOMMU driver wanting to support vIOMMUs must define its IOMMU_VIOMMU_TYPE_ in the uAPI header and implement a viommu_alloc op in its iommu_ops. Link: https://patch.msgid.link/r/dc2b8ba9ac935007beff07c1761c31cd097ed780.1730836219.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 41b1a01e9293..302844136b02 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -52,6 +52,7 @@ enum { IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d, IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f, + IOMMUFD_CMD_VIOMMU_ALLOC = 0x90, }; /** @@ -822,4 +823,43 @@ struct iommu_fault_alloc { __u32 out_fault_fd; }; #define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC) + +/** + * enum iommu_viommu_type - Virtual IOMMU Type + * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use + */ +enum iommu_viommu_type { + IOMMU_VIOMMU_TYPE_DEFAULT = 0, +}; + +/** + * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC) + * @size: sizeof(struct iommu_viommu_alloc) + * @flags: Must be 0 + * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type + * @dev_id: The device's physical IOMMU will be used to back the virtual IOMMU + * @hwpt_id: ID of a nesting parent HWPT to associate to + * @out_viommu_id: Output virtual IOMMU ID for the allocated object + * + * Allocate a virtual IOMMU object, representing the underlying physical IOMMU's + * virtualization support that is a security-isolated slice of the real IOMMU HW + * that is unique to a specific VM. Operations global to the IOMMU are connected + * to the vIOMMU, such as: + * - Security namespace for guest owned ID, e.g. guest-controlled cache tags + * - Non-device-affiliated event reporting, e.g. invalidation queue errors + * - Access to a sharable nesting parent pagetable across physical IOMMUs + * - Virtualization of various platforms IDs, e.g. RIDs and others + * - Delivery of paravirtualized invalidation + * - Direct assigned invalidation queues + * - Direct assigned interrupts + */ +struct iommu_viommu_alloc { + __u32 size; + __u32 flags; + __u32 type; + __u32 dev_id; + __u32 hwpt_id; + __u32 out_viommu_id; +}; +#define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC) #endif -- cgit v1.2.3 From 69d2689e57f5cb235c0609dd2f8fa10c1a832f87 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:04:22 -0800 Subject: iommufd: Add alloc_domain_nested op to iommufd_viommu_ops Allow IOMMU driver to use a vIOMMU object that holds a nesting parent hwpt/domain to allocate a nested domain. Link: https://patch.msgid.link/r/2dcdb5e405dc0deb68230564530d989d285d959c.1730836219.git.nicolinc@nvidia.com Suggested-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 4fc2ce332f10..de9b56265c9c 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -14,6 +14,7 @@ struct device; struct file; struct iommu_group; +struct iommu_user_data; struct iommufd_access; struct iommufd_ctx; struct iommufd_device; @@ -95,9 +96,17 @@ struct iommufd_viommu { * struct iommufd_viommu_ops - vIOMMU specific operations * @destroy: Clean up all driver-specific parts of an iommufd_viommu. The memory * of the vIOMMU will be free-ed by iommufd core after calling this op + * @alloc_domain_nested: Allocate a IOMMU_DOMAIN_NESTED on a vIOMMU that holds a + * nesting parent domain (IOMMU_DOMAIN_PAGING). @user_data + * must be defined in include/uapi/linux/iommufd.h. + * It must fully initialize the new iommu_domain before + * returning. Upon failure, ERR_PTR must be returned. */ struct iommufd_viommu_ops { void (*destroy)(struct iommufd_viommu *viommu); + struct iommu_domain *(*alloc_domain_nested)( + struct iommufd_viommu *viommu, u32 flags, + const struct iommu_user_data *user_data); }; #if IS_ENABLED(CONFIG_IOMMUFD) -- cgit v1.2.3 From 13a750180fc86d41695c8f64d8892412482a401d Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:04:23 -0800 Subject: iommufd: Allow pt_id to carry viommu_id for IOMMU_HWPT_ALLOC Now a vIOMMU holds a shareable nesting parent HWPT. So, it can act like that nesting parent HWPT to allocate a nested HWPT. Support that in the IOMMU_HWPT_ALLOC ioctl handler, and update its kdoc. Also, add an iommufd_viommu_alloc_hwpt_nested helper to allocate a nested HWPT for a vIOMMU object. Since a vIOMMU object holds the parent hwpt's refcount already, increase the refcount of the vIOMMU only. Link: https://patch.msgid.link/r/a0f24f32bfada8b448d17587adcaedeeb50a67ed.1730836219.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 302844136b02..a498d4838f9a 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -435,7 +435,7 @@ enum iommu_hwpt_data_type { * @size: sizeof(struct iommu_hwpt_alloc) * @flags: Combination of enum iommufd_hwpt_alloc_flags * @dev_id: The device to allocate this HWPT for - * @pt_id: The IOAS or HWPT to connect this HWPT to + * @pt_id: The IOAS or HWPT or vIOMMU to connect this HWPT to * @out_hwpt_id: The ID of the new HWPT * @__reserved: Must be 0 * @data_type: One of enum iommu_hwpt_data_type @@ -454,11 +454,13 @@ enum iommu_hwpt_data_type { * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags. * - * A user-managed nested HWPT will be created from a given parent HWPT via - * @pt_id, in which the parent HWPT must be allocated previously via the - * same ioctl from a given IOAS (@pt_id). In this case, the @data_type - * must be set to a pre-defined type corresponding to an I/O page table - * type supported by the underlying IOMMU hardware. + * A user-managed nested HWPT will be created from a given vIOMMU (wrapping a + * parent HWPT) or a parent HWPT via @pt_id, in which the parent HWPT must be + * allocated previously via the same ioctl from a given IOAS (@pt_id). In this + * case, the @data_type must be set to a pre-defined type corresponding to an + * I/O page table type supported by the underlying IOMMU hardware. The device + * via @dev_id and the vIOMMU via @pt_id must be associated to the same IOMMU + * instance. * * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr -- cgit v1.2.3 From 0ce5c2477af2e2284b9c70474e4dae85db211680 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:05:09 -0800 Subject: iommufd/viommu: Add IOMMUFD_OBJ_VDEVICE and IOMMU_VDEVICE_ALLOC ioctl Introduce a new IOMMUFD_OBJ_VDEVICE to represent a physical device (struct device) against a vIOMMU (struct iommufd_viommu) object in a VM. This vDEVICE object (and its structure) holds all the infos and attributes in the VM, regarding the device related to the vIOMMU. As an initial patch, add a per-vIOMMU virtual ID. This can be: - Virtual StreamID on a nested ARM SMMUv3, an index to a Stream Table - Virtual DeviceID on a nested AMD IOMMU, an index to a Device Table - Virtual RID on a nested Intel VT-D IOMMU, an index to a Context Table Potentially, this vDEVICE structure would hold some vData for Confidential Compute Architecture (CCA). Use this virtual ID to index an "vdevs" xarray that belongs to a vIOMMU object. Add a new ioctl for vDEVICE allocations. Since a vDEVICE is a connection of a device object and an iommufd_viommu object, take two refcounts in the ioctl handler. Link: https://patch.msgid.link/r/cda8fd2263166e61b8191a3b3207e0d2b08545bf.1730836308.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 4 ++++ include/uapi/linux/iommufd.h | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index de9b56265c9c..71fa1e343023 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -10,6 +10,7 @@ #include #include #include +#include struct device; struct file; @@ -31,6 +32,7 @@ enum iommufd_object_type { IOMMUFD_OBJ_ACCESS, IOMMUFD_OBJ_FAULT, IOMMUFD_OBJ_VIOMMU, + IOMMUFD_OBJ_VDEVICE, #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif @@ -89,6 +91,8 @@ struct iommufd_viommu { const struct iommufd_viommu_ops *ops; + struct xarray vdevs; + unsigned int type; }; diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index a498d4838f9a..9b5236004b8e 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -53,6 +53,7 @@ enum { IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f, IOMMUFD_CMD_VIOMMU_ALLOC = 0x90, + IOMMUFD_CMD_VDEVICE_ALLOC = 0x91, }; /** @@ -864,4 +865,25 @@ struct iommu_viommu_alloc { __u32 out_viommu_id; }; #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC) + +/** + * struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC) + * @size: sizeof(struct iommu_vdevice_alloc) + * @viommu_id: vIOMMU ID to associate with the virtual device + * @dev_id: The physical device to allocate a virtual instance on the vIOMMU + * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTORY + * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID + * of AMD IOMMU, and vRID of a nested Intel VT-d to a Context Table + * + * Allocate a virtual device instance (for a physical device) against a vIOMMU. + * This instance holds the device's information (related to its vIOMMU) in a VM. + */ +struct iommu_vdevice_alloc { + __u32 size; + __u32 viommu_id; + __u32 dev_id; + __u32 out_vdevice_id; + __aligned_u64 virt_id; +}; +#define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC) #endif -- cgit v1.2.3 From 67db79dc1a411335f8a7e53a5e206d96b237d9a8 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:05:11 -0800 Subject: iommu/viommu: Add cache_invalidate to iommufd_viommu_ops This per-vIOMMU cache_invalidate op is like the cache_invalidate_user op in struct iommu_domain_ops, but wider, supporting device cache (e.g. PCI ATC invaldiations). Link: https://patch.msgid.link/r/90138505850fa6b165135e78a87b4cc7022869a4.1730836308.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 71fa1e343023..2bc735ff9511 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -16,6 +16,7 @@ struct device; struct file; struct iommu_group; struct iommu_user_data; +struct iommu_user_data_array; struct iommufd_access; struct iommufd_ctx; struct iommufd_device; @@ -105,12 +106,21 @@ struct iommufd_viommu { * must be defined in include/uapi/linux/iommufd.h. * It must fully initialize the new iommu_domain before * returning. Upon failure, ERR_PTR must be returned. + * @cache_invalidate: Flush hardware cache used by a vIOMMU. It can be used for + * any IOMMU hardware specific cache: TLB and device cache. + * The @array passes in the cache invalidation requests, in + * form of a driver data structure. A driver must update the + * array->entry_num to report the number of handled requests. + * The data structure of the array entry must be defined in + * include/uapi/linux/iommufd.h */ struct iommufd_viommu_ops { void (*destroy)(struct iommufd_viommu *viommu); struct iommu_domain *(*alloc_domain_nested)( struct iommufd_viommu *viommu, u32 flags, const struct iommu_user_data *user_data); + int (*cache_invalidate)(struct iommufd_viommu *viommu, + struct iommu_user_data_array *array); }; #if IS_ENABLED(CONFIG_IOMMUFD) -- cgit v1.2.3 From 54ce69e36c71c88f258b1a322c54343d90954858 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:05:12 -0800 Subject: iommufd: Allow hwpt_id to carry viommu_id for IOMMU_HWPT_INVALIDATE With a vIOMMU object, use space can flush any IOMMU related cache that can be directed via a vIOMMU object. It is similar to the IOMMU_HWPT_INVALIDATE uAPI, but can cover a wider range than IOTLB, e.g. device/desciprtor cache. Allow hwpt_id of the iommu_hwpt_invalidate structure to carry a viommu_id, and reuse the IOMMU_HWPT_INVALIDATE uAPI for vIOMMU invalidations. Drivers can define different structures for vIOMMU invalidations v.s. HWPT ones. Since both the HWPT-based and vIOMMU-based invalidation pathways check own cache invalidation op, remove the WARN_ON_ONCE in the allocator. Update the uAPI, kdoc, and selftest case accordingly. Link: https://patch.msgid.link/r/b411e2245e303b8a964f39f49453a5dff280968f.1730836308.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 9b5236004b8e..badb41c5bfa4 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -700,7 +700,7 @@ struct iommu_hwpt_vtd_s1_invalidate { /** * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE) * @size: sizeof(struct iommu_hwpt_invalidate) - * @hwpt_id: ID of a nested HWPT for cache invalidation + * @hwpt_id: ID of a nested HWPT or a vIOMMU, for cache invalidation * @data_uptr: User pointer to an array of driver-specific cache invalidation * data. * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data @@ -711,8 +711,11 @@ struct iommu_hwpt_vtd_s1_invalidate { * Output the number of requests successfully handled by kernel. * @__reserved: Must be 0. * - * Invalidate the iommu cache for user-managed page table. Modifications on a - * user-managed page table should be followed by this operation to sync cache. + * Invalidate iommu cache for user-managed page table or vIOMMU. Modifications + * on a user-managed page table should be followed by this operation, if a HWPT + * is passed in via @hwpt_id. Other caches, such as device cache or descriptor + * cache can be flushed if a vIOMMU is passed in via the @hwpt_id field. + * * Each ioctl can support one or more cache invalidation requests in the array * that has a total size of @entry_len * @entry_num. * -- cgit v1.2.3 From 4f2e59ccb69866c5165525bd7aee47cd5cd00acc Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 5 Nov 2024 12:05:13 -0800 Subject: iommu: Add iommu_copy_struct_from_full_user_array helper The iommu_copy_struct_from_user_array helper can be used to copy a single entry from a user array which might not be efficient if the array is big. Add a new iommu_copy_struct_from_full_user_array to copy the entire user array at once. Update the existing iommu_copy_struct_from_user_array kdoc accordingly. Link: https://patch.msgid.link/r/5cd773d9c26920c5807d232b21d415ea79172e49.1730836308.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 48 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2574fc8abaf2..11de66237eaa 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -493,7 +493,9 @@ static inline int __iommu_copy_struct_from_user_array( * @index: Index to the location in the array to copy user data from * @min_last: The last member of the data structure @kdst points in the * initial version. - * Return 0 for success, otherwise -error. + * + * Copy a single entry from a user array. Return 0 for success, otherwise + * -error. */ #define iommu_copy_struct_from_user_array(kdst, user_array, data_type, index, \ min_last) \ @@ -501,6 +503,50 @@ static inline int __iommu_copy_struct_from_user_array( kdst, user_array, data_type, index, sizeof(*(kdst)), \ offsetofend(typeof(*(kdst)), min_last)) +/** + * iommu_copy_struct_from_full_user_array - Copy iommu driver specific user + * space data from an iommu_user_data_array + * @kdst: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @kdst_entry_size: sizeof(*kdst) + * @user_array: Pointer to a struct iommu_user_data_array for a user space + * array + * @data_type: The data type of the @kdst. Must match with @user_array->type + * + * Copy the entire user array. kdst must have room for kdst_entry_size * + * user_array->entry_num bytes. Return 0 for success, otherwise -error. + */ +static inline int +iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, + struct iommu_user_data_array *user_array, + unsigned int data_type) +{ + unsigned int i; + int ret; + + if (user_array->type != data_type) + return -EINVAL; + if (!user_array->entry_num) + return -EINVAL; + if (likely(user_array->entry_len == kdst_entry_size)) { + if (copy_from_user(kdst, user_array->uptr, + user_array->entry_num * + user_array->entry_len)) + return -EFAULT; + } + + /* Copy item by item */ + for (i = 0; i != user_array->entry_num; i++) { + ret = copy_struct_from_user( + kdst + kdst_entry_size * i, kdst_entry_size, + user_array->uptr + user_array->entry_len * i, + user_array->entry_len); + if (ret) + return ret; + } + return 0; +} + /** * struct iommu_ops - iommu ops and capabilities * @capable: check capability -- cgit v1.2.3 From c747e67978ff473e6830afe0b1f3986dd1f444b5 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:05:14 -0800 Subject: iommufd/viommu: Add iommufd_viommu_find_dev helper This avoids a bigger trouble of exposing struct iommufd_device and struct iommufd_vdevice in the public header. Link: https://patch.msgid.link/r/84fa7c624db4d4508067ccfdf42059533950180a.1730836308.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 2bc735ff9511..11110c749200 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -185,6 +185,8 @@ static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, enum iommufd_object_type type); +struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, + unsigned long vdev_id); #else /* !CONFIG_IOMMUFD_DRIVER_CORE */ static inline struct iommufd_object * _iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, @@ -192,6 +194,12 @@ _iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, { return ERR_PTR(-EOPNOTSUPP); } + +static inline struct device * +iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) +{ + return NULL; +} #endif /* CONFIG_IOMMUFD_DRIVER_CORE */ /* -- cgit v1.2.3 From ad8d1e323dd37f91d0c973e2a74c7b9054219adc Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Mon, 2 Sep 2024 07:39:20 +0100 Subject: ARM: 9415/1: amba: Add dev_is_amba() function and export it for modules Add dev_is_amba() function to determine whether the device is a AMBA device. Suggested-by: Andy Shevchenko Signed-off-by: Kunwu Chan Signed-off-by: Russell King (Oracle) --- include/linux/amba/bus.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index dda2f3ea89cb..9946276aff73 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -121,6 +121,7 @@ extern const struct bus_type amba_bustype; #ifdef CONFIG_ARM_AMBA int __amba_driver_register(struct amba_driver *, struct module *); void amba_driver_unregister(struct amba_driver *); +bool dev_is_amba(const struct device *dev); #else static inline int __amba_driver_register(struct amba_driver *drv, struct module *owner) @@ -130,6 +131,10 @@ static inline int __amba_driver_register(struct amba_driver *drv, static inline void amba_driver_unregister(struct amba_driver *drv) { } +static inline bool dev_is_amba(const struct device *dev) +{ + return false; +} #endif struct amba_device *amba_device_alloc(const char *, resource_size_t, size_t); -- cgit v1.2.3 From 69d9b312f38aa19f8c801e90bd23d70685be49f0 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 30 Oct 2024 21:20:52 -0300 Subject: iommu/arm-smmu-v3: Support IOMMU_VIOMMU_ALLOC Add a new driver-type for ARM SMMUv3 to enum iommu_viommu_type. Implement an arm_vsmmu_alloc(). As an initial step, copy the VMID from s2_parent. A followup series is required to give the VIOMMU object it's own VMID that will be used in all nesting configurations. Link: https://patch.msgid.link/r/8-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Signed-off-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index f4f76759b738..7cb13a29969d 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -425,10 +425,12 @@ struct iommu_hwpt_vtd_s1 { * enum iommu_hwpt_data_type - IOMMU HWPT Data Type * @IOMMU_HWPT_DATA_NONE: no data * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table + * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table */ enum iommu_hwpt_data_type { IOMMU_HWPT_DATA_NONE = 0, IOMMU_HWPT_DATA_VTD_S1 = 1, + IOMMU_HWPT_DATA_ARM_SMMUV3 = 2, }; /** @@ -868,9 +870,11 @@ struct iommu_fault_alloc { /** * enum iommu_viommu_type - Virtual IOMMU Type * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use + * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type */ enum iommu_viommu_type { IOMMU_VIOMMU_TYPE_DEFAULT = 0, + IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1, }; /** -- cgit v1.2.3 From 1e8be08d1c91d52a9b51d424db78ddbf88660bbb Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 30 Oct 2024 21:20:53 -0300 Subject: iommu/arm-smmu-v3: Support IOMMU_DOMAIN_NESTED For SMMUv3 a IOMMU_DOMAIN_NESTED is composed of a S2 iommu_domain acting as the parent and a user provided STE fragment that defines the CD table and related data with addresses translated by the S2 iommu_domain. The kernel only permits userspace to control certain allowed bits of the STE that are safe for user/guest control. IOTLB maintenance is a bit subtle here, the S1 implicitly includes the S2 translation, but there is no way of knowing which S1 entries refer to a range of S2. For the IOTLB we follow ARM's guidance and issue a CMDQ_OP_TLBI_NH_ALL to flush all ASIDs from the VMID after flushing the S2 on any change to the S2. The IOMMU_DOMAIN_NESTED can only be created from inside a VIOMMU as the invalidation path relies on the VIOMMU to translate virtual stream ID used in the invalidation commands for the CD table and ATS. Link: https://patch.msgid.link/r/9-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Reviewed-by: Nicolin Chen Reviewed-by: Kevin Tian Reviewed-by: Jerry Snitselaar Reviewed-by: Donald Dutile Signed-off-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 7cb13a29969d..b6baaa1e55b1 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -421,6 +421,26 @@ struct iommu_hwpt_vtd_s1 { __u32 __reserved; }; +/** + * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 nested STE + * (IOMMU_HWPT_DATA_ARM_SMMUV3) + * + * @ste: The first two double words of the user space Stream Table Entry for + * the translation. Must be little-endian. + * Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec) + * - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax + * - word-1: S1DSS, S1CIR, S1COR, S1CSH, S1STALLD + * + * -EIO will be returned if @ste is not legal or contains any non-allowed field. + * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass + * nested domain will translate the same as the nesting parent. The S1 will + * install a Context Descriptor Table pointing at userspace memory translated + * by the nesting parent. + */ +struct iommu_hwpt_arm_smmuv3 { + __aligned_le64 ste[2]; +}; + /** * enum iommu_hwpt_data_type - IOMMU HWPT Data Type * @IOMMU_HWPT_DATA_NONE: no data -- cgit v1.2.3 From 67e4fe3985138325c9b21193be52266750616182 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 30 Oct 2024 21:20:54 -0300 Subject: iommu/arm-smmu-v3: Use S2FWB for NESTED domains Force Write Back (FWB) changes how the S2 IOPTE's MemAttr field works. When S2FWB is supported and enabled the IOPTE will force cachable access to IOMMU_CACHE memory when nesting with a S1 and deny cachable access when !IOMMU_CACHE. When using a single stage of translation, a simple S2 domain, it doesn't change things for PCI devices as it is just a different encoding for the existing mapping of the IOMMU protection flags to cachability attributes. For non-PCI it also changes the combining rules when incoming transactions have inconsistent attributes. However, when used with a nested S1, FWB has the effect of preventing the guest from choosing a MemAttr in it's S1 that would cause ordinary DMA to bypass the cache. Consistent with KVM we wish to deny the guest the ability to become incoherent with cached memory the hypervisor believes is cachable so we don't have to flush it. Allow NESTED domains to be created if the SMMU has S2FWB support and use S2FWB for NESTING_PARENTS. This is an additional option to CANWBS. Link: https://patch.msgid.link/r/10-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Reviewed-by: Nicolin Chen Reviewed-by: Kevin Tian Reviewed-by: Jerry Snitselaar Reviewed-by: Donald Dutile Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/io-pgtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index b1ecfc3cd5bc..ce86b09ae80f 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -87,6 +87,7 @@ struct io_pgtable_cfg { * attributes set in the TCR for a non-coherent page-table walker. * * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable. + * IO_PGTABLE_QUIRK_ARM_S2FWB: Use the FWB format for the MemAttrs bits */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) @@ -95,6 +96,7 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) #define IO_PGTABLE_QUIRK_ARM_HD BIT(7) + #define IO_PGTABLE_QUIRK_ARM_S2FWB BIT(8) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; -- cgit v1.2.3 From f27298a82ba09a1c8aecee8a209b2a312beac672 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 30 Oct 2024 21:20:55 -0300 Subject: iommu/arm-smmu-v3: Allow ATS for IOMMU_DOMAIN_NESTED The EATS flag needs to flow through the vSTE and into the pSTE, and ensure physical ATS is enabled on the PCI device. The physical ATS state must match the VM's idea of EATS as we rely on the VM to issue the ATS invalidation commands. Thus ATS must remain off at the device until EATS on a nesting domain turns it on. Attaching a nesting domain is the point where the invalidation responsibility transfers to userspace. Update the ATS logic to track EATS for nesting domains and flush the ATC whenever the S2 nesting parent changes. Link: https://patch.msgid.link/r/11-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Signed-off-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index b6baaa1e55b1..a66eb0384cd6 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -429,7 +429,7 @@ struct iommu_hwpt_vtd_s1 { * the translation. Must be little-endian. * Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec) * - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax - * - word-1: S1DSS, S1CIR, S1COR, S1CSH, S1STALLD + * - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD * * -EIO will be returned if @ste is not legal or contains any non-allowed field. * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass -- cgit v1.2.3 From d68beb276ba26cec47350a6d468e967673ee0c56 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 30 Oct 2024 21:20:56 -0300 Subject: iommu/arm-smmu-v3: Support IOMMU_HWPT_INVALIDATE using a VIOMMU object Implement the vIOMMU's cache_invalidate op for user space to invalidate the IOTLB entries, Device ATS and CD entries that are cached by hardware. Add struct iommu_viommu_arm_smmuv3_invalidate defining invalidation entries that are simply in the native format of a 128-bit TLBI command. Scan those commands against the permitted command list and fix their VMID/SID fields to match what is stored in the vIOMMU. Link: https://patch.msgid.link/r/12-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Co-developed-by: Eric Auger Signed-off-by: Eric Auger Co-developed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index a66eb0384cd6..747d3d9baa3d 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -713,9 +713,11 @@ struct iommu_hwpt_get_dirty_bitmap { * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation * Data Type * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1 + * @IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3: Invalidation data for ARM SMMUv3 */ enum iommu_hwpt_invalidate_data_type { IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0, + IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 = 1, }; /** @@ -754,6 +756,28 @@ struct iommu_hwpt_vtd_s1_invalidate { __u32 __reserved; }; +/** + * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cahce invalidation + * (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3) + * @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ. + * Must be little-endian. + * + * Supported command list only when passing in a vIOMMU via @hwpt_id: + * CMDQ_OP_TLBI_NSNH_ALL + * CMDQ_OP_TLBI_NH_VA + * CMDQ_OP_TLBI_NH_VAA + * CMDQ_OP_TLBI_NH_ALL + * CMDQ_OP_TLBI_NH_ASID + * CMDQ_OP_ATC_INV + * CMDQ_OP_CFGI_CD + * CMDQ_OP_CFGI_CD_ALL + * + * -EIO will be returned if the command is not supported. + */ +struct iommu_viommu_arm_smmuv3_invalidate { + __aligned_le64 cmd[2]; +}; + /** * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE) * @size: sizeof(struct iommu_hwpt_invalidate) -- cgit v1.2.3 From 134e9d035d830aabd1121bcda89f7ee9a476d3a3 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Mon, 11 Nov 2024 16:24:43 -0800 Subject: dt-bindings: power: qcom,rpmpd: document the SM8750 RPMh Power Domains Document the RPMh Power Domains on the SM8750 Platform. Signed-off-by: Taniya Das Signed-off-by: Jishnu Prakash Signed-off-by: Melody Olvera Message-ID: <20241112002444.2802092-2-quic_molvera@quicinc.com> Signed-off-by: Ulf Hansson --- include/dt-bindings/power/qcom-rpmpd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/power/qcom-rpmpd.h b/include/dt-bindings/power/qcom-rpmpd.h index 608087fb9a3d..df599bf46220 100644 --- a/include/dt-bindings/power/qcom-rpmpd.h +++ b/include/dt-bindings/power/qcom-rpmpd.h @@ -218,6 +218,7 @@ /* SDM845 Power Domain performance levels */ #define RPMH_REGULATOR_LEVEL_RETENTION 16 #define RPMH_REGULATOR_LEVEL_MIN_SVS 48 +#define RPMH_REGULATOR_LEVEL_LOW_SVS_D3 50 #define RPMH_REGULATOR_LEVEL_LOW_SVS_D2 52 #define RPMH_REGULATOR_LEVEL_LOW_SVS_D1 56 #define RPMH_REGULATOR_LEVEL_LOW_SVS_D0 60 @@ -238,6 +239,7 @@ #define RPMH_REGULATOR_LEVEL_TURBO_L1 416 #define RPMH_REGULATOR_LEVEL_TURBO_L2 432 #define RPMH_REGULATOR_LEVEL_TURBO_L3 448 +#define RPMH_REGULATOR_LEVEL_TURBO_L4 452 #define RPMH_REGULATOR_LEVEL_SUPER_TURBO 464 #define RPMH_REGULATOR_LEVEL_SUPER_TURBO_NO_CPR 480 -- cgit v1.2.3 From 61952bb73486fff0f5550bccdf4062d9dd0fb163 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Nov 2024 18:00:38 +0100 Subject: block: remove the write_hint field from struct request The write_hint is only used for read/write requests, which must have a bio attached to them. Just use the bio field instead. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20241112170050.1612998-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2035fad3131f..2804fe181d9d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -156,7 +156,6 @@ struct request { struct blk_crypto_keyslot *crypt_keyslot; #endif - enum rw_hint write_hint; unsigned short ioprio; enum mq_rq_state state; -- cgit v1.2.3 From 6975c1a486a40446b5bc77a89d9c520f8296fd08 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Nov 2024 18:00:39 +0100 Subject: block: remove the ioprio field from struct request The request ioprio is only initialized from the first attached bio, so requests without a bio already never set it. Directly use the bio field instead. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20241112170050.1612998-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 7 +++---- include/trace/events/block.h | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2804fe181d9d..a28264442948 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -156,8 +156,6 @@ struct request { struct blk_crypto_keyslot *crypt_keyslot; #endif - unsigned short ioprio; - enum mq_rq_state state; atomic_t ref; @@ -221,7 +219,9 @@ static inline bool blk_rq_is_passthrough(struct request *rq) static inline unsigned short req_get_ioprio(struct request *req) { - return req->ioprio; + if (req->bio) + return req->bio->bi_ioprio; + return 0; } #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) @@ -984,7 +984,6 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio, rq->nr_phys_segments = nr_segs; rq->__data_len = bio->bi_iter.bi_size; rq->bio = rq->biotail = bio; - rq->ioprio = bio_prio(bio); } void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 1527d5d45e01..bd0ea07338eb 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -99,7 +99,7 @@ TRACE_EVENT(block_rq_requeue, __entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0; __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); - __entry->ioprio = rq->ioprio; + __entry->ioprio = req_get_ioprio(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); __get_str(cmd)[0] = '\0'; @@ -136,7 +136,7 @@ DECLARE_EVENT_CLASS(block_rq_completion, __entry->sector = blk_rq_pos(rq); __entry->nr_sector = nr_bytes >> 9; __entry->error = blk_status_to_errno(error); - __entry->ioprio = rq->ioprio; + __entry->ioprio = req_get_ioprio(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); __get_str(cmd)[0] = '\0'; @@ -209,7 +209,7 @@ DECLARE_EVENT_CLASS(block_rq, __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); __entry->bytes = blk_rq_bytes(rq); - __entry->ioprio = rq->ioprio; + __entry->ioprio = req_get_ioprio(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); __get_str(cmd)[0] = '\0'; -- cgit v1.2.3 From 174dd22a781b97cb355b1037f0931436a254d3be Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 11 Nov 2024 09:38:26 -0800 Subject: srcu: Remove smp_mb() from srcu_read_unlock_lite() The srcu_read_unlock_lite() function invokes __srcu_read_unlock() instead of __srcu_read_unlock_lite(), which means that it is doing an unnecessary smp_mb(). This is harmless other than the performance degradation. This commit therefore switches to __srcu_read_unlock_lite(). Reported-by: Neeraj Upadhyay Closes: https://lore.kernel.org/all/d07e8f4a-d5ff-4c8e-8e61-50db285c57e9@amd.com/ Fixes: c0f08d6b5a61 ("srcu: Add srcu_read_lock_lite() and srcu_read_unlock_lite()") Signed-off-by: Paul E. McKenney Cc: Frederic Weisbecker Reviewed-by: Neeraj Upadhyay Signed-off-by: Frederic Weisbecker --- include/linux/srcu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index bab1dae3f69e..56f83237de4d 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -372,7 +372,7 @@ static inline void srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) WARN_ON_ONCE(idx & ~0x1); srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE); srcu_lock_release(&ssp->dep_map); - __srcu_read_unlock(ssp, idx); + __srcu_read_unlock_lite(ssp, idx); } /** -- cgit v1.2.3 From a76ab5731e32d50ff5b1ae97e9dc4b23f41c23f5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 12 Nov 2024 08:39:07 -0800 Subject: bpf: Find eligible subprogs for private stack support Private stack will be allocated with percpu allocator in jit time. To avoid complexity at runtime, only one copy of private stack is available per cpu per prog. So runtime recursion check is necessary to avoid stack corruption. Current private stack only supports kprobe/perf_event/tp/raw_tp which has recursion check in the kernel, and prog types that use bpf trampoline recursion check. For trampoline related prog types, currently only tracing progs have recursion checking. To avoid complexity, all async_cb subprogs use normal kernel stack including those subprogs used by both main prog subtree and async_cb subtree. Any prog having tail call also uses kernel stack. To avoid jit penalty with private stack support, a subprog stack size threshold is set such that only if the stack size is no less than the threshold, private stack is supported. The current threshold is 64 bytes. This avoids jit penality if the stack usage is small. A useless 'continue' is also removed from a loop in func check_max_stack_depth(). Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20241112163907.2223839-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 7 +++++++ include/linux/filter.h | 1 + 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3a74033d49c4..d62bb2ca1828 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -633,6 +633,12 @@ struct bpf_subprog_arg_info { }; }; +enum priv_stack_mode { + PRIV_STACK_UNKNOWN, + NO_PRIV_STACK, + PRIV_STACK_ADAPTIVE, +}; + struct bpf_subprog_info { /* 'start' has to be the first field otherwise find_subprog() won't work */ u32 start; /* insn idx of function entry point */ @@ -653,6 +659,7 @@ struct bpf_subprog_info { /* true if bpf_fastcall stack region is used by functions that can't be inlined */ bool keep_fastcall_stack: 1; + enum priv_stack_mode priv_stack_mode; u8 arg_cnt; struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS]; }; diff --git a/include/linux/filter.h b/include/linux/filter.h index 7d7578a8eac1..3a21947f2fd4 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1119,6 +1119,7 @@ bool bpf_jit_supports_exceptions(void); bool bpf_jit_supports_ptr_xchg(void); bool bpf_jit_supports_arena(void); bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena); +bool bpf_jit_supports_private_stack(void); u64 bpf_arch_uaddress_limit(void); void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); bool bpf_helper_changes_pkt_data(void *func); -- cgit v1.2.3 From e00931c02568dc6ac76f94b1ab471de05e6fdfe8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 12 Nov 2024 08:39:12 -0800 Subject: bpf: Enable private stack for eligible subprogs If private stack is used by any subprog, set that subprog prog->aux->jits_use_priv_stack to be true so later jit can allocate private stack for that subprog properly. Also set env->prog->aux->jits_use_priv_stack to be true if any subprog uses private stack. This is a use case for a single main prog (no subprogs) to use private stack, and also a use case for later struct-ops progs where env->prog->aux->jits_use_priv_stack will enable recursion check if any subprog uses private stack. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20241112163912.2224007-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7da41ae2eac8..129b29e85cec 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1523,6 +1523,7 @@ struct bpf_prog_aux { bool exception_cb; bool exception_boundary; bool is_extended; /* true if extended by freplace program */ + bool jits_use_priv_stack; u64 prog_array_member_cnt; /* counts how many times as member of prog_array */ struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */ struct bpf_arena *arena; -- cgit v1.2.3 From 7d1cd70d4b16ff0216a5f6c2ae7d0fa9fa978c07 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 12 Nov 2024 08:39:22 -0800 Subject: bpf, x86: Support private stack in jit Private stack is allocated in function bpf_int_jit_compile() with alignment 8. Private stack allocation size includes the stack size determined by verifier and additional space to protect stack overflow and underflow. See below an illustration: ---> memory address increasing [8 bytes to protect overflow] [normal stack] [8 bytes to protect underflow] If overflow/underflow is detected, kernel messages will be emited in dmesg like BPF private stack overflow/underflow detected for prog Fx BPF Private stack overflow/underflow detected for prog bpf_prog_a41699c234a1567a_subprog1x Those messages are generated when I made some changes to jitted code to intentially cause overflow for some progs. For the jited prog, The x86 register 9 (X86_REG_R9) is used to replace bpf frame register (BPF_REG_10). The private stack is used per subprog per cpu. The X86_REG_R9 is saved and restored around every func call (not including tailcall) to maintain correctness of X86_REG_R9. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20241112163922.2224385-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 129b29e85cec..d32cc373dfd1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1507,6 +1507,7 @@ struct bpf_prog_aux { u32 max_rdwr_access; struct btf *attach_btf; const struct bpf_ctx_arg_aux *ctx_arg_info; + void __percpu *priv_stack_ptr; struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */ struct bpf_prog *dst_prog; struct bpf_trampoline *dst_trampoline; -- cgit v1.2.3 From 5bd36da1e37e7a78e8b38efd287de6e1394b7d6e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 12 Nov 2024 08:39:33 -0800 Subject: bpf: Support private stack for struct_ops progs For struct_ops progs, whether a particular prog uses private stack depends on prog->aux->priv_stack_requested setting before actual insn-level verification for that prog. One particular implementation is to piggyback on struct_ops->check_member(). The next patch has an example for this. The struct_ops->check_member() sets prog->aux->priv_stack_requested to be true which enables private stack usage. The struct_ops prog follows the same rule as kprobe/tracing progs after function bpf_enable_priv_stack(). For example, even a struct_ops prog requests private stack, it could still use normal kernel stack if the stack size is small (< 64 bytes). Similar to tracing progs, nested same cpu same prog run will be skipped. A field (recursion_detected()) is added to bpf_prog_aux structure. If bpf_prog->aux->recursion_detected is implemented by the struct_ops subsystem and nested same cpu/prog happens, the function will be triggered to report an error, collect related info, etc. Acked-by: Tejun Heo Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20241112163933.2224962-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ include/linux/bpf_verifier.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d32cc373dfd1..10945c8858ce 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1525,9 +1525,11 @@ struct bpf_prog_aux { bool exception_boundary; bool is_extended; /* true if extended by freplace program */ bool jits_use_priv_stack; + bool priv_stack_requested; u64 prog_array_member_cnt; /* counts how many times as member of prog_array */ struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */ struct bpf_arena *arena; + void (*recursion_detected)(struct bpf_prog *prog); /* callback if recursion is detected */ /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index d62bb2ca1828..6b7c91629176 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -879,6 +879,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) case BPF_PROG_TYPE_TRACING: return prog->expected_attach_type != BPF_TRACE_ITER; case BPF_PROG_TYPE_STRUCT_OPS: + return prog->aux->jits_use_priv_stack; case BPF_PROG_TYPE_LSM: return false; default: -- cgit v1.2.3 From 7c8ce4ffb684676039b1ff9ff81c126794e8d88e Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Tue, 12 Nov 2024 22:58:49 +0800 Subject: bpf: Add kernel symbol for struct_ops trampoline Without kernel symbols for struct_ops trampoline, the unwinder may produce unexpected stacktraces. For example, the x86 ORC and FP unwinders check if an IP is in kernel text by verifying the presence of the IP's kernel symbol. When a struct_ops trampoline address is encountered, the unwinder stops due to the absence of symbol, resulting in an incomplete stacktrace that consists only of direct and indirect child functions called from the trampoline. The arm64 unwinder is another example. While the arm64 unwinder can proceed across a struct_ops trampoline address, the corresponding symbol name is displayed as "unknown", which is confusing. Thus, add kernel symbol for struct_ops trampoline. The name is bpf___, where is the type name of the struct_ops, and is the name of the member that the trampoline is linked to. Below is a comparison of stacktraces captured on x86 by perf record, before and after this patch. Before: ffffffff8116545d __lock_acquire+0xad ([kernel.kallsyms]) ffffffff81167fcc lock_acquire+0xcc ([kernel.kallsyms]) ffffffff813088f4 __bpf_prog_enter+0x34 ([kernel.kallsyms]) After: ffffffff811656bd __lock_acquire+0x30d ([kernel.kallsyms]) ffffffff81167fcc lock_acquire+0xcc ([kernel.kallsyms]) ffffffff81309024 __bpf_prog_enter+0x34 ([kernel.kallsyms]) ffffffffc000d7e9 bpf__tcp_congestion_ops_cong_avoid+0x3e ([kernel.kallsyms]) ffffffff81f250a5 tcp_ack+0x10d5 ([kernel.kallsyms]) ffffffff81f27c66 tcp_rcv_established+0x3b6 ([kernel.kallsyms]) ffffffff81f3ad03 tcp_v4_do_rcv+0x193 ([kernel.kallsyms]) ffffffff81d65a18 __release_sock+0xd8 ([kernel.kallsyms]) ffffffff81d65af4 release_sock+0x34 ([kernel.kallsyms]) ffffffff81f15c4b tcp_sendmsg+0x3b ([kernel.kallsyms]) ffffffff81f663d7 inet_sendmsg+0x47 ([kernel.kallsyms]) ffffffff81d5ab40 sock_write_iter+0x160 ([kernel.kallsyms]) ffffffff8149c67b vfs_write+0x3fb ([kernel.kallsyms]) ffffffff8149caf6 ksys_write+0xc6 ([kernel.kallsyms]) ffffffff8149cb5d __x64_sys_write+0x1d ([kernel.kallsyms]) ffffffff81009200 x64_sys_call+0x1d30 ([kernel.kallsyms]) ffffffff82232d28 do_syscall_64+0x68 ([kernel.kallsyms]) ffffffff8240012f entry_SYSCALL_64_after_hwframe+0x76 ([kernel.kallsyms]) Fixes: 85d33df357b6 ("bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS") Signed-off-by: Xu Kuohai Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20241112145849.3436772-4-xukuohai@huaweicloud.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 10945c8858ce..3ace0d6227e3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1402,7 +1402,8 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); /* Called only from JIT-enabled code, so there's no need for stubs. */ -void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym); +void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym); +void bpf_image_ksym_add(struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); -- cgit v1.2.3 From 12bbabd3cab8a7dab0ddad8ed1e671f40c7cdeeb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 Nov 2024 18:01:25 +0200 Subject: usb: cdns3: Synchronise PCI IDs via common data base There are a few places in the kernel where PCI IDs for different Cadence USB controllers are being used. Besides different naming, they duplicate each other. Make this all in order by providing common definitions via PCI IDs database and use in all users. While doing that, rename definitions as Roger suggested. Suggested-by: Roger Quadros Suggested-by: Greg Kroah-Hartman Signed-off-by: Andy Shevchenko Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20241112160125.2340972-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/pci_ids.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 4cf6aaed5f35..eff9eccc52fc 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -121,6 +121,7 @@ #define PCI_CLASS_SERIAL_USB_OHCI 0x0c0310 #define PCI_CLASS_SERIAL_USB_EHCI 0x0c0320 #define PCI_CLASS_SERIAL_USB_XHCI 0x0c0330 +#define PCI_CLASS_SERIAL_USB_CDNS 0x0c0380 #define PCI_CLASS_SERIAL_USB_DEVICE 0x0c03fe #define PCI_CLASS_SERIAL_FIBER 0x0c04 #define PCI_CLASS_SERIAL_SMBUS 0x0c05 @@ -2421,6 +2422,9 @@ #define PCI_VENDOR_ID_QCOM 0x17cb #define PCI_VENDOR_ID_CDNS 0x17cd +#define PCI_DEVICE_ID_CDNS_USBSS 0x0100 +#define PCI_DEVICE_ID_CDNS_USB 0x0120 +#define PCI_DEVICE_ID_CDNS_USBSSP 0x0200 #define PCI_VENDOR_ID_ARECA 0x17d3 #define PCI_DEVICE_ID_ARECA_1110 0x1110 -- cgit v1.2.3 From 948ccbd95016f50ce01df5eef9440eede3b8c713 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 13 Nov 2024 16:18:26 +0800 Subject: LoongArch: KVM: Add iocsr and mmio bus simulation in kernel Add iocsr and mmio memory read and write simulation to the kernel. When the VM accesses the device address space through iocsr instructions or mmio, it does not need to return to the qemu user mode but can directly completes the access in the kernel mode. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- include/linux/kvm_host.h | 1 + include/trace/events/kvm.h | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 45be36e5285f..164d9725cb08 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -219,6 +219,7 @@ enum kvm_bus { KVM_PIO_BUS, KVM_VIRTIO_CCW_NOTIFY_BUS, KVM_FAST_MMIO_BUS, + KVM_IOCSR_BUS, KVM_NR_BUSES }; diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 74e40d5d4af4..fc7d0f8ff078 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -236,6 +236,41 @@ TRACE_EVENT(kvm_mmio, __entry->len, __entry->gpa, __entry->val) ); +#define KVM_TRACE_IOCSR_READ_UNSATISFIED 0 +#define KVM_TRACE_IOCSR_READ 1 +#define KVM_TRACE_IOCSR_WRITE 2 + +#define kvm_trace_symbol_iocsr \ + { KVM_TRACE_IOCSR_READ_UNSATISFIED, "unsatisfied-read" }, \ + { KVM_TRACE_IOCSR_READ, "read" }, \ + { KVM_TRACE_IOCSR_WRITE, "write" } + +TRACE_EVENT(kvm_iocsr, + TP_PROTO(int type, int len, u64 gpa, void *val), + TP_ARGS(type, len, gpa, val), + + TP_STRUCT__entry( + __field( u32, type ) + __field( u32, len ) + __field( u64, gpa ) + __field( u64, val ) + ), + + TP_fast_assign( + __entry->type = type; + __entry->len = len; + __entry->gpa = gpa; + __entry->val = 0; + if (val) + memcpy(&__entry->val, val, + min_t(u32, sizeof(__entry->val), len)); + ), + + TP_printk("iocsr %s len %u gpa 0x%llx val 0x%llx", + __print_symbolic(__entry->type, kvm_trace_symbol_iocsr), + __entry->len, __entry->gpa, __entry->val) +); + #define kvm_fpu_load_symbol \ {0, "unload"}, \ {1, "load"} -- cgit v1.2.3 From c532de5a67a70f8533d495f8f2aaa9a0491c3ad0 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 13 Nov 2024 16:18:27 +0800 Subject: LoongArch: KVM: Add IPI device support Add device model for IPI interrupt controller, implement basic create & destroy interfaces, and register device model to kvm device table. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- include/uapi/linux/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 637efc055145..9fff439c30ea 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1158,7 +1158,11 @@ enum kvm_device_type { #define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_RISCV_AIA, #define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA + KVM_DEV_TYPE_LOONGARCH_IPI, +#define KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_IPI + KVM_DEV_TYPE_MAX, + }; struct kvm_vfio_spapr_tce { -- cgit v1.2.3 From 2e8b9df82631e714cc2b7bf302772c8259673180 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 13 Nov 2024 16:18:27 +0800 Subject: LoongArch: KVM: Add EIOINTC device support Add device model for EIOINTC interrupt controller, implement basic create & destroy interfaces, and register device model to kvm device table. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 9fff439c30ea..0ec5c631d9e9 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1160,6 +1160,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_LOONGARCH_IPI, #define KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_IPI + KVM_DEV_TYPE_LOONGARCH_EIOINTC, +#define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_MAX, -- cgit v1.2.3 From e785dfacf7e7fe94370fa0e8e3ff1bc8fe179831 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 13 Nov 2024 16:18:27 +0800 Subject: LoongArch: KVM: Add PCHPIC device support Add device model for PCHPIC interrupt controller, implemente basic create & destroy interface, and register device model to kvm device table. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0ec5c631d9e9..502ea63b5d2e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1162,6 +1162,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_EIOINTC, #define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC + KVM_DEV_TYPE_LOONGARCH_PCHPIC, +#define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_MAX, -- cgit v1.2.3 From da115c4ee29f589bb72ec2e86eb5e196b6bbcb41 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 12 Nov 2024 15:29:15 +0100 Subject: printk: add dummy printk_force_console_enter/exit helpers The newly added interface is broken when PRINTK is disabled: drivers/tty/sysrq.c: In function '__handle_sysrq': drivers/tty/sysrq.c:601:9: error: implicit declaration of function 'printk_force_console_enter' [-Wimplicit-function-declaration] 601 | printk_force_console_enter(); | ^~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/tty/sysrq.c:611:25: error: implicit declaration of function 'printk_force_console_exit' [-Wimplicit-function-declaration] 611 | printk_force_console_exit(); | ^~~~~~~~~~~~~~~~~~~~~~~~~ Add empty stub functions for both. Fixes: ed76c07c6885 ("printk: Introduce FORCE_CON flag") Signed-off-by: Arnd Bergmann Reviewed-by: Andy Shevchenko Reviewed-by: Marcos Paulo de Souza Tested-by: Marcos Paulo de Souza Link: https://lore.kernel.org/r/20241112142939.724093-1-arnd@kernel.org Signed-off-by: Petr Mladek --- include/linux/printk.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index 232e5fd06701..4217a9f412b2 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -232,6 +232,14 @@ static inline void printk_deferred_exit(void) { } +static inline void printk_force_console_enter(void) +{ +} + +static inline void printk_force_console_exit(void) +{ +} + static inline int printk_ratelimit(void) { return 0; -- cgit v1.2.3 From 1d58f7f3a1373734b2e86a246edcf1cd39359f3e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 25 Oct 2024 21:31:01 +0100 Subject: clocksource/drivers/dw_apb: Remove unused dw_apb_clockevent functions dw_apb_clockevent_pause(), dw_apb_clockevent_resume() and dw_apb_clockevent_stop() have been unused since 2021's commit 1b79fc4f2bfd ("x86/apb_timer: Remove driver for deprecated platform") Remove them. (Some of the other clockevent functions are still called by dw_apb_timer_of.c so I guess it is still in use?) Signed-off-by: Dr. David Alan Gilbert Link: https://lore.kernel.org/r/20241025203101.241709-1-linux@treblig.org Signed-off-by: Daniel Lezcano --- include/linux/dw_apb_timer.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/dw_apb_timer.h b/include/linux/dw_apb_timer.h index 82ebf9223948..f8811c46b89e 100644 --- a/include/linux/dw_apb_timer.h +++ b/include/linux/dw_apb_timer.h @@ -34,9 +34,6 @@ struct dw_apb_clocksource { }; void dw_apb_clockevent_register(struct dw_apb_clock_event_device *dw_ced); -void dw_apb_clockevent_pause(struct dw_apb_clock_event_device *dw_ced); -void dw_apb_clockevent_resume(struct dw_apb_clock_event_device *dw_ced); -void dw_apb_clockevent_stop(struct dw_apb_clock_event_device *dw_ced); struct dw_apb_clock_event_device * dw_apb_clockevent_init(int cpu, const char *name, unsigned rating, -- cgit v1.2.3 From 44010543fc8bedad172aa5b6c43480e5d2124497 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Nov 2024 10:09:57 -0500 Subject: fs: add the ability for statmount() to report the sb_source /proc/self/mountinfo displays the source for the mount, but statmount() doesn't yet have a way to return it. Add a new STATMOUNT_SB_SOURCE flag, claim the 32-bit __spare1 field to hold the offset into the str[] array. Reviewed-by: Jan Kara Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20241111-statmount-v4-3-2eaf35d07a80@kernel.org Acked-by: Miklos Szeredi Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 2e939dddf9cb..2b49e9131d77 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -174,7 +174,7 @@ struct statmount { __u32 mnt_point; /* [str] Mountpoint relative to current root */ __u64 mnt_ns_id; /* ID of the mount namespace */ __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ - __u32 __spare1[1]; + __u32 sb_source; /* [str] Source string of the mount */ __u64 __spare2[48]; char str[]; /* Variable size part containing strings */ }; @@ -210,6 +210,7 @@ struct mnt_id_req { #define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ #define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ #define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ +#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ /* * Special @mnt_id values that can be passed to listmount -- cgit v1.2.3 From 2f4d4503e9e5ab765a7948f98bc5deef7850f607 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 12 Nov 2024 11:10:04 +0100 Subject: statmount: add flag to retrieve unescaped options Filesystem options can be retrieved with STATMOUNT_MNT_OPTS, which returns a string of comma separated options, where some characters are escaped using the \OOO notation. Add a new flag, STATMOUNT_OPT_ARRAY, which instead returns the raw option values separated with '\0' charaters. Since escaped charaters are rare, this inteface is preferable for non-libmount users which likley don't want to deal with option de-escaping. Example code: if (st->mask & STATMOUNT_OPT_ARRAY) { const char *opt = st->str + st->opt_array; for (unsigned int i = 0; i < st->opt_num; i++) { printf("opt_array[%i]: <%s>\n", i, opt); opt += strlen(opt) + 1; } } Example ouput: (1) mnt_opts: (2) opt_array[0]: opt_array[1]: opt_array[2]: opt_array[3]: opt_array[4]: opt_array[5]: Signed-off-by: Miklos Szeredi Link: https://lore.kernel.org/r/20241112101006.30715-1-mszeredi@redhat.com Acked-by: Jeff Layton [brauner: tweak variable naming and parsing add example output] Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 2b49e9131d77..c0fda4604187 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -154,7 +154,7 @@ struct mount_attr { */ struct statmount { __u32 size; /* Total size, including strings */ - __u32 mnt_opts; /* [str] Mount options of the mount */ + __u32 mnt_opts; /* [str] Options (comma separated, escaped) */ __u64 mask; /* What results were written */ __u32 sb_dev_major; /* Device ID */ __u32 sb_dev_minor; @@ -175,7 +175,9 @@ struct statmount { __u64 mnt_ns_id; /* ID of the mount namespace */ __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ __u32 sb_source; /* [str] Source string of the mount */ - __u64 __spare2[48]; + __u32 opt_num; /* Number of fs options */ + __u32 opt_array; /* [str] Array of nul terminated fs options */ + __u64 __spare2[47]; char str[]; /* Variable size part containing strings */ }; @@ -211,6 +213,7 @@ struct mnt_id_req { #define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ #define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ #define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ +#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ /* * Special @mnt_id values that can be passed to listmount -- cgit v1.2.3 From 95f567f81e43a1bcb5fbf0559e55b7505707300d Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Fri, 1 Nov 2024 15:37:03 -0400 Subject: fs: Simplify getattr interface function checking AT_GETATTR_NOSEC flag Commit 8a924db2d7b5 ("fs: Pass AT_GETATTR_NOSEC flag to getattr interface function")' introduced the AT_GETATTR_NOSEC flag to ensure that the call paths only call vfs_getattr_nosec if it is set instead of vfs_getattr. Now, simplify the getattr interface functions of filesystems where the flag AT_GETATTR_NOSEC is checked. There is only a single caller of inode_operations getattr function and it is located in fs/stat.c in vfs_getattr_nosec. The caller there is the only one from which the AT_GETATTR_NOSEC flag is passed from. Two filesystems are checking this flag in .getattr and the flag is always passed to them unconditionally from only vfs_getattr_nosec: - ecryptfs: Simplify by always calling vfs_getattr_nosec in ecryptfs_getattr. From there the flag is passed to no other function and this function is not called otherwise. - overlayfs: Simplify by always calling vfs_getattr_nosec in ovl_getattr. From there the flag is passed to no other function and this function is not called otherwise. The query_flags in vfs_getattr_nosec will mask-out AT_GETATTR_NOSEC from any caller using AT_STATX_SYNC_TYPE as mask so that the flag is not important inside this function. Also, since no filesystem is checking the flag anymore, remove the flag entirely now, including the BUG_ON check that never triggered. The net change of the changes here combined with the original commit is that ecryptfs and overlayfs do not call vfs_getattr but only vfs_getattr_nosec. Fixes: 8a924db2d7b5 ("fs: Pass AT_GETATTR_NOSEC flag to getattr interface function") Reported-by: Al Viro Closes: https://lore.kernel.org/linux-fsdevel/20241101011724.GN1350452@ZenIV/T/#u Cc: Tyler Hicks Cc: ecryptfs@vger.kernel.org Cc: Miklos Szeredi Cc: Amir Goldstein Cc: linux-unionfs@vger.kernel.org Cc: Christian Brauner Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Christian Brauner Signed-off-by: Stefan Berger Signed-off-by: Al Viro --- include/uapi/linux/fcntl.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 87e2dec79fea..a40833bf2855 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -154,8 +154,4 @@ usable with open_by_handle_at(2). */ #define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */ -#if defined(__KERNEL__) -#define AT_GETATTR_NOSEC 0x80000000 -#endif - #endif /* _UAPI_LINUX_FCNTL_H */ -- cgit v1.2.3 From 2bd9077b6261b8f1281d0fa74d51afe090319263 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 1 Nov 2024 14:45:23 -0600 Subject: jbd2: avoid dozens of -Wflex-array-member-not-at-end warnings -Wflex-array-member-not-at-end was introduced in GCC-14, and we are getting ready to enable it, globally. Use the `DEFINE_RAW_FLEX()` helper for an on-stack definition of a flexible structure (`struct shash_desc`) where the size of the flexible-array member (`__ctx`) is known at compile-time, and refactor the rest of the code, accordingly. So, with this, fix 77 of the following warnings: include/linux/jbd2.h:1800:35: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Signed-off-by: Gustavo A. R. Silva Reviewed-by: Jan Kara Link: https://patch.msgid.link/ZyU94w0IALVhc9Jy@kspp Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 8aef9bb6ad57..50f7ea8714bf 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1796,22 +1796,21 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal) static inline u32 jbd2_chksum(journal_t *journal, u32 crc, const void *address, unsigned int length) { - struct { - struct shash_desc shash; - char ctx[JBD_MAX_CHECKSUM_SIZE]; - } desc; + DEFINE_RAW_FLEX(struct shash_desc, desc, __ctx, + DIV_ROUND_UP(JBD_MAX_CHECKSUM_SIZE, + sizeof(*((struct shash_desc *)0)->__ctx))); int err; BUG_ON(crypto_shash_descsize(journal->j_chksum_driver) > JBD_MAX_CHECKSUM_SIZE); - desc.shash.tfm = journal->j_chksum_driver; - *(u32 *)desc.ctx = crc; + desc->tfm = journal->j_chksum_driver; + *(u32 *)desc->__ctx = crc; - err = crypto_shash_update(&desc.shash, address, length); + err = crypto_shash_update(desc, address, length); BUG_ON(err); - return *(u32 *)desc.ctx; + return *(u32 *)desc->__ctx; } /* Return most recent uncommitted transaction */ -- cgit v1.2.3 From 470d2bc3a0bc19a849cc7478c02d3f5ecaa1233e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Nov 2024 09:45:35 +0100 Subject: block: export blk_validate_limits While block drivers do the validation as part of committing them to the queue, users that use the limit outside of a block device context have to validate the limits and fill in the calculated values as well. So far btrfs is the only user of queue limits without a block device, and it has gotten away with that more or less by accident. But with commit 559218d43ec9 ("block: pre-calculate max_zone_append_sectors") this became fatal for setups that have small max zone append size, as it won't be limited now. Export blk_validate_limits so that it can be called directly from btrfs. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20241113084541.34315-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 65f37ae70712..cd905afaf51a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -948,6 +948,7 @@ queue_limits_start_update(struct request_queue *q) int queue_limits_commit_update(struct request_queue *q, struct queue_limits *lim); int queue_limits_set(struct request_queue *q, struct queue_limits *lim); +int blk_validate_limits(struct queue_limits *lim); /** * queue_limits_cancel_update - cancel an atomic update of queue limits -- cgit v1.2.3 From f3dd9ae7f03aefa5bb12a4606f3d6cca87863622 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 1 Sep 2024 17:17:09 +0200 Subject: dax: Remove an unused field in struct dax_operations .dax_supported() was apparently removed by commit 7b0800d00dae ("dax: remove dax_capable") on 2021-11. Remove the now unused function pointer from the struct dax_operations. Signed-off-by: Christophe JAILLET Reviewed-by: Jan Kara Link: https://patch.msgid.link/56b92b722ca0a6fd1387c871a6ec01bcb9bd525e.1725203804.git.christophe.jaillet@wanadoo.fr Signed-off-by: Ira Weiny --- include/linux/dax.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index 9d3e3327af4c..df41a0017b31 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -27,12 +27,6 @@ struct dax_operations { */ long (*direct_access)(struct dax_device *, pgoff_t, long, enum dax_access_mode, void **, pfn_t *); - /* - * Validate whether this device is usable as an fsdax backing - * device. - */ - bool (*dax_supported)(struct dax_device *, struct block_device *, int, - sector_t, sector_t); /* zero_page_range: required operation. Zero page range */ int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); /* -- cgit v1.2.3 From e8225ab15006fbcdb14cef426a0a54475292fbbc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Nov 2024 16:20:43 +0100 Subject: block: remove rq_list_move Unused now. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20241113152050.157179-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a28264442948..ad26a41d13f9 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -268,23 +268,6 @@ static inline unsigned short req_get_ioprio(struct request *req) #define rq_list_next(rq) (rq)->rq_next #define rq_list_empty(list) ((list) == (struct request *) NULL) -/** - * rq_list_move() - move a struct request from one list to another - * @src: The source list @rq is currently in - * @dst: The destination list that @rq will be appended to - * @rq: The request to move - * @prev: The request preceding @rq in @src (NULL if @rq is the head) - */ -static inline void rq_list_move(struct request **src, struct request **dst, - struct request *rq, struct request *prev) -{ - if (prev) - prev->rq_next = rq->rq_next; - else - *src = rq->rq_next; - rq_list_add(dst, rq); -} - /** * enum blk_eh_timer_return - How the timeout handler should proceed * @BLK_EH_DONE: The block driver completed the command or will complete it at -- cgit v1.2.3 From a3396b99990d8b4e5797e7b16fdeb64c15ae97bb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Nov 2024 16:20:44 +0100 Subject: block: add a rq_list type Replace the semi-open coded request list helpers with a proper rq_list type that mirrors the bio_list and has head and tail pointers. Besides better type safety this actually allows to insert at the tail of the list, which will be useful soon. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20241113152050.157179-5-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 88 +++++++++++++++++++++++++++++--------------------- include/linux/blkdev.h | 11 +++++-- 2 files changed, 60 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ad26a41d13f9..c61e04365677 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -229,44 +229,60 @@ static inline unsigned short req_get_ioprio(struct request *req) #define rq_dma_dir(rq) \ (op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE) -#define rq_list_add(listptr, rq) do { \ - (rq)->rq_next = *(listptr); \ - *(listptr) = rq; \ -} while (0) - -#define rq_list_add_tail(lastpptr, rq) do { \ - (rq)->rq_next = NULL; \ - **(lastpptr) = rq; \ - *(lastpptr) = &rq->rq_next; \ -} while (0) - -#define rq_list_pop(listptr) \ -({ \ - struct request *__req = NULL; \ - if ((listptr) && *(listptr)) { \ - __req = *(listptr); \ - *(listptr) = __req->rq_next; \ - } \ - __req; \ -}) +static inline int rq_list_empty(const struct rq_list *rl) +{ + return rl->head == NULL; +} -#define rq_list_peek(listptr) \ -({ \ - struct request *__req = NULL; \ - if ((listptr) && *(listptr)) \ - __req = *(listptr); \ - __req; \ -}) +static inline void rq_list_init(struct rq_list *rl) +{ + rl->head = NULL; + rl->tail = NULL; +} + +static inline void rq_list_add_tail(struct rq_list *rl, struct request *rq) +{ + rq->rq_next = NULL; + if (rl->tail) + rl->tail->rq_next = rq; + else + rl->head = rq; + rl->tail = rq; +} + +static inline void rq_list_add_head(struct rq_list *rl, struct request *rq) +{ + rq->rq_next = rl->head; + rl->head = rq; + if (!rl->tail) + rl->tail = rq; +} + +static inline struct request *rq_list_pop(struct rq_list *rl) +{ + struct request *rq = rl->head; + + if (rq) { + rl->head = rl->head->rq_next; + if (!rl->head) + rl->tail = NULL; + rq->rq_next = NULL; + } + + return rq; +} -#define rq_list_for_each(listptr, pos) \ - for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos)) +static inline struct request *rq_list_peek(struct rq_list *rl) +{ + return rl->head; +} -#define rq_list_for_each_safe(listptr, pos, nxt) \ - for (pos = rq_list_peek((listptr)), nxt = rq_list_next(pos); \ - pos; pos = nxt, nxt = pos ? rq_list_next(pos) : NULL) +#define rq_list_for_each(rl, pos) \ + for (pos = rq_list_peek((rl)); (pos); pos = pos->rq_next) -#define rq_list_next(rq) (rq)->rq_next -#define rq_list_empty(list) ((list) == (struct request *) NULL) +#define rq_list_for_each_safe(rl, pos, nxt) \ + for (pos = rq_list_peek((rl)), nxt = pos->rq_next; \ + pos; pos = nxt, nxt = pos ? pos->rq_next : NULL) /** * enum blk_eh_timer_return - How the timeout handler should proceed @@ -559,7 +575,7 @@ struct blk_mq_ops { * empty the @rqlist completely, then the rest will be queued * individually by the block layer upon return. */ - void (*queue_rqs)(struct request **rqlist); + void (*queue_rqs)(struct rq_list *rqlist); /** * @get_budget: Reserve budget before queue request, once .queue_rq is @@ -868,7 +884,7 @@ static inline bool blk_mq_add_to_batch(struct request *req, else if (iob->complete != complete) return false; iob->need_ts |= blk_mq_need_time_stamp(req); - rq_list_add(&iob->req_list, req); + rq_list_add_head(&iob->req_list, req); return true; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cd905afaf51a..00212e96261a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1007,6 +1007,11 @@ extern void blk_put_queue(struct request_queue *); void blk_mark_disk_dead(struct gendisk *disk); #ifdef CONFIG_BLOCK +struct rq_list { + struct request *head; + struct request *tail; +}; + /* * blk_plug permits building a queue of related requests by holding the I/O * fragments for a short period. This allows merging of sequential requests @@ -1019,10 +1024,10 @@ void blk_mark_disk_dead(struct gendisk *disk); * blk_flush_plug() is called. */ struct blk_plug { - struct request *mq_list; /* blk-mq requests */ + struct rq_list mq_list; /* blk-mq requests */ /* if ios_left is > 1, we can batch tag/rq allocations */ - struct request *cached_rq; + struct rq_list cached_rqs; u64 cur_ktime; unsigned short nr_ios; @@ -1684,7 +1689,7 @@ int bdev_thaw(struct block_device *bdev); void bdev_fput(struct file *bdev_file); struct io_comp_batch { - struct request *req_list; + struct rq_list req_list; bool need_ts; void (*complete)(struct io_comp_batch *); }; -- cgit v1.2.3 From 00e8d290b55f2fa5c5a0500b4dccf9e090650447 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Nov 2024 16:20:46 +0100 Subject: block: don't reorder requests in blk_mq_add_to_batch LIFO ordering for batched completions is a bit unexpected and also defeats some merging optimizations in e.g. the XFS buffered write code. Now that we can easily add the request to the tail of the list do that. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20241113152050.157179-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c61e04365677..c596e0e4cb75 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -884,7 +884,7 @@ static inline bool blk_mq_add_to_batch(struct request *req, else if (iob->complete != complete) return false; iob->need_ts |= blk_mq_need_time_stamp(req); - rq_list_add_head(&iob->req_list, req); + rq_list_add_tail(&iob->req_list, req); return true; } -- cgit v1.2.3 From 27184f8905ba680f22abf1707fbed24036a67119 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Wed, 13 Nov 2024 07:54:12 +0200 Subject: tpm: Opt-in in disable PCR integrity protection The initial HMAC session feature added TPM bus encryption and/or integrity protection to various in-kernel TPM operations. This can cause performance bottlenecks with IMA, as it heavily utilizes PCR extend operations. In order to mitigate this performance issue, introduce a kernel command-line parameter to the TPM driver for disabling the integrity protection for PCR extend operations (i.e. TPM2_PCR_Extend). Cc: James Bottomley Link: https://lore.kernel.org/linux-integrity/20241015193916.59964-1-zohar@linux.ibm.com/ Fixes: 6519fea6fd37 ("tpm: add hmac checks to tpm2_pcr_extend()") Tested-by: Mimi Zohar Co-developed-by: Roberto Sassu Signed-off-by: Roberto Sassu Co-developed-by: Mimi Zohar Signed-off-by: Mimi Zohar Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 587b96b4418e..20a40ade8030 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -421,6 +421,7 @@ void tpm_buf_append_u32(struct tpm_buf *buf, const u32 value); u8 tpm_buf_read_u8(struct tpm_buf *buf, off_t *offset); u16 tpm_buf_read_u16(struct tpm_buf *buf, off_t *offset); u32 tpm_buf_read_u32(struct tpm_buf *buf, off_t *offset); +void tpm_buf_append_handle(struct tpm_chip *chip, struct tpm_buf *buf, u32 handle); /* * Check if TPM device is in the firmware upgrade mode. @@ -505,6 +506,8 @@ void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, u8 attributes, u8 *passphrase, int passphraselen); +void tpm_buf_append_auth(struct tpm_chip *chip, struct tpm_buf *buf, + u8 attributes, u8 *passphrase, int passphraselen); static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip, struct tpm_buf *buf, u8 attributes, -- cgit v1.2.3 From 7c1ae151e81268db1fe8c8a473d922fc5ba47b72 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 13 Nov 2024 13:51:54 +0200 Subject: virtio_pci: Introduce device parts access commands Introduce device parts access commands via the admin queue. These commands and their structure adhere to the Virtio 1.4 specification. Acked-by: Michael S. Tsirkin Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20241113115200.209269-2-yishaih@nvidia.com Signed-off-by: Alex Williamson --- include/uapi/linux/virtio_pci.h | 131 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index a8208492e822..1beb317df1b9 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -40,6 +40,7 @@ #define _LINUX_VIRTIO_PCI_H #include +#include #ifndef VIRTIO_PCI_NO_LEGACY @@ -240,6 +241,17 @@ struct virtio_pci_cfg_cap { #define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ 0x5 #define VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO 0x6 +/* Device parts access commands. */ +#define VIRTIO_ADMIN_CMD_CAP_ID_LIST_QUERY 0x7 +#define VIRTIO_ADMIN_CMD_DEVICE_CAP_GET 0x8 +#define VIRTIO_ADMIN_CMD_DRIVER_CAP_SET 0x9 +#define VIRTIO_ADMIN_CMD_RESOURCE_OBJ_CREATE 0xa +#define VIRTIO_ADMIN_CMD_RESOURCE_OBJ_DESTROY 0xd +#define VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_GET 0xe +#define VIRTIO_ADMIN_CMD_DEV_PARTS_GET 0xf +#define VIRTIO_ADMIN_CMD_DEV_PARTS_SET 0x10 +#define VIRTIO_ADMIN_CMD_DEV_MODE_SET 0x11 + struct virtio_admin_cmd_hdr { __le16 opcode; /* @@ -286,4 +298,123 @@ struct virtio_admin_cmd_notify_info_result { struct virtio_admin_cmd_notify_info_data entries[VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO]; }; +#define VIRTIO_DEV_PARTS_CAP 0x0000 + +struct virtio_dev_parts_cap { + __u8 get_parts_resource_objects_limit; + __u8 set_parts_resource_objects_limit; +}; + +#define MAX_CAP_ID __KERNEL_DIV_ROUND_UP(VIRTIO_DEV_PARTS_CAP + 1, 64) + +struct virtio_admin_cmd_query_cap_id_result { + __le64 supported_caps[MAX_CAP_ID]; +}; + +struct virtio_admin_cmd_cap_get_data { + __le16 id; + __u8 reserved[6]; +}; + +struct virtio_admin_cmd_cap_set_data { + __le16 id; + __u8 reserved[6]; + __u8 cap_specific_data[]; +}; + +struct virtio_admin_cmd_resource_obj_cmd_hdr { + __le16 type; + __u8 reserved[2]; + __le32 id; /* Indicates unique resource object id per resource object type */ +}; + +struct virtio_admin_cmd_resource_obj_create_data { + struct virtio_admin_cmd_resource_obj_cmd_hdr hdr; + __le64 flags; + __u8 resource_obj_specific_data[]; +}; + +#define VIRTIO_RESOURCE_OBJ_DEV_PARTS 0 + +#define VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET 0 +#define VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_SET 1 + +struct virtio_resource_obj_dev_parts { + __u8 type; + __u8 reserved[7]; +}; + +#define VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE 0 +#define VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_COUNT 1 +#define VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_LIST 2 + +struct virtio_admin_cmd_dev_parts_metadata_data { + struct virtio_admin_cmd_resource_obj_cmd_hdr hdr; + __u8 type; + __u8 reserved[7]; +}; + +#define VIRTIO_DEV_PART_F_OPTIONAL 0 + +struct virtio_dev_part_hdr { + __le16 part_type; + __u8 flags; + __u8 reserved; + union { + struct { + __le32 offset; + __le32 reserved; + } pci_common_cfg; + struct { + __le16 index; + __u8 reserved[6]; + } vq_index; + } selector; + __le32 length; +}; + +struct virtio_dev_part { + struct virtio_dev_part_hdr hdr; + __u8 value[]; +}; + +struct virtio_admin_cmd_dev_parts_metadata_result { + union { + struct { + __le32 size; + __le32 reserved; + } parts_size; + struct { + __le32 count; + __le32 reserved; + } hdr_list_count; + struct { + __le32 count; + __le32 reserved; + struct virtio_dev_part_hdr hdrs[]; + } hdr_list; + }; +}; + +#define VIRTIO_ADMIN_CMD_DEV_PARTS_GET_TYPE_SELECTED 0 +#define VIRTIO_ADMIN_CMD_DEV_PARTS_GET_TYPE_ALL 1 + +struct virtio_admin_cmd_dev_parts_get_data { + struct virtio_admin_cmd_resource_obj_cmd_hdr hdr; + __u8 type; + __u8 reserved[7]; + struct virtio_dev_part_hdr hdr_list[]; +}; + +struct virtio_admin_cmd_dev_parts_set_data { + struct virtio_admin_cmd_resource_obj_cmd_hdr hdr; + struct virtio_dev_part parts[]; +}; + +#define VIRTIO_ADMIN_CMD_DEV_MODE_F_STOPPED 0 + +struct virtio_admin_cmd_dev_mode_set_data { + __u8 flags; +}; + #endif -- cgit v1.2.3 From 704806ca400e5daa86c110f14bfdda9d28203bb7 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 13 Nov 2024 13:51:55 +0200 Subject: virtio: Extend the admin command to include the result size Extend the admin command by incorporating a result size field. This allows higher layers to determine the actual result size from the backend when this information is not included in the result_sg. The additional information introduced here will be used in subsequent patches of this series. Acked-by: Michael S. Tsirkin Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20241113115200.209269-3-yishaih@nvidia.com Signed-off-by: Alex Williamson --- include/linux/virtio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 306137a15d07..b5f7a611715a 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -111,6 +111,7 @@ struct virtio_admin_cmd { struct scatterlist *data_sg; struct scatterlist *result_sg; struct completion completion; + u32 result_sg_size; int ret; }; -- cgit v1.2.3 From 52a22c0ed03ce23f20df81f79b23cb6637716fae Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 13 Nov 2024 13:51:57 +0200 Subject: virtio-pci: Introduce APIs to execute device parts admin commands Introduce APIs to handle the execution of device parts admin commands. These APIs cover functionalities such as mode setting, object creation and destruction, and operations like parts get/set and metadata retrieval. These APIs will be utilized in upcoming patches within this series. Acked-by: Michael S. Tsirkin Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20241113115200.209269-5-yishaih@nvidia.com Signed-off-by: Alex Williamson --- include/linux/virtio_pci_admin.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/virtio_pci_admin.h b/include/linux/virtio_pci_admin.h index f4a100a0fe2e..dffc92c17ad2 100644 --- a/include/linux/virtio_pci_admin.h +++ b/include/linux/virtio_pci_admin.h @@ -20,4 +20,15 @@ int virtio_pci_admin_legacy_io_notify_info(struct pci_dev *pdev, u64 *bar_offset); #endif +bool virtio_pci_admin_has_dev_parts(struct pci_dev *pdev); +int virtio_pci_admin_mode_set(struct pci_dev *pdev, u8 mode); +int virtio_pci_admin_obj_create(struct pci_dev *pdev, u16 obj_type, u8 operation_type, + u32 *obj_id); +int virtio_pci_admin_obj_destroy(struct pci_dev *pdev, u16 obj_type, u32 id); +int virtio_pci_admin_dev_parts_metadata_get(struct pci_dev *pdev, u16 obj_type, + u32 id, u8 metadata_type, u32 *out); +int virtio_pci_admin_dev_parts_get(struct pci_dev *pdev, u16 obj_type, u32 id, + u8 get_type, struct scatterlist *res_sg, u32 *res_size); +int virtio_pci_admin_dev_parts_set(struct pci_dev *pdev, struct scatterlist *data_sg); + #endif /* _LINUX_VIRTIO_PCI_ADMIN_H */ -- cgit v1.2.3 From 6b998404c71ecff9ebeed343c1ce21f9df3ef131 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 12 Nov 2024 21:36:29 +0100 Subject: net: simplify eeecfg_mac_can_tx_lpi Simplify the function. Signed-off-by: Heiner Kallweit Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/f9a4623b-b94c-466c-8733-62057c6d9a17@gmail.com Signed-off-by: Jakub Kicinski --- include/net/eee.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/eee.h b/include/net/eee.h index 84837aba3cd9..cfab1b8bc46a 100644 --- a/include/net/eee.h +++ b/include/net/eee.h @@ -13,10 +13,7 @@ struct eee_config { static inline bool eeecfg_mac_can_tx_lpi(const struct eee_config *eeecfg) { /* eee_enabled is the master on/off */ - if (!eeecfg->eee_enabled || !eeecfg->tx_lpi_enabled) - return false; - - return true; + return eeecfg->eee_enabled && eeecfg->tx_lpi_enabled; } static inline void eeecfg_to_eee(struct ethtool_keee *eee, -- cgit v1.2.3 From eb94b7bb10109a14a5431a67e5d8e31cfa06b395 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 11 Nov 2024 00:17:34 +0100 Subject: net: Make copy_safe_from_sockptr() match documentation copy_safe_from_sockptr() return copy_from_sockptr() return copy_from_sockptr_offset() return copy_from_user() copy_from_user() does not return an error on fault. Instead, it returns a number of bytes that were not copied. Have it handled. Patch has a side effect: it un-breaks garbage input handling of nfc_llcp_setsockopt() and mISDN's data_sock_setsockopt(). Fixes: 6309863b31dd ("net: add copy_safe_from_sockptr() helper") Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20241111-sockptr-copy-ret-fix-v1-1-a520083a93fb@rbox.co Signed-off-by: Jakub Kicinski --- include/linux/sockptr.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index fc5a206c4043..195debe2b1db 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -77,7 +77,9 @@ static inline int copy_safe_from_sockptr(void *dst, size_t ksize, { if (optlen < ksize) return -EINVAL; - return copy_from_sockptr(dst, optval, ksize); + if (copy_from_sockptr(dst, optval, ksize)) + return -EFAULT; + return 0; } static inline int copy_struct_from_sockptr(void *dst, size_t ksize, -- cgit v1.2.3 From 16220cb315a0a10d2a003d9af534988686e675ea Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Fri, 8 Nov 2024 17:57:05 -0800 Subject: net: dsa: microchip: Add LAN9646 switch support to KSZ DSA driver LAN9646 switch is a 6-port switch with functions like KSZ9897. It has 4 internal PHYs and 1 SGMII port. The chip id read from hardware is same as KSZ9477, so software driver needs to create a new chip id and group allowable functions under its chip data structure to differentiate the product. Signed-off-by: Tristram Ha Link: https://patch.msgid.link/20241109015705.82685-3-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski --- include/linux/platform_data/microchip-ksz.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/platform_data/microchip-ksz.h b/include/linux/platform_data/microchip-ksz.h index 2ee1a679e592..0e0e8fe6975f 100644 --- a/include/linux/platform_data/microchip-ksz.h +++ b/include/linux/platform_data/microchip-ksz.h @@ -42,6 +42,7 @@ enum ksz_chip_id { LAN9372_CHIP_ID = 0x00937200, LAN9373_CHIP_ID = 0x00937300, LAN9374_CHIP_ID = 0x00937400, + LAN9646_CHIP_ID = 0x00964600, }; struct ksz_platform_data { -- cgit v1.2.3 From e311b04db66aaed1819bdd479d42c4f338f105b9 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 12 Nov 2024 12:56:45 +0000 Subject: soundwire: Update the includes on the sdw.h header There are quite a few things used in the sdw.h header that it relies on the consumer to include. If something is used directly in the header it should be included by the header. Update the includes to cover the missing items, or add forward declarations for things that are only used as pointers. Whilst making the change also alphabetise the list of includes. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20241112125646.590240-1-ckeepax@opensource.cirrus.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 73f655334fe9..1fd4b126287f 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -4,12 +4,19 @@ #ifndef __SOUNDWIRE_H #define __SOUNDWIRE_H +#include #include -#include +#include +#include #include #include +#include #include -#include +#include +#include + +struct dentry; +struct fwnode_handle; struct sdw_bus; struct sdw_slave; -- cgit v1.2.3 From dd690b31de0ed46adc5856698880560b900386ba Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 12 Nov 2024 12:56:46 +0000 Subject: soundwire: Minor formatting fixups in sdw.h header Fixup some minor formatting and whitespace in the sdw.h header file. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20241112125646.590240-2-ckeepax@opensource.cirrus.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 1fd4b126287f..784656f740f6 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -616,7 +616,6 @@ struct sdw_slave_ops { int (*clk_stop)(struct sdw_slave *slave, enum sdw_clk_stop_mode mode, enum sdw_clk_stop_type type); - }; /** @@ -690,8 +689,7 @@ struct sdw_master_device { container_of(d, struct sdw_master_device, dev) struct sdw_driver { - int (*probe)(struct sdw_slave *sdw, - const struct sdw_device_id *id); + int (*probe)(struct sdw_slave *sdw, const struct sdw_device_id *id); int (*remove)(struct sdw_slave *sdw); void (*shutdown)(struct sdw_slave *sdw); @@ -710,7 +708,7 @@ struct sdw_driver { SDW_SLAVE_ENTRY_EXT((_mfg_id), (_part_id), 0, 0, (_drv_data)) int sdw_handle_slave_status(struct sdw_bus *bus, - enum sdw_slave_status status[]); + enum sdw_slave_status status[]); /* * SDW master structures and APIs @@ -792,15 +790,14 @@ struct sdw_enable_ch { */ struct sdw_master_port_ops { int (*dpn_set_port_params)(struct sdw_bus *bus, - struct sdw_port_params *port_params, - unsigned int bank); + struct sdw_port_params *port_params, + unsigned int bank); int (*dpn_set_port_transport_params)(struct sdw_bus *bus, - struct sdw_transport_params *transport_params, - enum sdw_reg_bank bank); - int (*dpn_port_prep)(struct sdw_bus *bus, - struct sdw_prepare_ch *prepare_ch); + struct sdw_transport_params *transport_params, + enum sdw_reg_bank bank); + int (*dpn_port_prep)(struct sdw_bus *bus, struct sdw_prepare_ch *prepare_ch); int (*dpn_port_enable_ch)(struct sdw_bus *bus, - struct sdw_enable_ch *enable_ch, unsigned int bank); + struct sdw_enable_ch *enable_ch, unsigned int bank); }; struct sdw_msg; @@ -835,14 +832,11 @@ struct sdw_defer { */ struct sdw_master_ops { int (*read_prop)(struct sdw_bus *bus); - u64 (*override_adr) - (struct sdw_bus *bus, u64 addr); - enum sdw_command_response (*xfer_msg) - (struct sdw_bus *bus, struct sdw_msg *msg); - enum sdw_command_response (*xfer_msg_defer) - (struct sdw_bus *bus); + u64 (*override_adr)(struct sdw_bus *bus, u64 addr); + enum sdw_command_response (*xfer_msg)(struct sdw_bus *bus, struct sdw_msg *msg); + enum sdw_command_response (*xfer_msg_defer)(struct sdw_bus *bus); int (*set_bus_conf)(struct sdw_bus *bus, - struct sdw_bus_params *params); + struct sdw_bus_params *params); int (*pre_bank_switch)(struct sdw_bus *bus); int (*post_bank_switch)(struct sdw_bus *bus); u32 (*read_ping_status)(struct sdw_bus *bus); @@ -1019,12 +1013,12 @@ void sdw_release_stream(struct sdw_stream_runtime *stream); int sdw_compute_params(struct sdw_bus *bus); int sdw_stream_add_master(struct sdw_bus *bus, - struct sdw_stream_config *stream_config, - const struct sdw_port_config *port_config, - unsigned int num_ports, - struct sdw_stream_runtime *stream); + struct sdw_stream_config *stream_config, + const struct sdw_port_config *port_config, + unsigned int num_ports, + struct sdw_stream_runtime *stream); int sdw_stream_remove_master(struct sdw_bus *bus, - struct sdw_stream_runtime *stream); + struct sdw_stream_runtime *stream); int sdw_startup_stream(void *sdw_substream); int sdw_prepare_stream(struct sdw_stream_runtime *stream); int sdw_enable_stream(struct sdw_stream_runtime *stream); -- cgit v1.2.3 From 04782e63917dbcb60932fe93df52c4a4e3859d07 Mon Sep 17 00:00:00 2001 From: Colton Lewis Date: Wed, 13 Nov 2024 19:01:52 +0000 Subject: perf/core: Hoist perf_instruction_pointer() and perf_misc_flags() For clarity, rename the arch-specific definitions of these functions to perf_arch_* to denote they are arch-specifc. Define the generic-named functions in one place where they can call the arch-specific ones as needed. Signed-off-by: Colton Lewis Signed-off-by: Ingo Molnar Reviewed-by: Oliver Upton Acked-by: Thomas Richter Acked-by: Mark Rutland Acked-by: Madhavan Srinivasan Acked-by: Kan Liang Link: https://lore.kernel.org/r/20241113190156.2145593-3-coltonlewis@google.com --- include/linux/perf_event.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 91b310052a7c..3b4bf5e329f6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1676,10 +1676,13 @@ extern void perf_tp_event(u16 event_type, u64 count, void *record, struct task_struct *task); extern void perf_bp_event(struct perf_event *event, void *data); -#ifndef perf_misc_flags -# define perf_misc_flags(regs) \ +extern unsigned long perf_misc_flags(struct pt_regs *regs); +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); + +#ifndef perf_arch_misc_flags +# define perf_arch_misc_flags(regs) \ (user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL) -# define perf_instruction_pointer(regs) instruction_pointer(regs) +# define perf_arch_instruction_pointer(regs) instruction_pointer(regs) #endif #ifndef perf_arch_bpf_user_pt_regs # define perf_arch_bpf_user_pt_regs(regs) regs -- cgit v1.2.3 From 2c47e7a74f445426d156278e339b7abb259e50de Mon Sep 17 00:00:00 2001 From: Colton Lewis Date: Wed, 13 Nov 2024 19:01:55 +0000 Subject: perf/core: Correct perf sampling with guest VMs Previously any PMU overflow interrupt that fired while a VCPU was loaded was recorded as a guest event whether it truly was or not. This resulted in nonsense perf recordings that did not honor perf_event_attr.exclude_guest and recorded guest IPs where it should have recorded host IPs. Rework the sampling logic to only record guest samples for events with exclude_guest = 0. This way any host-only events with exclude_guest set will never see unexpected guest samples. The behaviour of events with exclude_guest = 0 is unchanged. Note that events configured to sample both host and guest may still misattribute a PMI that arrived in the host as a guest event depending on KVM arch and vendor behavior. Signed-off-by: Colton Lewis Signed-off-by: Ingo Molnar Reviewed-by: Oliver Upton Acked-by: Mark Rutland Acked-by: Kan Liang Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Link: https://lore.kernel.org/r/20241113190156.2145593-6-coltonlewis@google.com --- include/linux/perf_event.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3b4bf5e329f6..cb99ec8c9e96 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1676,8 +1676,9 @@ extern void perf_tp_event(u16 event_type, u64 count, void *record, struct task_struct *task); extern void perf_bp_event(struct perf_event *event, void *data); -extern unsigned long perf_misc_flags(struct pt_regs *regs); -extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_misc_flags(struct perf_event *event, struct pt_regs *regs); +extern unsigned long perf_instruction_pointer(struct perf_event *event, + struct pt_regs *regs); #ifndef perf_arch_misc_flags # define perf_arch_misc_flags(regs) \ @@ -1688,6 +1689,22 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs); # define perf_arch_bpf_user_pt_regs(regs) regs #endif +#ifndef perf_arch_guest_misc_flags +static inline unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs) +{ + unsigned long guest_state = perf_guest_state(); + + if (!(guest_state & PERF_GUEST_ACTIVE)) + return 0; + + if (guest_state & PERF_GUEST_USER) + return PERF_RECORD_MISC_GUEST_USER; + else + return PERF_RECORD_MISC_GUEST_KERNEL; +} +# define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs) +#endif + static inline bool has_branch_stack(struct perf_event *event) { return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; -- cgit v1.2.3 From 9fed2c0f2f0771b990d068ef0a2b32e770ae6d48 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 13 Nov 2024 09:17:51 -0500 Subject: fs: reduce pointer chasing in is_mgtime() test The is_mgtime test checks whether the FS_MGTIME flag is set in the fstype. To get there from the inode though, we have to dereference 3 pointers. Add a new IOP_MGTIME flag, and have inode_init_always() set that flag when the fstype flag is set. Then, make is_mgtime test for IOP_MGTIME instead. Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20241113-mgtime-v1-1-84e256980e11@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index b1a3bd07711b..a9c6c8cbda50 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -623,6 +623,7 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_NOFOLLOW 0x0004 #define IOP_XATTR 0x0008 #define IOP_DEFAULT_READLINK 0x0010 +#define IOP_MGTIME 0x0020 /* * Keep mostly read-only and often accessed (especially for @@ -2581,7 +2582,7 @@ struct file_system_type { */ static inline bool is_mgtime(const struct inode *inode) { - return inode->i_sb->s_type->fs_flags & FS_MGTIME; + return inode->i_opflags & IOP_MGTIME; } extern struct dentry *mount_bdev(struct file_system_type *fs_type, -- cgit v1.2.3 From 8eb36164d1a6769a20ed43033510067ff3dab9ee Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 11 Nov 2024 10:16:49 +0000 Subject: bonding: add ns target multicast address to slave device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 4598380f9c54 ("bonding: fix ns validation on backup slaves") tried to resolve the issue where backup slaves couldn't be brought up when receiving IPv6 Neighbor Solicitation (NS) messages. However, this fix only worked for drivers that receive all multicast messages, such as the veth interface. For standard drivers, the NS multicast message is silently dropped because the slave device is not a member of the NS target multicast group. To address this, we need to make the slave device join the NS target multicast group, ensuring it can receive these IPv6 NS messages to validate the slave’s status properly. There are three policies before joining the multicast group: 1. All settings must be under active-backup mode (alb and tlb do not support arp_validate), with backup slaves and slaves supporting multicast. 2. We can add or remove multicast groups when arp_validate changes. 3. Other operations, such as enslaving, releasing, or setting NS targets, need to be guarded by arp_validate. Fixes: 4e24be018eb9 ("bonding: add new parameter ns_targets") Signed-off-by: Hangbin Liu Reviewed-by: Nikolay Aleksandrov Signed-off-by: Paolo Abeni --- include/net/bond_options.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 473a0147769e..18687ccf0638 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -161,5 +161,7 @@ void bond_option_arp_ip_targets_clear(struct bonding *bond); #if IS_ENABLED(CONFIG_IPV6) void bond_option_ns_ip6_targets_clear(struct bonding *bond); #endif +void bond_slave_ns_maddrs_add(struct bonding *bond, struct slave *slave); +void bond_slave_ns_maddrs_del(struct bonding *bond, struct slave *slave); #endif /* _NET_BOND_OPTIONS_H */ -- cgit v1.2.3 From a8ee6b900c147d3bedced6c52ba6cb603226aaa3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 13 Nov 2024 16:35:50 +0100 Subject: netfilter: nf_tables: prepare for multiple elements in nft_trans_elem structure Add helpers to release the individual elements contained in the trans_elem container structure. No functional change intended. Followup patch will add 'nelems' member and will turn 'priv' into a flexible array. These helpers can then loop over all elements. Care needs to be taken to handle a mix of new elements and existing elements that are being updated (e.g. timeout refresh). Before this patch, NEWSETELEM transaction with update is released early so nft_trans_set_elem_destroy() won't get called, so we need to skip elements marked as update. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f24278767bfd..37af0b174c39 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1759,28 +1759,25 @@ enum nft_trans_elem_flags { NFT_TRANS_UPD_EXPIRATION = (1 << 1), }; -struct nft_trans_elem { - struct nft_trans nft_trans; - struct nft_set *set; - struct nft_elem_priv *elem_priv; +struct nft_trans_one_elem { + struct nft_elem_priv *priv; u64 timeout; u64 expiration; u8 update_flags; +}; + +struct nft_trans_elem { + struct nft_trans nft_trans; + struct nft_set *set; bool bound; + unsigned int nelems; + struct nft_trans_one_elem elems[] __counted_by(nelems); }; #define nft_trans_container_elem(t) \ container_of(t, struct nft_trans_elem, nft_trans) #define nft_trans_elem_set(trans) \ nft_trans_container_elem(trans)->set -#define nft_trans_elem_priv(trans) \ - nft_trans_container_elem(trans)->elem_priv -#define nft_trans_elem_update_flags(trans) \ - nft_trans_container_elem(trans)->update_flags -#define nft_trans_elem_timeout(trans) \ - nft_trans_container_elem(trans)->timeout -#define nft_trans_elem_expiration(trans) \ - nft_trans_container_elem(trans)->expiration #define nft_trans_elem_set_bound(trans) \ nft_trans_container_elem(trans)->bound -- cgit v1.2.3 From 508180850b732c7a0e3a728460cf3f95f25e1fbd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 13 Nov 2024 16:35:53 +0100 Subject: netfilter: nf_tables: allocate element update information dynamically Move the timeout/expire/flag members from nft_trans_one_elem struct into a dybamically allocated structure, only needed when timeout update was requested. This halves size of nft_trans_one_elem struct and allows to compact up to 124 elements in one transaction container rather than 62. This halves memory requirements for a large flush or insert transaction, where ->update remains NULL. Care has to be taken to release the extra data in all spots, including abort path. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 37af0b174c39..80a537ac26cd 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1759,11 +1759,15 @@ enum nft_trans_elem_flags { NFT_TRANS_UPD_EXPIRATION = (1 << 1), }; -struct nft_trans_one_elem { - struct nft_elem_priv *priv; +struct nft_elem_update { u64 timeout; u64 expiration; - u8 update_flags; + u8 flags; +}; + +struct nft_trans_one_elem { + struct nft_elem_priv *priv; + struct nft_elem_update *update; }; struct nft_trans_elem { -- cgit v1.2.3 From 0c32840763b1579c923b4216c18bb756ca4ba473 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 14 Nov 2024 08:03:57 -0500 Subject: platform/x86/intel/pmt: allow user offset for PMT callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Usage of the telem sysfs file allows for partial reads at an offset. The current callback method returns the buffer starting from offset 0 only. Include the requested offset in the callback and update the necessary address calculations with the offset. Note: offset addition is moved from the caller to the local usage. For non-callback usage this is unchanged behavior. Fixes: e92affc74cd8 ("platform/x86/intel/vsec: Add PMT read callbacks") Reviewed-by: Andy Shevchenko Signed-off-by: Michael J. Ruhl Link: https://lore.kernel.org/r/20241114130358.2467787-2-michael.j.ruhl@intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/intel_vsec.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h index 11ee185566c3..b94beab64610 100644 --- a/include/linux/intel_vsec.h +++ b/include/linux/intel_vsec.h @@ -74,10 +74,11 @@ enum intel_vsec_quirks { * @pdev: PCI device reference for the callback's use * @guid: ID of data to acccss * @data: buffer for the data to be copied + * @off: offset into the requested buffer * @count: size of buffer */ struct pmt_callbacks { - int (*read_telem)(struct pci_dev *pdev, u32 guid, u64 *data, u32 count); + int (*read_telem)(struct pci_dev *pdev, u32 guid, u64 *data, loff_t off, u32 count); }; /** -- cgit v1.2.3 From aefff51e1c2986e16f2780ca8e4c97b784800ab5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 14 Nov 2024 16:31:27 +0100 Subject: statmount: retrieve security mount options Add the ability to retrieve security mount options. Keep them separate from filesystem specific mount options so it's easy to tell them apart. Also allow to retrieve them separate from other mount options as most of the time users won't be interested in security specific mount options. Link: https://lore.kernel.org/r/20241114-radtour-ofenrohr-ff34b567b40a@brauner Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index c0fda4604187..c07008816aca 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -177,7 +177,9 @@ struct statmount { __u32 sb_source; /* [str] Source string of the mount */ __u32 opt_num; /* Number of fs options */ __u32 opt_array; /* [str] Array of nul terminated fs options */ - __u64 __spare2[47]; + __u32 opt_sec_num; /* Number of security options */ + __u32 opt_sec_array; /* [str] Array of nul terminated security options */ + __u64 __spare2[46]; char str[]; /* Variable size part containing strings */ }; @@ -214,6 +216,7 @@ struct mnt_id_req { #define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ #define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ #define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ +#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */ /* * Special @mnt_id values that can be passed to listmount -- cgit v1.2.3 From 0b3144da31f855fce652303f588416a60991bdef Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 Nov 2024 07:49:22 +0100 Subject: USB: make single lock for all usb dynamic id lists There are a number of places where we accidentally pass in a constant structure to later cast it off to a dynamic one, and then attempt to grab a lock on it, which is not a good idea. To help resolve this, move the dynamic id lock out of the dynamic id structure for the driver and into one single lock for all USB dynamic ids. As this lock should never have any real contention (it's only every accessed when a device is added or removed, which is always serialized) there should not be any difference except for some memory savings. Note, this just converts the existing use of the dynamic id lock to the new static lock, there is one place that is accessing the dynamic id list without grabbing the lock, that will be fixed up in a follow-on change. Cc: Johan Hovold Cc: Herve Codina Cc: Rob Herring Cc: Alan Stern Cc: Grant Grundler Cc: Oliver Neukum Cc: Yajun Deng Cc: Douglas Anderson Cc: linux-usb@vger.kernel.org Link: https://lore.kernel.org/r/2024111322-kindly-finalist-d247@gregkh Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index 672d8fc2abdb..b66b1af3e439 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1129,8 +1129,8 @@ static inline int usb_make_path(struct usb_device *dev, char *buf, size_t size) /* ----------------------------------------------------------------------- */ /* Stuff for dynamic usb ids */ +extern struct mutex usb_dynids_lock; struct usb_dynids { - spinlock_t lock; struct list_head list; }; -- cgit v1.2.3 From 829ed626499c11c9d11c65e93febc1e0da7cd61b Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 13 Nov 2024 11:51:36 -0800 Subject: iommufd: Add IOMMU_IOAS_CHANGE_PROCESS Add an ioctl that updates all DMA mappings to reflect the current process, Change the mm and transfer locked memory accounting from old to current mm. This will be used for live update, allowing an old process to hand the iommufd device descriptor to a new process. The new process calls the ioctl. IOMMU_IOAS_CHANGE_PROCESS only supports DMA mappings created with IOMMU_IOAS_MAP_FILE, because the kernel metadata for such mappings does not depend on the userland VA of the pages (which is different in the new process). IOMMU_IOAS_CHANGE_PROCESS fails if other types of mappings are present. This is a revised version of code originally provided by Jason. Link: https://patch.msgid.link/r/1731527497-16091-4-git-send-email-steven.sistare@oracle.com Suggested-by: Jason Gunthorpe Signed-off-by: Steve Sistare Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 747d3d9baa3d..4ae8b1ee0444 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -54,6 +54,7 @@ enum { IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f, IOMMUFD_CMD_VIOMMU_ALLOC = 0x90, IOMMUFD_CMD_VDEVICE_ALLOC = 0x91, + IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92, }; /** @@ -972,4 +973,26 @@ struct iommu_vdevice_alloc { __aligned_u64 virt_id; }; #define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC) + +/** + * struct iommu_ioas_change_process - ioctl(VFIO_IOAS_CHANGE_PROCESS) + * @size: sizeof(struct iommu_ioas_change_process) + * @__reserved: Must be 0 + * + * This transfers pinned memory counts for every memory map in every IOAS + * in the context to the current process. This only supports maps created + * with IOMMU_IOAS_MAP_FILE, and returns EINVAL if other maps are present. + * If the ioctl returns a failure status, then nothing is changed. + * + * This API is useful for transferring operation of a device from one process + * to another, such as during userland live update. + */ +struct iommu_ioas_change_process { + __u32 size; + __u32 __reserved; +}; + +#define IOMMU_IOAS_CHANGE_PROCESS \ + _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS) + #endif -- cgit v1.2.3 From 2f3aab7aecb827ba93c6222646eb0faa8228d590 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 Nov 2024 15:04:40 +0100 Subject: USB: make to_usb_driver() use container_of_const() Turns out that we have some const pointers being passed to to_usb_driver() but were not catching this. Change the macro to properly propagate the const-ness of the pointer so that we will notice when we try to write to memory that we shouldn't be writing to. This requires fixing up the usb_match_dynamic_id() function as well, because it can handle a const * to struct usb_driver. Cc: Johan Hovold Cc: Alan Stern Cc: Grant Grundler Cc: Yajun Deng Cc: Oliver Neukum Cc: Douglas Anderson Cc: linux-usb@vger.kernel.org Link: https://lore.kernel.org/r/2024111339-shaky-goldsmith-b233@gregkh Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index b66b1af3e439..7a9e96f9d886 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1243,7 +1243,7 @@ struct usb_driver { unsigned int disable_hub_initiated_lpm:1; unsigned int soft_unbind:1; }; -#define to_usb_driver(d) container_of(d, struct usb_driver, driver) +#define to_usb_driver(d) container_of_const(d, struct usb_driver, driver) /** * struct usb_device_driver - identifies USB device driver to usbcore -- cgit v1.2.3 From d6fa15bbcf9604e3c14816410550d2cf22b955e4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 Nov 2024 15:04:41 +0100 Subject: USB: make to_usb_device_driver() use container_of_const() Turns out that we have some const pointers being passed to to_usb_device_driver() but were not catching this. Change the macro to properly propagate the const-ness of the pointer so that we will notice when we try to write to memory that we shouldn't be writing to. This requires fixing up the usb_driver_applicable() function as well, because it can handle a const * to struct usb_driver. Cc: Johan Hovold Cc: Alan Stern Cc: Grant Grundler Cc: Yajun Deng Cc: Oliver Neukum Cc: Douglas Anderson Cc: linux-usb@vger.kernel.org Link: https://lore.kernel.org/r/2024111342-lagoon-reapprove-5e49@gregkh Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index 7a9e96f9d886..cfa8005e24f9 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1294,8 +1294,7 @@ struct usb_device_driver { unsigned int supports_autosuspend:1; unsigned int generic_subclass:1; }; -#define to_usb_device_driver(d) container_of(d, struct usb_device_driver, \ - driver) +#define to_usb_device_driver(d) container_of_const(d, struct usb_device_driver, driver) /** * struct usb_class_driver - identifies a USB driver that wants to use the USB major number -- cgit v1.2.3 From d96c77bd4eeba469bddbbb14323d2191684da82a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 8 Nov 2024 04:56:31 -0500 Subject: KVM: x86: switch hugepage recovery thread to vhost_task kvm_vm_create_worker_thread() is meant to be used for kthreads that can consume significant amounts of CPU time on behalf of a VM or in response to how the VM behaves (for example how it accesses its memory). Therefore it wants to charge the CPU time consumed by that work to the VM's container. However, because of these threads, cgroups which have kvm instances inside never complete freezing. This can be trivially reproduced: root@test ~# mkdir /sys/fs/cgroup/test root@test ~# echo $$ > /sys/fs/cgroup/test/cgroup.procs root@test ~# qemu-system-x86_64 -nographic -enable-kvm and in another terminal: root@test ~# echo 1 > /sys/fs/cgroup/test/cgroup.freeze root@test ~# cat /sys/fs/cgroup/test/cgroup.events populated 1 frozen 0 The cgroup freezing happens in the signal delivery path but kvm_nx_huge_page_recovery_worker, while joining non-root cgroups, never calls into the signal delivery path and thus never gets frozen. Because the cgroup freezer determines whether a given cgroup is frozen by comparing the number of frozen threads to the total number of threads in the cgroup, the cgroup never becomes frozen and users waiting for the state transition may hang indefinitely. Since the worker kthread is tied to a user process, it's better if it behaves similarly to user tasks as much as possible, including being able to send SIGSTOP and SIGCONT. In fact, vhost_task is all that kvm_vm_create_worker_thread() wanted to be and more: not only it inherits the userspace process's cgroups, it has other niceties like being parented properly in the process tree. Use it instead of the homegrown alternative. Incidentally, the new code is also better behaved when you flip recovery back and forth to disabled and back to enabled. If your recovery period is 1 minute, it will run the next recovery after 1 minute independent of how many times you flipped the parameter. (Commit message based on emails from Tejun). Reported-by: Tejun Heo Reported-by: Luca Boccassi Acked-by: Tejun Heo Tested-by: Luca Boccassi Cc: stable@vger.kernel.org Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 03e4d26e3bcc..401439bb21e3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2425,12 +2425,6 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ -typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data); - -int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, - uintptr_t data, const char *name, - struct task_struct **thread_ptr); - #ifdef CONFIG_KVM_XFER_TO_GUEST_WORK static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) { -- cgit v1.2.3 From 94464a7b71634037b13d54021e0dfd0fb0d8c1f0 Mon Sep 17 00:00:00 2001 From: Danil Pylaev Date: Mon, 21 Oct 2024 12:22:44 +0000 Subject: Bluetooth: Add new quirks for ATS2851 This adds quirks for broken extended create connection, and write auth payload timeout. Signed-off-by: Danil Pylaev Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 14 ++++++++++++++ include/net/bluetooth/hci_core.h | 10 ++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index bab1e3d7452a..4f64066915be 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -300,6 +300,20 @@ enum { */ HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT, + /* + * When this quirk is set, the HCI_OP_LE_EXT_CREATE_CONN command is + * disabled. This is required for the Actions Semiconductor ATS2851 + * based controllers, which erroneously claims to support it. + */ + HCI_QUIRK_BROKEN_EXT_CREATE_CONN, + + /* + * When this quirk is set, the command WRITE_AUTH_PAYLOAD_TIMEOUT is + * skipped. This is required for the Actions Semiconductor ATS2851 + * based controllers, due to a race condition in pairing process. + */ + HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT, + /* When this quirk is set, MSFT extension monitor tracking by * address filter is supported. Since tracking quantity of each * pattern is limited, this feature supports tracking multiple diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 88265d37aa72..94ddc8684973 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1871,8 +1871,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn); !test_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &(dev)->quirks)) /* Use ext create connection if command is supported */ -#define use_ext_conn(dev) ((dev)->commands[37] & 0x80) - +#define use_ext_conn(dev) (((dev)->commands[37] & 0x80) && \ + !test_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, &(dev)->quirks)) /* Extended advertising support */ #define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV)) @@ -1885,8 +1885,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn); * C24: Mandatory if the LE Controller supports Connection State and either * LE Feature (LL Privacy) or LE Feature (Extended Advertising) is supported */ -#define use_enhanced_conn_complete(dev) (ll_privacy_capable(dev) || \ - ext_adv_capable(dev)) +#define use_enhanced_conn_complete(dev) ((ll_privacy_capable(dev) || \ + ext_adv_capable(dev)) && \ + !test_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, \ + &(dev)->quirks)) /* Periodic advertising support */ #define per_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_PERIODIC_ADV)) -- cgit v1.2.3 From 4a5e0ba68676b3a77298cf646cd2b39c94fbd2f5 Mon Sep 17 00:00:00 2001 From: Iulia Tanasescu Date: Fri, 1 Nov 2024 10:23:36 +0200 Subject: Bluetooth: ISO: Do not emit LE PA Create Sync if previous is pending The Bluetooth Core spec does not allow a LE PA Create sync command to be sent to Controller if another one is pending (Vol 4, Part E, page 2493). In order to avoid this issue, the HCI_CONN_CREATE_PA_SYNC was added to mark that the LE PA Create Sync command has been sent for a hcon. Once the PA Sync Established event is received, the hcon flag is erased and the next pending hcon is handled. Signed-off-by: Iulia Tanasescu Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 3 ++- include/net/bluetooth/hci_core.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 4f64066915be..4becf201b063 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1,7 +1,7 @@ /* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated - Copyright 2023 NXP + Copyright 2023-2024 NXP Written 2000,2001 by Maxim Krasnyansky @@ -697,6 +697,7 @@ enum { #define HCI_RSSI_INVALID 127 #define HCI_SYNC_HANDLE_INVALID 0xffff +#define HCI_SID_INVALID 0xff #define HCI_ROLE_MASTER 0x00 #define HCI_ROLE_SLAVE 0x01 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 94ddc8684973..43474b751a50 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -668,6 +668,7 @@ struct hci_conn { __u8 adv_instance; __u16 handle; __u16 sync_handle; + __u8 sid; __u16 state; __u16 mtu; __u8 mode; @@ -947,6 +948,7 @@ enum { HCI_CONN_CREATE_CIS, HCI_CONN_BIG_SYNC, HCI_CONN_BIG_SYNC_FAILED, + HCI_CONN_CREATE_PA_SYNC, HCI_CONN_PA_SYNC, HCI_CONN_PA_SYNC_FAILED, }; @@ -1099,6 +1101,30 @@ static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev, return NULL; } +static inline struct hci_conn *hci_conn_hash_lookup_sid(struct hci_dev *hdev, + __u8 sid, + bdaddr_t *dst, + __u8 dst_type) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c->type != ISO_LINK || bacmp(&c->dst, dst) || + c->dst_type != dst_type || c->sid != sid) + continue; + + rcu_read_unlock(); + return c; + } + + rcu_read_unlock(); + + return NULL; +} + static inline struct hci_conn * hci_conn_hash_lookup_per_adv_bis(struct hci_dev *hdev, bdaddr_t *ba, @@ -1328,6 +1354,13 @@ hci_conn_hash_lookup_pa_sync_handle(struct hci_dev *hdev, __u16 sync_handle) if (c->type != ISO_LINK) continue; + /* Ignore the listen hcon, we are looking + * for the child hcon that was created as + * a result of the PA sync established event. + */ + if (c->state == BT_LISTEN) + continue; + if (c->sync_handle == sync_handle) { rcu_read_unlock(); return c; @@ -1445,6 +1478,7 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle); void hci_sco_setup(struct hci_conn *conn, __u8 status); bool hci_iso_setup_path(struct hci_conn *conn); int hci_le_create_cis_pending(struct hci_dev *hdev); +int hci_pa_create_sync_pending(struct hci_dev *hdev); int hci_conn_check_create_cis(struct hci_conn *conn); struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, -- cgit v1.2.3 From 42ecf1947135110ea08abeaca39741636f9a2285 Mon Sep 17 00:00:00 2001 From: Iulia Tanasescu Date: Fri, 1 Nov 2024 10:23:38 +0200 Subject: Bluetooth: ISO: Do not emit LE BIG Create Sync if previous is pending The Bluetooth Core spec does not allow a LE BIG Create sync command to be sent to Controller if another one is pending (Vol 4, Part E, page 2586). In order to avoid this issue, the HCI_CONN_CREATE_BIG_SYNC was added to mark that the LE BIG Create Sync command has been sent for a hcon. Once the BIG Sync Established event is received, the hcon flag is erased and the next pending hcon is handled. Signed-off-by: Iulia Tanasescu Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 + include/net/bluetooth/hci_core.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 4becf201b063..5bb4eaa52e14 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -29,6 +29,7 @@ #define HCI_MAX_ACL_SIZE 1024 #define HCI_MAX_SCO_SIZE 255 #define HCI_MAX_ISO_SIZE 251 +#define HCI_MAX_ISO_BIS 31 #define HCI_MAX_EVENT_SIZE 260 #define HCI_MAX_FRAME_SIZE (HCI_MAX_ACL_SIZE + 4) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 43474b751a50..c95f7e6ba255 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -711,6 +711,9 @@ struct hci_conn { __s8 tx_power; __s8 max_tx_power; struct bt_iso_qos iso_qos; + __u8 num_bis; + __u8 bis[HCI_MAX_ISO_BIS]; + unsigned long flags; enum conn_reasons conn_reason; @@ -946,6 +949,7 @@ enum { HCI_CONN_PER_ADV, HCI_CONN_BIG_CREATED, HCI_CONN_CREATE_CIS, + HCI_CONN_CREATE_BIG_SYNC, HCI_CONN_BIG_SYNC, HCI_CONN_BIG_SYNC_FAILED, HCI_CONN_CREATE_PA_SYNC, @@ -1295,6 +1299,30 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev, return NULL; } +static inline struct hci_conn * +hci_conn_hash_lookup_big_sync_pend(struct hci_dev *hdev, + __u8 handle, __u8 num_bis) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c->type != ISO_LINK) + continue; + + if (handle == c->iso_qos.bcast.big && num_bis == c->num_bis) { + rcu_read_unlock(); + return c; + } + } + + rcu_read_unlock(); + + return NULL; +} + static inline struct hci_conn * hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state) { @@ -1479,6 +1507,7 @@ void hci_sco_setup(struct hci_conn *conn, __u8 status); bool hci_iso_setup_path(struct hci_conn *conn); int hci_le_create_cis_pending(struct hci_dev *hdev); int hci_pa_create_sync_pending(struct hci_dev *hdev); +int hci_le_big_create_sync_pending(struct hci_dev *hdev); int hci_conn_check_create_cis(struct hci_conn *conn); struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, -- cgit v1.2.3 From 83d328a72eff3268ea4c19deb0a6cf4c7da15746 Mon Sep 17 00:00:00 2001 From: Iulia Tanasescu Date: Fri, 1 Nov 2024 10:23:39 +0200 Subject: Bluetooth: ISO: Update hci_conn_hash_lookup_big for Broadcast slave Currently, hci_conn_hash_lookup_big only checks for BIS master connections, by filtering out connections with the destination address set. This commit updates this function to also consider BIS slave connections, since it is also used for a Broadcast Receiver to set an available BIG handle before issuing the LE BIG Create Sync command. Signed-off-by: Iulia Tanasescu Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c95f7e6ba255..ea798f07c5a2 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1285,7 +1285,17 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (bacmp(&c->dst, BDADDR_ANY) || c->type != ISO_LINK) + if (c->type != ISO_LINK) + continue; + + /* An ISO_LINK hcon with BDADDR_ANY as destination + * address is a Broadcast connection. A Broadcast + * slave connection is associated with a PA train, + * so the sync_handle can be used to differentiate + * from unicast. + */ + if (bacmp(&c->dst, BDADDR_ANY) && + c->sync_handle == HCI_SYNC_HANDLE_INVALID) continue; if (handle == c->iso_qos.bcast.big) { -- cgit v1.2.3 From 96e7c4273560be4744ba8c444bc6969745315251 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 6 Nov 2024 09:53:45 -0500 Subject: Bluetooth: HCI: Add IPC(11) bus type Zephyr(1) has been using the same bus defines as Linux so tools likes of btmon, etc, are able to decode the bus used by the driver to transport HCI packets. Link: https://github.com/zephyrproject-rtos/zephyr/pull/80808 Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 5bb4eaa52e14..6203bd8663b7 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -68,6 +68,7 @@ #define HCI_I2C 8 #define HCI_SMD 9 #define HCI_VIRTIO 10 +#define HCI_IPC 11 /* HCI device quirks */ enum { -- cgit v1.2.3 From 827af4787e74e8df9e8e0677a69fbb15e0856d2f Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 23 Oct 2024 16:55:57 -0400 Subject: Bluetooth: MGMT: Add initial implementation of MGMT_OP_HCI_CMD_SYNC This adds the initial implementation of MGMT_OP_HCI_CMD_SYNC as documented in mgmt-api (BlueZ tree): Send HCI command and wait for event Command =========================================== Command Code: 0x005B Controller Index: Command Parameters: Opcode (2 Octets) Event (1 Octet) Timeout (1 Octet) Parameter Length (2 Octets) Parameter (variable) Return Parameters: Response (1-variable Octets) This command may be used to send a HCI command and wait for an (optional) event. The HCI command is specified by the Opcode, any arbitrary is supported including vendor commands, but contrary to the like of Raw/User channel it is run as an HCI command send by the kernel since it uses its command synchronization thus it is possible to wait for a specific event as a response. Setting event to 0x00 will cause the command to wait for either HCI Command Status or HCI Command Complete. Timeout is specified in seconds, setting it to 0 will cause the default timeout to be used. Possible errors: Failed Invalid Parameters Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/mgmt.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index d382679efd2b..affac861efdc 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -878,6 +878,16 @@ struct mgmt_cp_mesh_send_cancel { } __packed; #define MGMT_MESH_SEND_CANCEL_SIZE 1 +#define MGMT_OP_HCI_CMD_SYNC 0x005B +struct mgmt_cp_hci_cmd_sync { + __le16 opcode; + __u8 event; + __u8 timeout; + __le16 params_len; + __u8 params[]; +} __packed; +#define MGMT_HCI_CMD_SYNC_SIZE 6 + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; -- cgit v1.2.3 From a7479860bb4099a8eef64999e843c085838248e4 Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Wed, 6 Nov 2024 11:14:30 +0000 Subject: dt-bindings: clock: mediatek: Add bindings for MT6735 syscon clock and reset controllers Add device tree bindings for syscon clock and reset controllers (IMGSYS, MFGCFG, VDECSYS and VENCSYS). Signed-off-by: Yassine Oudjana Reviewed-by: AngeloGioacchino Del Regno Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20241106111402.200940-2-y.oudjana@protonmail.com Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/mediatek,mt6735-imgsys.h | 15 +++++++++++++++ include/dt-bindings/clock/mediatek,mt6735-mfgcfg.h | 8 ++++++++ include/dt-bindings/clock/mediatek,mt6735-vdecsys.h | 9 +++++++++ include/dt-bindings/clock/mediatek,mt6735-vencsys.h | 11 +++++++++++ include/dt-bindings/reset/mediatek,mt6735-mfgcfg.h | 9 +++++++++ include/dt-bindings/reset/mediatek,mt6735-vdecsys.h | 9 +++++++++ 6 files changed, 61 insertions(+) create mode 100644 include/dt-bindings/clock/mediatek,mt6735-imgsys.h create mode 100644 include/dt-bindings/clock/mediatek,mt6735-mfgcfg.h create mode 100644 include/dt-bindings/clock/mediatek,mt6735-vdecsys.h create mode 100644 include/dt-bindings/clock/mediatek,mt6735-vencsys.h create mode 100644 include/dt-bindings/reset/mediatek,mt6735-mfgcfg.h create mode 100644 include/dt-bindings/reset/mediatek,mt6735-vdecsys.h (limited to 'include') diff --git a/include/dt-bindings/clock/mediatek,mt6735-imgsys.h b/include/dt-bindings/clock/mediatek,mt6735-imgsys.h new file mode 100644 index 000000000000..f250c26c5eb4 --- /dev/null +++ b/include/dt-bindings/clock/mediatek,mt6735-imgsys.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_CLK_MT6735_IMGSYS_H +#define _DT_BINDINGS_CLK_MT6735_IMGSYS_H + +#define CLK_IMG_SMI_LARB2 0 +#define CLK_IMG_CAM_SMI 1 +#define CLK_IMG_CAM_CAM 2 +#define CLK_IMG_SEN_TG 3 +#define CLK_IMG_SEN_CAM 4 +#define CLK_IMG_CAM_SV 5 +#define CLK_IMG_SUFOD 6 +#define CLK_IMG_FD 7 + +#endif /* _DT_BINDINGS_CLK_MT6735_IMGSYS_H */ diff --git a/include/dt-bindings/clock/mediatek,mt6735-mfgcfg.h b/include/dt-bindings/clock/mediatek,mt6735-mfgcfg.h new file mode 100644 index 000000000000..d2d99a48348a --- /dev/null +++ b/include/dt-bindings/clock/mediatek,mt6735-mfgcfg.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_CLK_MT6735_MFGCFG_H +#define _DT_BINDINGS_CLK_MT6735_MFGCFG_H + +#define CLK_MFG_BG3D 0 + +#endif /* _DT_BINDINGS_CLK_MT6735_MFGCFG_H */ diff --git a/include/dt-bindings/clock/mediatek,mt6735-vdecsys.h b/include/dt-bindings/clock/mediatek,mt6735-vdecsys.h new file mode 100644 index 000000000000..f94cec10c89f --- /dev/null +++ b/include/dt-bindings/clock/mediatek,mt6735-vdecsys.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_CLK_MT6735_VDECSYS_H +#define _DT_BINDINGS_CLK_MT6735_VDECSYS_H + +#define CLK_VDEC_VDEC 0 +#define CLK_VDEC_SMI_LARB1 1 + +#endif /* _DT_BINDINGS_CLK_MT6735_VDECSYS_H */ diff --git a/include/dt-bindings/clock/mediatek,mt6735-vencsys.h b/include/dt-bindings/clock/mediatek,mt6735-vencsys.h new file mode 100644 index 000000000000..e5a9cb4f269f --- /dev/null +++ b/include/dt-bindings/clock/mediatek,mt6735-vencsys.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_CLK_MT6735_VENCSYS_H +#define _DT_BINDINGS_CLK_MT6735_VENCSYS_H + +#define CLK_VENC_SMI_LARB3 0 +#define CLK_VENC_VENC 1 +#define CLK_VENC_JPGENC 2 +#define CLK_VENC_JPGDEC 3 + +#endif /* _DT_BINDINGS_CLK_MT6735_VENCSYS_H */ diff --git a/include/dt-bindings/reset/mediatek,mt6735-mfgcfg.h b/include/dt-bindings/reset/mediatek,mt6735-mfgcfg.h new file mode 100644 index 000000000000..c489242b226e --- /dev/null +++ b/include/dt-bindings/reset/mediatek,mt6735-mfgcfg.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_RESET_MT6735_MFGCFG_H +#define _DT_BINDINGS_RESET_MT6735_MFGCFG_H + +#define MT6735_MFG_RST0_AXI 0 +#define MT6735_MFG_RST0_G3D 1 + +#endif /* _DT_BINDINGS_RESET_MT6735_MFGCFG_H */ diff --git a/include/dt-bindings/reset/mediatek,mt6735-vdecsys.h b/include/dt-bindings/reset/mediatek,mt6735-vdecsys.h new file mode 100644 index 000000000000..b6ae5d249192 --- /dev/null +++ b/include/dt-bindings/reset/mediatek,mt6735-vdecsys.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_RESET_MT6735_VDECSYS_H +#define _DT_BINDINGS_RESET_MT6735_VDECSYS_H + +#define MT6735_VDEC_RST0_VDEC 0 +#define MT6735_VDEC_RST1_SMI_LARB1 1 + +#endif /* _DT_BINDINGS_RESET_MT6735_VDECSYS_H */ -- cgit v1.2.3 From f03b086624aa7cfbc6f3bf4ac728639602b4c478 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duje=20Mihanovi=C4=87?= Date: Mon, 4 Nov 2024 17:37:06 +0100 Subject: dt-bindings: clock: Add Marvell PXA1908 clock bindings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add dt bindings and documentation for the Marvell PXA1908 clock controller. Reviewed-by: Conor Dooley Reviewed-by: Stephen Boyd Signed-off-by: Duje Mihanović Link: https://lore.kernel.org/r/20241104-pxa1908-lkml-v13-4-e050609b8d6c@skole.hr Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/marvell,pxa1908.h | 88 +++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 include/dt-bindings/clock/marvell,pxa1908.h (limited to 'include') diff --git a/include/dt-bindings/clock/marvell,pxa1908.h b/include/dt-bindings/clock/marvell,pxa1908.h new file mode 100644 index 000000000000..fb15b0d0cd4c --- /dev/null +++ b/include/dt-bindings/clock/marvell,pxa1908.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +#ifndef __DTS_MARVELL_PXA1908_CLOCK_H +#define __DTS_MARVELL_PXA1908_CLOCK_H + +/* plls */ +#define PXA1908_CLK_CLK32 1 +#define PXA1908_CLK_VCTCXO 2 +#define PXA1908_CLK_PLL1_624 3 +#define PXA1908_CLK_PLL1_416 4 +#define PXA1908_CLK_PLL1_499 5 +#define PXA1908_CLK_PLL1_832 6 +#define PXA1908_CLK_PLL1_1248 7 +#define PXA1908_CLK_PLL1_D2 8 +#define PXA1908_CLK_PLL1_D4 9 +#define PXA1908_CLK_PLL1_D8 10 +#define PXA1908_CLK_PLL1_D16 11 +#define PXA1908_CLK_PLL1_D6 12 +#define PXA1908_CLK_PLL1_D12 13 +#define PXA1908_CLK_PLL1_D24 14 +#define PXA1908_CLK_PLL1_D48 15 +#define PXA1908_CLK_PLL1_D96 16 +#define PXA1908_CLK_PLL1_D13 17 +#define PXA1908_CLK_PLL1_32 18 +#define PXA1908_CLK_PLL1_208 19 +#define PXA1908_CLK_PLL1_117 20 +#define PXA1908_CLK_PLL1_416_GATE 21 +#define PXA1908_CLK_PLL1_624_GATE 22 +#define PXA1908_CLK_PLL1_832_GATE 23 +#define PXA1908_CLK_PLL1_1248_GATE 24 +#define PXA1908_CLK_PLL1_D2_GATE 25 +#define PXA1908_CLK_PLL1_499_EN 26 +#define PXA1908_CLK_PLL2VCO 27 +#define PXA1908_CLK_PLL2 28 +#define PXA1908_CLK_PLL2P 29 +#define PXA1908_CLK_PLL2VCODIV3 30 +#define PXA1908_CLK_PLL3VCO 31 +#define PXA1908_CLK_PLL3 32 +#define PXA1908_CLK_PLL3P 33 +#define PXA1908_CLK_PLL3VCODIV3 34 +#define PXA1908_CLK_PLL4VCO 35 +#define PXA1908_CLK_PLL4 36 +#define PXA1908_CLK_PLL4P 37 +#define PXA1908_CLK_PLL4VCODIV3 38 + +/* apb (apbc) peripherals */ +#define PXA1908_CLK_UART0 1 +#define PXA1908_CLK_UART1 2 +#define PXA1908_CLK_GPIO 3 +#define PXA1908_CLK_PWM0 4 +#define PXA1908_CLK_PWM1 5 +#define PXA1908_CLK_PWM2 6 +#define PXA1908_CLK_PWM3 7 +#define PXA1908_CLK_SSP0 8 +#define PXA1908_CLK_SSP1 9 +#define PXA1908_CLK_IPC_RST 10 +#define PXA1908_CLK_RTC 11 +#define PXA1908_CLK_TWSI0 12 +#define PXA1908_CLK_KPC 13 +#define PXA1908_CLK_SWJTAG 14 +#define PXA1908_CLK_SSP2 15 +#define PXA1908_CLK_TWSI1 16 +#define PXA1908_CLK_THERMAL 17 +#define PXA1908_CLK_TWSI3 18 + +/* apb (apbcp) peripherals */ +#define PXA1908_CLK_UART2 1 +#define PXA1908_CLK_TWSI2 2 +#define PXA1908_CLK_AICER 3 + +/* axi (apmu) peripherals */ +#define PXA1908_CLK_CCIC1 1 +#define PXA1908_CLK_ISP 2 +#define PXA1908_CLK_DSI1 3 +#define PXA1908_CLK_DISP1 4 +#define PXA1908_CLK_CCIC0 5 +#define PXA1908_CLK_SDH0 6 +#define PXA1908_CLK_SDH1 7 +#define PXA1908_CLK_USB 8 +#define PXA1908_CLK_NF 9 +#define PXA1908_CLK_CORE_DEBUG 10 +#define PXA1908_CLK_VPU 11 +#define PXA1908_CLK_GC 12 +#define PXA1908_CLK_SDH2 13 +#define PXA1908_CLK_GC2D 14 +#define PXA1908_CLK_TRACE 15 +#define PXA1908_CLK_DVC_DFC_DEBUG 16 + +#endif -- cgit v1.2.3 From 6a46b75a91a4347e50b25fb4a150a8f34b026034 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Wed, 6 Nov 2024 17:03:53 +0100 Subject: dt-bindings: clock: eyeq: add more Mobileye EyeQ5/EyeQ6H clocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add #defines for Mobileye clock controller: - EyeQ5 core 0 thru 3 clocks. Internally: EQ5C_PLL_CPU: already exposed └── EQ5C_CPU_OCC: unexposed, no reason to do so ├── EQ5C_CPU_CORE0: new! ├── EQ5C_CPU_CORE1: new! ├── EQ5C_CPU_CORE2: new! └── EQ5C_CPU_CORE3: new! - EyeQ5 peripheral clocks. Internally: EQ5C_PLL_PER: already exposed ├── EQ5C_PER_OCC: new! │ ├── EQ5C_PER_SPI: new! │ ├── EQ5C_PER_I2C: new! │ ├── EQ5C_PER_GPIO: new! │ └── EQ5C_PER_UART: new! ├── EQ5C_PER_EMMC: new! └── EQ5C_PER_OCC_PCI: new! - EyeQ6H central OLB. Internally: EQ6HC_CENTRAL_PLL_CPU: new! └── EQ6HC_CENTRAL_CPU_OCC: new! - EyeQ6H west OLB. Internally: EQ6HC_WEST_PLL_PER: new! └── EQ6HC_WEST_PER_OCC: new! └── EQ6HC_WEST_PER_UART: new! Signed-off-by: Théo Lebrun Link: https://lore.kernel.org/r/20241106-mbly-clk-v2-2-84cfefb3f485@bootlin.com Acked-by: Krzysztof Kozlowski Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/mobileye,eyeq5-clk.h | 46 +++++++++++++++++++------- 1 file changed, 34 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/mobileye,eyeq5-clk.h b/include/dt-bindings/clock/mobileye,eyeq5-clk.h index b433c1772c28..f353c2988035 100644 --- a/include/dt-bindings/clock/mobileye,eyeq5-clk.h +++ b/include/dt-bindings/clock/mobileye,eyeq5-clk.h @@ -6,24 +6,46 @@ #ifndef _DT_BINDINGS_CLOCK_MOBILEYE_EYEQ5_CLK_H #define _DT_BINDINGS_CLOCK_MOBILEYE_EYEQ5_CLK_H -#define EQ5C_PLL_CPU 0 -#define EQ5C_PLL_VMP 1 -#define EQ5C_PLL_PMA 2 -#define EQ5C_PLL_VDI 3 -#define EQ5C_PLL_DDR0 4 -#define EQ5C_PLL_PCI 5 -#define EQ5C_PLL_PER 6 -#define EQ5C_PLL_PMAC 7 -#define EQ5C_PLL_MPC 8 -#define EQ5C_PLL_DDR1 9 - -#define EQ5C_DIV_OSPI 10 +#define EQ5C_PLL_CPU 0 +#define EQ5C_PLL_VMP 1 +#define EQ5C_PLL_PMA 2 +#define EQ5C_PLL_VDI 3 +#define EQ5C_PLL_DDR0 4 +#define EQ5C_PLL_PCI 5 +#define EQ5C_PLL_PER 6 +#define EQ5C_PLL_PMAC 7 +#define EQ5C_PLL_MPC 8 +#define EQ5C_PLL_DDR1 9 + +#define EQ5C_DIV_OSPI 10 + +/* EQ5C_PLL_CPU children */ +#define EQ5C_CPU_CORE0 11 +#define EQ5C_CPU_CORE1 12 +#define EQ5C_CPU_CORE2 13 +#define EQ5C_CPU_CORE3 14 + +/* EQ5C_PLL_PER children */ +#define EQ5C_PER_OCC 15 +#define EQ5C_PER_UART 16 +#define EQ5C_PER_SPI 17 +#define EQ5C_PER_I2C 18 +#define EQ5C_PER_GPIO 19 +#define EQ5C_PER_EMMC 20 +#define EQ5C_PER_OCC_PCI 21 #define EQ6LC_PLL_DDR 0 #define EQ6LC_PLL_CPU 1 #define EQ6LC_PLL_PER 2 #define EQ6LC_PLL_VDI 3 +#define EQ6HC_CENTRAL_PLL_CPU 0 +#define EQ6HC_CENTRAL_CPU_OCC 1 + +#define EQ6HC_WEST_PLL_PER 0 +#define EQ6HC_WEST_PER_OCC 1 +#define EQ6HC_WEST_PER_UART 2 + #define EQ6HC_SOUTH_PLL_VDI 0 #define EQ6HC_SOUTH_PLL_PCIE 1 #define EQ6HC_SOUTH_PLL_PER 2 -- cgit v1.2.3 From 4eb5e9c6c4cdfca9fb0577f62580db46f5acd1a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Wed, 6 Nov 2024 17:03:54 +0100 Subject: clk: fixed-factor: add clk_hw_register_fixed_factor_index() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add non-devres version of clk_hw_register_fixed_factor(), with parent targeted using its index. Signed-off-by: Théo Lebrun Link: https://lore.kernel.org/r/20241106-mbly-clk-v2-3-84cfefb3f485@bootlin.com Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index dbe793964c24..52b46c63a2bd 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1142,6 +1142,9 @@ struct clk_hw *clk_hw_register_fixed_factor_with_accuracy_fwname(struct device * struct device_node *np, const char *name, const char *fw_name, unsigned long flags, unsigned int mult, unsigned int div, unsigned long acc); +struct clk_hw *clk_hw_register_fixed_factor_index(struct device *dev, + const char *name, unsigned int index, unsigned long flags, + unsigned int mult, unsigned int div); void clk_hw_unregister_fixed_factor(struct clk_hw *hw); struct clk_hw *devm_clk_hw_register_fixed_factor(struct device *dev, const char *name, const char *parent_name, unsigned long flags, -- cgit v1.2.3 From 721aa69e708b7432af83c4bb00a30e2b7c27da28 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 8 Nov 2024 07:54:47 +0100 Subject: net: phy: convert eee_broken_modes to a linkmode bitmap eee_broken_modes has a eee_cap1 register layout currently. This doen't allow to flag e.g. 2.5Gbps or 5Gbps BaseT EEE as broken. To overcome this limitation switch eee_broken_modes to a linkmode bitmap. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/dfe0c9ff-84b0-4328-86d7-e917ebc084a1@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 1e4127c495c0..a6622f873d03 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -721,16 +721,15 @@ struct phy_device { /* used for eee validation and configuration*/ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee); + /* Energy efficient ethernet modes which should be prohibited */ + __ETHTOOL_DECLARE_LINK_MODE_MASK(eee_broken_modes); bool eee_enabled; + bool enable_tx_lpi; + struct eee_config eee_cfg; /* Host supported PHY interface types. Should be ignored if empty. */ DECLARE_PHY_INTERFACE_MASK(host_interfaces); - /* Energy efficient ethernet modes which should be prohibited */ - u32 eee_broken_modes; - bool enable_tx_lpi; - struct eee_config eee_cfg; - #ifdef CONFIG_LED_TRIGGER_PHY struct phy_led_trigger *phy_led_triggers; unsigned int phy_num_led_triggers; -- cgit v1.2.3 From ed623fb8e38e2a241da12864778ec9c9cf930c65 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 8 Nov 2024 08:07:10 +0100 Subject: net: phy: add phy_set_eee_broken Add an accessor for eee_broken_modes, so that drivers don't have to deal with phylib internals. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/0f8ee279-d40d-4489-a3b0-d993472d744a@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index a6622f873d03..b8346db42727 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1263,6 +1263,16 @@ void of_set_phy_eee_broken(struct phy_device *phydev); void of_set_phy_timing_role(struct phy_device *phydev); int phy_speed_down_core(struct phy_device *phydev); +/** + * phy_set_eee_broken - Mark an EEE mode as broken so that it isn't advertised. + * @phydev: The phy_device struct + * @link_mode: The broken EEE mode + */ +static inline void phy_set_eee_broken(struct phy_device *phydev, u32 link_mode) +{ + linkmode_set_bit(link_mode, phydev->eee_broken_modes); +} + /** * phy_is_started - Convenience function to check whether PHY is started * @phydev: The phy_device struct -- cgit v1.2.3 From e7cb7cf43afb96a4fd8f68c2dfb9b2fbdd444654 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Tue, 12 Nov 2024 20:54:29 +1000 Subject: include: mdio: Remove mdio45_ethtool_gset() mdio45_ethtool_gset() is never called, so let's remove it. Signed-off-by: Alistair Francis Link: https://patch.msgid.link/20241112105430.438491-1-alistair@alistair23.me Signed-off-by: Jakub Kicinski --- include/linux/mdio.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index efeca5bd7600..c63f43645d50 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -173,22 +173,6 @@ mdio45_ethtool_ksettings_get_npage(const struct mdio_if_info *mdio, struct ethtool_link_ksettings *cmd, u32 npage_adv, u32 npage_lpa); -/** - * mdio45_ethtool_gset - get settings for ETHTOOL_GSET - * @mdio: MDIO interface - * @ecmd: Ethtool request structure - * - * Since the CSRs for auto-negotiation using next pages are not fully - * standardised, this function does not attempt to decode them. Use - * mdio45_ethtool_gset_npage() to specify advertisement bits from next - * pages. - */ -static inline void mdio45_ethtool_gset(const struct mdio_if_info *mdio, - struct ethtool_cmd *ecmd) -{ - mdio45_ethtool_gset_npage(mdio, ecmd, 0, 0); -} - /** * mdio45_ethtool_ksettings_get - get settings for ETHTOOL_GLINKSETTINGS * @mdio: MDIO interface -- cgit v1.2.3 From 575092a7f0ce4c6d5907cd908d28c0f69690a136 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Tue, 12 Nov 2024 20:54:30 +1000 Subject: mdio: Remove mdio45_ethtool_gset_npage() The mdio45_ethtool_gset_npage() function isn't called, so let's remove it. Signed-off-by: Alistair Francis Link: https://patch.msgid.link/20241112105430.438491-2-alistair@alistair23.me Signed-off-by: Jakub Kicinski --- include/linux/mdio.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index c63f43645d50..3c3deac57894 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -165,9 +165,6 @@ extern int mdio_set_flag(const struct mdio_if_info *mdio, bool sense); extern int mdio45_links_ok(const struct mdio_if_info *mdio, u32 mmds); extern int mdio45_nway_restart(const struct mdio_if_info *mdio); -extern void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio, - struct ethtool_cmd *ecmd, - u32 npage_adv, u32 npage_lpa); extern void mdio45_ethtool_ksettings_get_npage(const struct mdio_if_info *mdio, struct ethtool_link_ksettings *cmd, -- cgit v1.2.3 From 9e43ad7a1edef268acac603e1975c8f50a20d02f Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 13 Nov 2024 12:13:09 +0000 Subject: net: ethtool: only allow set_rxnfc with rss + ring_cookie if driver opts in Ethtool ntuple filters with FLOW_RSS were originally defined as adding the base queue ID (ring_cookie) to the value from the indirection table, so that the same table could distribute over more than one set of queues when used by different filters. However, some drivers / hardware ignore the ring_cookie, and simply use the indirection table entries as queue IDs directly. Thus, for drivers which have not opted in by setting ethtool_ops.cap_rss_rxnfc_adds to declare that they support the original (addition) semantics, reject in ethtool_set_rxnfc any filter which combines FLOW_RSS and a nonzero ring. (For a ring_cookie of zero, both behaviours are equivalent.) Set the cap bit in sfc, as it is known to support this feature. Signed-off-by: Edward Cree Reviewed-by: Martin Habets Link: https://patch.msgid.link/cc3da0844083b0e301a33092a6299e4042b65221.1731499022.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 1199e308c8dd..299280c94d07 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -734,6 +734,9 @@ struct kernel_ethtool_ts_info { * @rxfh_per_ctx_key: device supports setting different RSS key for each * additional context. Netlink API should report hfunc, key, and input_xfrm * for every context, not just context 0. + * @cap_rss_rxnfc_adds: device supports nonzero ring_cookie in filters with + * %FLOW_RSS flag; the queue ID from the filter is added to the value from + * the indirection table to determine the delivery queue. * @rxfh_indir_space: max size of RSS indirection tables, if indirection table * size as returned by @get_rxfh_indir_size may change during lifetime * of the device. Leave as 0 if the table size is constant. @@ -956,6 +959,7 @@ struct ethtool_ops { u32 cap_rss_ctx_supported:1; u32 cap_rss_sym_xor_supported:1; u32 rxfh_per_ctx_key:1; + u32 cap_rss_rxnfc_adds:1; u32 rxfh_indir_space; u16 rxfh_key_space; u16 rxfh_priv_size; -- cgit v1.2.3 From fd7b4f9f46d46acbc7af3a439bb0d869efdc5c58 Mon Sep 17 00:00:00 2001 From: Qun-Wei Lin Date: Wed, 13 Nov 2024 12:25:43 +0800 Subject: sched/task_stack: fix object_is_on_stack() for KASAN tagged pointers When CONFIG_KASAN_SW_TAGS and CONFIG_KASAN_STACK are enabled, the object_is_on_stack() function may produce incorrect results due to the presence of tags in the obj pointer, while the stack pointer does not have tags. This discrepancy can lead to incorrect stack object detection and subsequently trigger warnings if CONFIG_DEBUG_OBJECTS is also enabled. Example of the warning: ODEBUG: object 3eff800082ea7bb0 is NOT on stack ffff800082ea0000, but annotated. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at lib/debugobjects.c:557 __debug_object_init+0x330/0x364 Modules linked in: CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.12.0-rc5 #4 Hardware name: linux,dummy-virt (DT) pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : __debug_object_init+0x330/0x364 lr : __debug_object_init+0x330/0x364 sp : ffff800082ea7b40 x29: ffff800082ea7b40 x28: 98ff0000c0164518 x27: 98ff0000c0164534 x26: ffff800082d93ec8 x25: 0000000000000001 x24: 1cff0000c00172a0 x23: 0000000000000000 x22: ffff800082d93ed0 x21: ffff800081a24418 x20: 3eff800082ea7bb0 x19: efff800000000000 x18: 0000000000000000 x17: 00000000000000ff x16: 0000000000000047 x15: 206b63617473206e x14: 0000000000000018 x13: ffff800082ea7780 x12: 0ffff800082ea78e x11: 0ffff800082ea790 x10: 0ffff800082ea79d x9 : 34d77febe173e800 x8 : 34d77febe173e800 x7 : 0000000000000001 x6 : 0000000000000001 x5 : feff800082ea74b8 x4 : ffff800082870a90 x3 : ffff80008018d3c4 x2 : 0000000000000001 x1 : ffff800082858810 x0 : 0000000000000050 Call trace: __debug_object_init+0x330/0x364 debug_object_init_on_stack+0x30/0x3c schedule_hrtimeout_range_clock+0xac/0x26c schedule_hrtimeout+0x1c/0x30 wait_task_inactive+0x1d4/0x25c kthread_bind_mask+0x28/0x98 init_rescuer+0x1e8/0x280 workqueue_init+0x1a0/0x3cc kernel_init_freeable+0x118/0x200 kernel_init+0x28/0x1f0 ret_from_fork+0x10/0x20 ---[ end trace 0000000000000000 ]--- ODEBUG: object 3eff800082ea7bb0 is NOT on stack ffff800082ea0000, but annotated. ------------[ cut here ]------------ Link: https://lkml.kernel.org/r/20241113042544.19095-1-qun-wei.lin@mediatek.com Signed-off-by: Qun-Wei Lin Cc: Andrew Yang Cc: AngeloGioacchino Del Regno Cc: Casper Li Cc: Catalin Marinas Cc: Chinwen Chang Cc: Kent Overstreet Cc: Matthias Brugger Cc: Pasha Tatashin Cc: Shakeel Butt Cc: Signed-off-by: Andrew Morton --- include/linux/sched/task_stack.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index bf10bdb487dd..6c2fef89a4fd 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -9,6 +9,7 @@ #include #include #include +#include #ifdef CONFIG_THREAD_INFO_IN_TASK @@ -89,6 +90,7 @@ static inline int object_is_on_stack(const void *obj) { void *stack = task_stack_page(current); + obj = kasan_reset_tag(obj); return (obj >= stack) && (obj < (stack + THREAD_SIZE)); } -- cgit v1.2.3 From 05d4532b60e3e6e2a094ec56a88d1def50bd2430 Mon Sep 17 00:00:00 2001 From: Joshua Hahn Date: Fri, 1 Nov 2024 13:44:02 -0700 Subject: memcg/hugetlb: add hugeTLB counters to memcg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces a new counter to memory.stat that tracks hugeTLB usage, only if hugeTLB accounting is done to memory.current. This feature is enabled the same way hugeTLB accounting is enabled, via the memory_hugetlb_accounting mount flag for cgroupsv2. 1. Why is this patch necessary? Currently, memcg hugeTLB accounting is an opt-in feature [1] that adds hugeTLB usage to memory.current. However, the metric is not reported in memory.stat. Given that users often interpret memory.stat as a breakdown of the value reported in memory.current, the disparity between the two reports can be confusing. This patch solves this problem by including the metric in memory.stat as well, but only if it is also reported in memory.current (it would also be confusing if the value was reported in memory.stat, but not in memory.current) Aside from the consistency between the two files, we also see benefits in observability. Userspace might be interested in the hugeTLB footprint of cgroups for many reasons. For instance, system admins might want to verify that hugeTLB usage is distributed as expected across tasks: i.e. memory-intensive tasks are using more hugeTLB pages than tasks that don't consume a lot of memory, or are seen to fault frequently. Note that this is separate from wanting to inspect the distribution for limiting purposes (in which case, hugeTLB controller makes more sense). 2. We already have a hugeTLB controller. Why not use that? It is true that hugeTLB tracks the exact value that we want. In fact, by enabling the hugeTLB controller, we get all of the observability benefits that I mentioned above, and users can check the total hugeTLB usage, verify if it is distributed as expected, etc. With this said, there are 2 problems: (a) They are still not reported in memory.stat, which means the disparity between the memcg reports are still there. (b) We cannot reasonably expect users to enable the hugeTLB controller just for the sake of hugeTLB usage reporting, especially since they don't have any use for hugeTLB usage enforcing [2]. 3. Implementation Details: In the alloc / free hugetlb functions, we call lruvec_stat_mod_folio regardless of whether memcg accounts hugetlb. mem_cgroup_commit_charge which is called from alloc_hugetlb_folio will set memcg for the folio only if the CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING cgroup mount option is used, so lruvec_stat_mod_folio accounts per-memcg hugetlb counters only if the feature is enabled. Regardless of whether memcg accounts for hugetlb, the newly added global counter is updated and shown in /proc/vmstat. The global counter is added because vmstats is the preferred framework for cgroup stats. It makes stat items consistent between global and cgroups. It also provides a per-node breakdown, which is useful. Because it does not use cgroup-specific hooks, we also keep generic MM code separate from memcg code. [1] https://lore.kernel.org/all/20231006184629.155543-1-nphamcs@gmail.com/ [2] Of course, we can't make a new patch for every feature that can be duplicated. However, since the existing solution of enabling the hugeTLB controller is an imperfect solution that still leaves a discrepancy between memory.stat and memory.curent, I think that it is reasonable to isolate the feature in this case. Link: https://lkml.kernel.org/r/20241101204402.1885383-1-joshua.hahnjy@gmail.com Signed-off-by: Joshua Hahn Suggested-by: Nhat Pham Suggested-by: Shakeel Butt Suggested-by: Johannes Weiner Acked-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Chris Down Acked-by: Michal Hocko Reviewed-by: Roman Gushchin Reviewed-by: Nhat Pham Cc: Jonathan Corbet Cc: Michal Koutný Cc: Muchun Song Cc: Zefan Li Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5e8f567753bd..b36124145a16 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -220,6 +220,9 @@ enum node_stat_item { PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, +#ifdef CONFIG_HUGETLB_PAGE + NR_HUGETLB, +#endif NR_VM_NODE_STAT_ITEMS }; -- cgit v1.2.3 From a12143e6084c502fc3cfaa8b717bffc8c14cf806 Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Thu, 14 Nov 2024 22:07:51 +0100 Subject: netfilter: bitwise: rename some boolean operation functions In the next patch we add support for doing AND, OR and XOR operations directly in the kernel, so rename some functions and an enum constant related to mask-and-xor boolean operations. Signed-off-by: Jeremy Sowden Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 9e9079321380..487542234ccd 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -564,16 +564,20 @@ enum nft_immediate_attributes { /** * enum nft_bitwise_ops - nf_tables bitwise operations * - * @NFT_BITWISE_BOOL: mask-and-xor operation used to implement NOT, AND, OR and - * XOR boolean operations + * @NFT_BITWISE_MASK_XOR: mask-and-xor operation used to implement NOT, AND, OR + * and XOR boolean operations * @NFT_BITWISE_LSHIFT: left-shift operation * @NFT_BITWISE_RSHIFT: right-shift operation */ enum nft_bitwise_ops { - NFT_BITWISE_BOOL, + NFT_BITWISE_MASK_XOR, NFT_BITWISE_LSHIFT, NFT_BITWISE_RSHIFT, }; +/* + * Old name for NFT_BITWISE_MASK_XOR. Retained for backwards-compatibility. + */ +#define NFT_BITWISE_BOOL NFT_BITWISE_MASK_XOR /** * enum nft_bitwise_attributes - nf_tables bitwise expression netlink attributes -- cgit v1.2.3 From 4a530a7c751d27f9dbd70b7fc45670cd11713b13 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 11 Oct 2024 11:00:21 +0200 Subject: fs: prepare for "explicit connectable" file handles We would like to use the high 16bit of the handle_type field to encode file handle traits, such as "connectable". In preparation for this change, make sure that filesystems do not return a handle_type value with upper bits set and that the open_by_handle_at(2) syscall rejects these handle types. Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20241011090023.655623-2-amir73il@gmail.com Fixes: 570df4e9c23f ("ceph: snapshot nfs re-export") Acked-by: Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/exportfs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 893a1d21dc1c..5e14d4500a75 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -160,6 +160,17 @@ struct fid { #define EXPORT_FH_FID 0x2 /* File handle may be non-decodeable */ #define EXPORT_FH_DIR_ONLY 0x4 /* Only decode file handle for a directory */ +/* + * Filesystems use only lower 8 bits of file_handle type for fid_type. + * name_to_handle_at() uses upper 16 bits of type as user flags to be + * interpreted by open_by_handle_at(). + */ +#define FILEID_USER_FLAGS_MASK 0xffff0000 +#define FILEID_USER_FLAGS(type) ((type) & FILEID_USER_FLAGS_MASK) + +/* Flags supported in encoded handle_type that is exported to user */ +#define FILEID_VALID_USER_FLAGS (0) + /** * struct export_operations - for nfsd to communicate with file systems * @encode_fh: encode a file handle fragment from a dentry -- cgit v1.2.3 From c374196b2b9f4b803fccd59ed82f0712041e21e1 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 11 Oct 2024 11:00:22 +0200 Subject: fs: name_to_handle_at() support for "explicit connectable" file handles nfsd encodes "connectable" file handles for the subtree_check feature, which can be resolved to an open file with a connected path. So far, userspace nfs server could not make use of this functionality. Introduce a new flag AT_HANDLE_CONNECTABLE to name_to_handle_at(2). When used, the encoded file handle is "explicitly connectable". The "explicitly connectable" file handle sets bits in the high 16bit of the handle_type field, so open_by_handle_at(2) will know that it needs to open a file with a connected path. old kernels will now recognize the handle_type with high bits set, so "explicitly connectable" file handles cannot be decoded by open_by_handle_at(2) on old kernels. The flag AT_HANDLE_CONNECTABLE is not allowed together with either AT_HANDLE_FID or AT_EMPTY_PATH. Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20241011090023.655623-3-amir73il@gmail.com Fixes: 570df4e9c23f ("ceph: snapshot nfs re-export") Acked-by: Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/exportfs.h | 2 ++ include/uapi/linux/fcntl.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 5e14d4500a75..4ee42b2cf4ab 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -169,6 +169,8 @@ struct fid { #define FILEID_USER_FLAGS(type) ((type) & FILEID_USER_FLAGS_MASK) /* Flags supported in encoded handle_type that is exported to user */ +#define FILEID_IS_CONNECTABLE 0x10000 +#define FILEID_IS_DIR 0x20000 #define FILEID_VALID_USER_FLAGS (0) /** diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 87e2dec79fea..56ff2100e021 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -153,6 +153,7 @@ object identity and may not be usable with open_by_handle_at(2). */ #define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */ +#define AT_HANDLE_CONNECTABLE 0x002 /* Request a connectable file handle */ #if defined(__KERNEL__) #define AT_GETATTR_NOSEC 0x80000000 -- cgit v1.2.3 From a20853ab8296d4a8754482cb5e9adde8ab426a25 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 11 Oct 2024 11:00:23 +0200 Subject: fs: open_by_handle_at() support for decoding "explicit connectable" file handles Teach open_by_handle_at(2) about the type format of "explicit connectable" file handles that were created using the AT_HANDLE_CONNECTABLE flag to name_to_handle_at(2). When decoding an "explicit connectable" file handles, name_to_handle_at(2) should fail if it cannot open a "connected" fd with known path, which is accessible (to capable user) from mount fd path. Note that this does not check if the path is accessible to the calling user, just that it is accessible wrt the mount namesapce, so if there is no "connected" alias, or if parts of the path are hidden in the mount namespace, open_by_handle_at(2) will return -ESTALE. Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20241011090023.655623-4-amir73il@gmail.com Fixes: 570df4e9c23f ("ceph: snapshot nfs re-export") Acked-by: Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/exportfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 4ee42b2cf4ab..fcab6ab1d38a 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -171,7 +171,7 @@ struct fid { /* Flags supported in encoded handle_type that is exported to user */ #define FILEID_IS_CONNECTABLE 0x10000 #define FILEID_IS_DIR 0x20000 -#define FILEID_VALID_USER_FLAGS (0) +#define FILEID_VALID_USER_FLAGS (FILEID_IS_CONNECTABLE | FILEID_IS_DIR) /** * struct export_operations - for nfsd to communicate with file systems -- cgit v1.2.3 From b0ccf4f53d968e794a4ea579d5135cc1aaf1a53f Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Thu, 14 Nov 2024 22:08:13 +0100 Subject: netfilter: bitwise: add support for doing AND, OR and XOR directly Hitherto, these operations have been converted in user space to mask-and-xor operations on one register and two immediate values, and it is the latter which have been evaluated by the kernel. We add support for evaluating these operations directly in kernel space on one register and either an immediate value or a second register. Pablo made a few changes to the original patch: - EINVAL if NFTA_BITWISE_SREG2 is used with fast version. - Allow _AND,_OR,_XOR with _DATA != sizeof(u32) - Dump _SREG2 or _DATA with _AND,_OR,_XOR Signed-off-by: Jeremy Sowden Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 487542234ccd..49c944e78463 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -568,11 +568,17 @@ enum nft_immediate_attributes { * and XOR boolean operations * @NFT_BITWISE_LSHIFT: left-shift operation * @NFT_BITWISE_RSHIFT: right-shift operation + * @NFT_BITWISE_AND: and operation + * @NFT_BITWISE_OR: or operation + * @NFT_BITWISE_XOR: xor operation */ enum nft_bitwise_ops { NFT_BITWISE_MASK_XOR, NFT_BITWISE_LSHIFT, NFT_BITWISE_RSHIFT, + NFT_BITWISE_AND, + NFT_BITWISE_OR, + NFT_BITWISE_XOR, }; /* * Old name for NFT_BITWISE_MASK_XOR. Retained for backwards-compatibility. @@ -590,6 +596,7 @@ enum nft_bitwise_ops { * @NFTA_BITWISE_OP: type of operation (NLA_U32: nft_bitwise_ops) * @NFTA_BITWISE_DATA: argument for non-boolean operations * (NLA_NESTED: nft_data_attributes) + * @NFTA_BITWISE_SREG2: second source register (NLA_U32: nft_registers) * * The bitwise expression supports boolean and shift operations. It implements * the boolean operations by performing the following operation: @@ -613,6 +620,7 @@ enum nft_bitwise_attributes { NFTA_BITWISE_XOR, NFTA_BITWISE_OP, NFTA_BITWISE_DATA, + NFTA_BITWISE_SREG2, __NFTA_BITWISE_MAX }; #define NFTA_BITWISE_MAX (__NFTA_BITWISE_MAX - 1) -- cgit v1.2.3 From 957860cbc1dc89f79f2acc193470224e350dfd03 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Nov 2024 07:14:03 -0700 Subject: block: make struct rq_list available for !CONFIG_BLOCK A previous commit changed how requests are linked in the plug structure, but unlike the previous method, it uses a new type for it rather than struct request. The latter is available even for !CONFIG_BLOCK, while struct rq_list is now. Move it outside CONFIG_BLOCK. Reported-by: Nathan Chancellor Fixes: a3396b99990d ("block: add a rq_list type") Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 00212e96261a..a1fd0ddce5cf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1006,12 +1006,12 @@ extern void blk_put_queue(struct request_queue *); void blk_mark_disk_dead(struct gendisk *disk); -#ifdef CONFIG_BLOCK struct rq_list { struct request *head; struct request *tail; }; +#ifdef CONFIG_BLOCK /* * blk_plug permits building a queue of related requests by holding the I/O * fragments for a short period. This allows merging of sequential requests -- cgit v1.2.3 From 7eb4e1dd71009472215bc1f8226ee9a041da8d37 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Tue, 12 Nov 2024 18:52:16 +0000 Subject: x86/efi: Drop support for the EFI_PROPERTIES_TABLE Drop support for the EFI_PROPERTIES_TABLE. It was a failed, short-lived experiment that broke the boot both on Linux and Windows, and was replaced by the EFI_MEMORY_ATTRIBUTES_TABLE shortly after. Suggested-by: Ard Biesheuvel Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index e28d88066033..e5815867aba9 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -379,7 +379,6 @@ void efi_native_runtime_setup(void); #define EFI_SYSTEM_RESOURCE_TABLE_GUID EFI_GUID(0xb122a263, 0x3661, 0x4f68, 0x99, 0x29, 0x78, 0xf8, 0xb0, 0xd6, 0x21, 0x80) #define EFI_FILE_SYSTEM_GUID EFI_GUID(0x964e5b22, 0x6459, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define DEVICE_TREE_GUID EFI_GUID(0xb1b621d5, 0xf19c, 0x41a5, 0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0) -#define EFI_PROPERTIES_TABLE_GUID EFI_GUID(0x880aaca3, 0x4adc, 0x4a04, 0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5) #define EFI_RNG_PROTOCOL_GUID EFI_GUID(0x3152bca5, 0xeade, 0x433d, 0x86, 0x2e, 0xc0, 0x1c, 0xdc, 0x29, 0x1f, 0x44) #define EFI_RNG_ALGORITHM_RAW EFI_GUID(0xe43176d7, 0xb6e8, 0x4827, 0xb7, 0x84, 0x7f, 0xfd, 0xc4, 0xb6, 0x85, 0x61) #define EFI_MEMORY_ATTRIBUTES_TABLE_GUID EFI_GUID(0xdcfa911d, 0x26eb, 0x469f, 0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20) @@ -580,15 +579,6 @@ struct efi_mem_range { u64 attribute; }; -typedef struct { - u32 version; - u32 length; - u64 memory_protection_attribute; -} efi_properties_table_t; - -#define EFI_PROPERTIES_TABLE_VERSION 0x00010000 -#define EFI_PROPERTIES_RUNTIME_MEMORY_PROTECTION_NON_EXECUTABLE_PE_DATA 0x1 - typedef struct { u16 version; u16 length; @@ -871,10 +861,9 @@ static inline int efi_range_is_wc(unsigned long start, unsigned long len) #define EFI_PARAVIRT 6 /* Access is via a paravirt interface */ #define EFI_ARCH_1 7 /* First arch-specific bit */ #define EFI_DBG 8 /* Print additional debug info at runtime */ -#define EFI_NX_PE_DATA 9 /* Can runtime data regions be mapped non-executable? */ -#define EFI_MEM_ATTR 10 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */ -#define EFI_MEM_NO_SOFT_RESERVE 11 /* Is the kernel configured to ignore soft reservations? */ -#define EFI_PRESERVE_BS_REGIONS 12 /* Are EFI boot-services memory segments available? */ +#define EFI_MEM_ATTR 9 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */ +#define EFI_MEM_NO_SOFT_RESERVE 10 /* Is the kernel configured to ignore soft reservations? */ +#define EFI_PRESERVE_BS_REGIONS 11 /* Are EFI boot-services memory segments available? */ #ifdef CONFIG_EFI /* -- cgit v1.2.3 From 83e041522eb9c45479f4490b212687cf1e7e9999 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 15 Nov 2024 16:54:40 +0000 Subject: io_uring: temporarily disable registered waits Disable wait argument registration as it'll be replaced with a more generic feature. We'll still need IORING_ENTER_EXT_ARG_REG parsing in a few commits so leave it be. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/70b1d1d218c41ba77a76d1789c8641dab0b0563e.1731689588.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 10 ---------- include/uapi/linux/io_uring.h | 3 --- 2 files changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 072e65e93105..52a5da99a205 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -330,14 +330,6 @@ struct io_ring_ctx { atomic_t cq_wait_nr; atomic_t cq_timeouts; struct wait_queue_head cq_wait; - - /* - * If registered with IORING_REGISTER_CQWAIT_REG, a single - * page holds N entries, mapped in cq_wait_arg. cq_wait_index - * is the maximum allowable index. - */ - struct io_uring_reg_wait *cq_wait_arg; - unsigned char cq_wait_index; } ____cacheline_aligned_in_smp; /* timeouts */ @@ -431,8 +423,6 @@ struct io_ring_ctx { unsigned short n_sqe_pages; struct page **ring_pages; struct page **sqe_pages; - - struct page **cq_wait_page; }; struct io_tw_state { diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 5d08435b95a8..132f5db3d4e8 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -627,9 +627,6 @@ enum io_uring_register_op { /* resize CQ ring */ IORING_REGISTER_RESIZE_RINGS = 33, - /* register fixed io_uring_reg_wait arguments */ - IORING_REGISTER_CQWAIT_REG = 34, - /* this goes last */ IORING_REGISTER_LAST, -- cgit v1.2.3 From dfbbfbf191878e8dd422768ce009858d8b5b761e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 15 Nov 2024 16:54:41 +0000 Subject: io_uring: introduce concept of memory regions We've got a good number of mappings we share with the userspace, that includes the main rings, provided buffer rings, upcoming rings for zerocopy rx and more. All of them duplicate user argument parsing and some internal details as well (page pinnning, huge page optimisations, mmap'ing, etc.) Introduce a notion of regions. For userspace for now it's just a new structure called struct io_uring_region_desc which is supposed to parameterise all such mapping / queue creations. A region either represents a user provided chunk of memory, in which case the user_addr field should point to it, or a request for the kernel to allocate the memory, in which case the user would need to mmap it after using the offset returned in the mmap_offset field. With a uniform userspace API we can avoid additional boiler plate code and apply future optimisation to all of them at once. Internally, there is a new structure struct io_mapped_region holding all relevant runtime information and some helpers to work with it. This patch limits it to user provided regions. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0e6fe25818dfbaebd1bd90b870a6cac503fe1a24.1731689588.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 6 ++++++ include/uapi/linux/io_uring.h | 14 ++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 52a5da99a205..1d3a37234ace 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -75,6 +75,12 @@ struct io_hash_table { unsigned hash_bits; }; +struct io_mapped_region { + struct page **pages; + void *vmap_ptr; + size_t nr_pages; +}; + /* * Arbitrary limit, can be raised if need be */ diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 132f5db3d4e8..5cbfd330c688 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -647,6 +647,20 @@ struct io_uring_files_update { __aligned_u64 /* __s32 * */ fds; }; +enum { + /* initialise with user provided memory pointed by user_addr */ + IORING_MEM_REGION_TYPE_USER = 1, +}; + +struct io_uring_region_desc { + __u64 user_addr; + __u64 size; + __u32 flags; + __u32 id; + __u64 mmap_offset; + __u64 __resv[4]; +}; + /* * Register a fully sparse file space, rather than pass in an array of all * -1 file descriptors. -- cgit v1.2.3 From 93238e66185524aad925acefb2312203b9e26d63 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 15 Nov 2024 16:54:42 +0000 Subject: io_uring: add memory region registration Regions will serve multiple purposes. First, with it we can decouple ring/etc. object creation from registration / mapping of the memory they will be placed in. We already have hacks that allow to put both SQ and CQ into the same huge page, in the future we should be able to: region = create_region(io_ring); create_pbuf_ring(io_uring, region, offset=0); create_pbuf_ring(io_uring, region, offset=N); The second use case is efficiently passing parameters. The following patch enables back on top of regions IORING_ENTER_EXT_ARG_REG, which optimises wait arguments. It'll also be useful for request arguments replacing iovecs, msghdr, etc. pointers. Eventually it would also be handy for BPF as well if it comes to fruition. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0798cf3a14fad19cfc96fc9feca5f3e11481691d.1731689588.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 +++ include/uapi/linux/io_uring.h | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 1d3a37234ace..e1d69123e164 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -429,6 +429,9 @@ struct io_ring_ctx { unsigned short n_sqe_pages; struct page **ring_pages; struct page **sqe_pages; + + /* used for optimised request parameter and wait argument passing */ + struct io_mapped_region param_region; }; struct io_tw_state { diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 5cbfd330c688..1ee35890125b 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -627,6 +627,8 @@ enum io_uring_register_op { /* resize CQ ring */ IORING_REGISTER_RESIZE_RINGS = 33, + IORING_REGISTER_MEM_REGION = 34, + /* this goes last */ IORING_REGISTER_LAST, @@ -661,6 +663,12 @@ struct io_uring_region_desc { __u64 __resv[4]; }; +struct io_uring_mem_region_reg { + __u64 region_uptr; /* struct io_uring_region_desc * */ + __u64 flags; + __u64 __resv[2]; +}; + /* * Register a fully sparse file space, rather than pass in an array of all * -1 file descriptors. -- cgit v1.2.3 From 96a30e469ca1d2b8cc7811b40911f8614b558241 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Nov 2024 16:13:03 -0800 Subject: bpf: use common instruction history across all states Instead of allocating and copying instruction history each time we enqueue child verifier state, switch to a model where we use one common dynamically sized array of instruction history entries across all states. The key observation for proving this is correct is that instruction history is only relevant while state is active, which means it either is a current state (and thus we are actively modifying instruction history and no other state can interfere with us) or we are checkpointed state with some children still active (either enqueued or being current). In the latter case our portion of instruction history is finalized and won't change or grow, so as long as we keep it immutable until the state is finalized, we are good. Now, when state is finalized and is put into state hash for potentially future pruning lookups, instruction history is not used anymore. This is because instruction history is only used by precision marking logic, and we never modify precision markings for finalized states. So, instead of each state having its own small instruction history, we keep a global dynamically-sized instruction history, where each state in current DFS path from root to active state remembers its portion of instruction history. Current state can append to this history, but cannot modify any of its parent histories. Async callback state enqueueing, while logically detached from parent state, still is part of verification backtracking tree, so has to follow the same schema as normal state checkpoints. Because the insn_hist array can be grown through realloc, states don't keep pointers, they instead maintain two indices, [start, end), into global instruction history array. End is exclusive index, so `start == end` means there is no relevant instruction history. This eliminates a lot of allocations and minimizes overall memory usage. For instance, running a worst-case test from [0] (but without the heuristics-based fix [1]), it took 12.5 minutes until we get -ENOMEM. With the changes in this patch the whole test succeeds in 10 minutes (very slow, so heuristics from [1] is important, of course). To further validate correctness, veristat-based comparison was performed for Meta production BPF objects and BPF selftests objects. In both cases there were no differences *at all* in terms of verdict or instruction and state counts, providing a good confidence in the change. Having this low-memory-overhead solution of keeping dynamic per-instruction history cheaply opens up some new possibilities, like keeping extra information for literally every single validated instruction. This will be used for simplifying precision backpropagation logic in follow up patches. [0] https://lore.kernel.org/bpf/20241029172641.1042523-2-eddyz87@gmail.com/ [1] https://lore.kernel.org/bpf/20241029172641.1042523-1-eddyz87@gmail.com/ Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20241115001303.277272-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 6b7c91629176..f4290c179bee 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -334,7 +334,7 @@ struct bpf_func_state { #define MAX_CALL_FRAMES 8 -/* instruction history flags, used in bpf_jmp_history_entry.flags field */ +/* instruction history flags, used in bpf_insn_hist_entry.flags field */ enum { /* instruction references stack slot through PTR_TO_STACK register; * we also store stack's frame number in lower 3 bits (MAX_CALL_FRAMES is 8) @@ -352,7 +352,7 @@ enum { static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES); static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8); -struct bpf_jmp_history_entry { +struct bpf_insn_hist_entry { u32 idx; /* insn idx can't be bigger than 1 million */ u32 prev_idx : 22; @@ -442,13 +442,14 @@ struct bpf_verifier_state { * See get_loop_entry() for more information. */ struct bpf_verifier_state *loop_entry; - /* jmp history recorded from first to last. - * backtracking is using it to go from last to first. - * For most states jmp_history_cnt is [0-3]. + /* Sub-range of env->insn_hist[] corresponding to this state's + * instruction history. + * Backtracking is using it to go from last to first. + * For most states instruction history is short, 0-3 instructions. * For loops can go up to ~40. */ - struct bpf_jmp_history_entry *jmp_history; - u32 jmp_history_cnt; + u32 insn_hist_start; + u32 insn_hist_end; u32 dfs_depth; u32 callback_unroll_depth; u32 may_goto_depth; @@ -738,7 +739,9 @@ struct bpf_verifier_env { int cur_stack; } cfg; struct backtrack_state bt; - struct bpf_jmp_history_entry *cur_hist_ent; + struct bpf_insn_hist_entry *insn_hist; + struct bpf_insn_hist_entry *cur_hist_ent; + u32 insn_hist_cap; u32 pass_cnt; /* number of times do_check() was called */ u32 subprog_cnt; /* number of instructions analyzed by the verifier */ -- cgit v1.2.3 From 906c508afdca3487c4273bfeda8abedc8e21047b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 15 Nov 2024 17:42:48 +0100 Subject: sysfs: attribute_group: allow registration of const bin_attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be able to constify instances of struct bin_attribute it has to be possible to add them to string attribute_group. The current type of the bin_attrs member however is not compatible with that. Introduce a union that allows registration of both const and non-const attributes to enable a piecewise transition. As both union member types are compatible no logic needs to be adapted. Technically it is now possible register a const struct bin_attribute and receive it as mutable pointer in the callbacks. This is a soundness issue. But this same soundness issue already exists today in sysfs_create_bin_file(). Also the struct definition and callback implementation are always closely linked and are meant to be moved to const in lockstep. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241115-b4-sysfs-const-bin_attr-group-v1-1-2c9bb12dfc48@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index d713a6445a62..0f2fcd244523 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -106,7 +106,10 @@ struct attribute_group { const struct bin_attribute *, int); struct attribute **attrs; - struct bin_attribute **bin_attrs; + union { + struct bin_attribute **bin_attrs; + const struct bin_attribute *const *bin_attrs_new; + }; }; #define SYSFS_PREALLOC 010000 -- cgit v1.2.3 From d617b3147d54c42351eac63b5398d4ddf4f4011b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 15 Nov 2024 16:54:43 +0000 Subject: io_uring: restore back registered wait arguments Now we've got a more generic region registration API, place IORING_ENTER_EXT_ARG_REG and re-enable it. First, the user has to register a region with the IORING_MEM_REGION_REG_WAIT_ARG flag set. It can only be done for a ring in a disabled state, aka IORING_SETUP_R_DISABLED, to avoid races with already running waiters. With that we should have stable constant values for ctx->cq_wait_{size,arg} in io_get_ext_arg_reg() and hence no READ_ONCE required. The other API difference is that we're now passing byte offsets instead of indexes. The user _must_ align all offsets / pointers to the native word size, failing to do so might but not necessarily has to lead to a failure usually returned as -EFAULT. liburing will be hiding this details from users. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/81822c1b4ffbe8ad391b4f9ad1564def0d26d990.1731689588.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 +++ include/uapi/linux/io_uring.h | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index e1d69123e164..aa5f5ea98076 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -324,6 +324,9 @@ struct io_ring_ctx { unsigned cq_entries; struct io_ev_fd __rcu *io_ev_fd; unsigned cq_extra; + + void *cq_wait_arg; + size_t cq_wait_size; } ____cacheline_aligned_in_smp; /* diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 1ee35890125b..4418d0192959 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -663,6 +663,11 @@ struct io_uring_region_desc { __u64 __resv[4]; }; +enum { + /* expose the region as registered wait arguments */ + IORING_MEM_REGION_REG_WAIT_ARG = 1, +}; + struct io_uring_mem_region_reg { __u64 region_uptr; /* struct io_uring_region_desc * */ __u64 flags; -- cgit v1.2.3 From 9407f5c3ec10c155aae61fc4496a7c17a96907b4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 11 Nov 2024 16:28:51 -0800 Subject: srcu: Unconditionally record srcu_read_lock_lite() in ->srcu_reader_flavor Currently, srcu_read_lock_lite() uses the SRCU_READ_FLAVOR_LITE bit in ->srcu_reader_flavor to communicate to the grace-period processing in srcu_readers_active_idx_check() that the smp_mb() must be replaced by a synchronize_rcu(). Unfortunately, ->srcu_reader_flavor is not updated unless the kernel is built with CONFIG_PROVE_RCU=y. Therefore in all kernels built with CONFIG_PROVE_RCU=n, srcu_readers_active_idx_check() incorrectly uses smp_mb() instead of synchronize_rcu() for srcu_struct structures whose readers use srcu_read_lock_lite(). This commit therefore causes Tree SRCU srcu_read_lock_lite() to unconditionally update ->srcu_reader_flavor so that srcu_readers_active_idx_check() can make the correct choice. Reported-by: Neeraj Upadhyay Closes: https://lore.kernel.org/all/d07e8f4a-d5ff-4c8e-8e61-50db285c57e9@amd.com/ Fixes: c0f08d6b5a61 ("srcu: Add srcu_read_lock_lite() and srcu_read_unlock_lite()") Signed-off-by: Paul E. McKenney Cc: Frederic Weisbecker Reviewed-by: Neeraj Upadhyay Signed-off-by: Frederic Weisbecker --- include/linux/srcu.h | 8 +------- include/linux/srcutiny.h | 3 +++ include/linux/srcutree.h | 21 +++++++++++++++++++++ 3 files changed, 25 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 56f83237de4d..08339eb8a01c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -183,12 +183,6 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU) -void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor); -#else -#define srcu_check_read_flavor(ssp, read_flavor) do { } while (0) -#endif - /** * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing @@ -277,7 +271,7 @@ static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp) { int retval; - srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE); + srcu_check_read_flavor_lite(ssp); retval = __srcu_read_lock_lite(ssp); rcu_try_lock_acquire(&ssp->dep_map); return retval; diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 4d96bbdb45f0..1321da803274 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -81,6 +81,9 @@ static inline void srcu_barrier(struct srcu_struct *ssp) synchronize_srcu(ssp); } +#define srcu_check_read_flavor(ssp, read_flavor) do { } while (0) +#define srcu_check_read_flavor_lite(ssp) do { } while (0) + /* Defined here to avoid size increase for non-torture kernels. */ static inline void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 778eb61542e1..490aeecc6bb4 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -248,4 +248,25 @@ static inline void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite()."); } +void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor); + +// Record _lite() usage even for CONFIG_PROVE_RCU=n kernels. +static inline void srcu_check_read_flavor_lite(struct srcu_struct *ssp) +{ + struct srcu_data *sdp = raw_cpu_ptr(ssp->sda); + + if (likely(READ_ONCE(sdp->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE)) + return; + + // Note that the cmpxchg() in srcu_check_read_flavor() is fully ordered. + __srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE); +} + +// Record non-_lite() usage only for CONFIG_PROVE_RCU=y kernels. +static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor) +{ + if (IS_ENABLED(CONFIG_PROVE_RCU)) + __srcu_check_read_flavor(ssp, read_flavor); +} + #endif -- cgit v1.2.3 From 41ffcd95015f18178ddc53fed919c2842d52fe38 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 14 Nov 2024 10:33:27 +0000 Subject: net: phy: fix phylib's dual eee_enabled phylib has two eee_enabled members. Some parts of the code are using phydev->eee_enabled, other parts are using phydev->eee_cfg.eee_enabled. This leads to incorrect behaviour as their state goes out of sync. ethtool --show-eee shows incorrect information, and --set-eee sometimes doesn't take effect. Fix this by only having one eee_enabled member - that in eee_cfg. Fixes: 49168d1980e2 ("net: phy: Add phy_support_eee() indicating MAC support EEE") Signed-off-by: Russell King (Oracle) Reviewed-by: Heiner Kallweit Link: https://patch.msgid.link/E1tBXAF-00341F-EQ@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index a98bc91a0cde..44890cdf40a2 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -601,7 +601,6 @@ struct macsec_ops; * @adv_old: Saved advertised while power saving for WoL * @supported_eee: supported PHY EEE linkmodes * @advertising_eee: Currently advertised EEE linkmodes - * @eee_enabled: Flag indicating whether the EEE feature is enabled * @enable_tx_lpi: When True, MAC should transmit LPI to PHY * @eee_cfg: User configuration of EEE * @lp_advertising: Current link partner advertised linkmodes @@ -721,7 +720,6 @@ struct phy_device { /* used for eee validation and configuration*/ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee); - bool eee_enabled; /* Host supported PHY interface types. Should be ignored if empty. */ DECLARE_PHY_INTERFACE_MASK(host_interfaces); -- cgit v1.2.3 From 221a9c1df790fa711d65daf5ba05d0addc279153 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 14 Nov 2024 03:00:11 -0800 Subject: net: netpoll: Individualize the skb pool The current implementation of the netpoll system uses a global skb pool, which can lead to inefficient memory usage and waste when targets are disabled or no longer in use. This can result in a significant amount of memory being unnecessarily allocated and retained, potentially causing performance issues and limiting the availability of resources for other system components. Modify the netpoll system to assign a skb pool to each target instead of using a global one. This approach allows for more fine-grained control over memory allocation and deallocation, ensuring that resources are only allocated and retained as needed. Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20241114-skb_buffers_v2-v3-1-9be9f52a8b69@debian.org Signed-off-by: Jakub Kicinski --- include/linux/netpoll.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index cd4e28db0cbd..77635b885c18 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -32,6 +32,7 @@ struct netpoll { bool ipv6; u16 local_port, remote_port; u8 remote_mac[ETH_ALEN]; + struct sk_buff_head skb_pool; }; struct netpoll_info { -- cgit v1.2.3 From 4b42fbc6bd8f73d9ded535d8c61ccaa837ff3bd4 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 14 Nov 2024 15:09:53 +0100 Subject: ndo_fdb_add: Add a parameter to report whether notification was sent Currently when FDB entries are added to or deleted from a VXLAN netdevice, the VXLAN driver emits one notification, including the VXLAN-specific attributes. The core however always sends a notification as well, a generic one. Thus two notifications are unnecessarily sent for these operations. A similar situation comes up with bridge driver, which also emits notifications on its own: # ip link add name vx type vxlan id 1000 dstport 4789 # bridge monitor fdb & [1] 1981693 # bridge fdb add de:ad:be:ef:13:37 dev vx self dst 192.0.2.1 de:ad:be:ef:13:37 dev vx dst 192.0.2.1 self permanent de:ad:be:ef:13:37 dev vx self permanent In order to prevent this duplicity, add a paremeter to ndo_fdb_add, bool *notified. The flag is primed to false, and if the callee sends a notification on its own, it sets it to true, thus informing the core that it should not generate another notification. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/cbf6ae8195e85cbf922f8058ce4eba770f3b71ed.1731589511.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0aae346d919e..6a7fd191e1ee 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1248,8 +1248,10 @@ struct netdev_net_notifier { * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, * const unsigned char *addr, u16 vid, u16 flags, - * struct netlink_ext_ack *extack); + * bool *notified, struct netlink_ext_ack *extack); * Adds an FDB entry to dev for addr. + * Callee shall set *notified to true if it sent any appropriate + * notification(s). Otherwise core will send a generic one. * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, * const unsigned char *addr, u16 vid) @@ -1525,6 +1527,7 @@ struct net_device_ops { const unsigned char *addr, u16 vid, u16 flags, + bool *notified, struct netlink_ext_ack *extack); int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], -- cgit v1.2.3 From 42575ad5aab932273475d1ec3e7881cb5a05420e Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 14 Nov 2024 15:09:54 +0100 Subject: ndo_fdb_del: Add a parameter to report whether notification was sent In a similar fashion to ndo_fdb_add, which was covered in the previous patch, add the bool *notified argument to ndo_fdb_del. Callees that send a notification on their own set the flag to true. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/06b1acf4953ef0a5ed153ef1f32d7292044f2be6.1731589511.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6a7fd191e1ee..ecc686409161 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1254,8 +1254,11 @@ struct netdev_net_notifier { * notification(s). Otherwise core will send a generic one. * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, - * const unsigned char *addr, u16 vid) + * const unsigned char *addr, u16 vid + * bool *notified, struct netlink_ext_ack *extack); * Deletes the FDB entry from dev corresponding to addr. + * Callee shall set *notified to true if it sent any appropriate + * notification(s). Otherwise core will send a generic one. * int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, @@ -1533,7 +1536,9 @@ struct net_device_ops { struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, - u16 vid, struct netlink_ext_ack *extack); + u16 vid, + bool *notified, + struct netlink_ext_ack *extack); int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, struct netlink_ext_ack *extack); -- cgit v1.2.3 From 3ef66af31feaf5ff5dd73e63b1327789822ed476 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Tue, 12 Nov 2024 09:29:20 +0800 Subject: virtio_ring: introduce add api for premapped Two APIs are introduced to submit premapped per-buffers. int virtqueue_add_inbuf_premapped(struct virtqueue *vq, struct scatterlist *sg, unsigned int num, void *data, void *ctx, gfp_t gfp); int virtqueue_add_outbuf_premapped(struct virtqueue *vq, struct scatterlist *sg, unsigned int num, void *data, gfp_t gfp); Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Link: https://patch.msgid.link/20241112012928.102478-6-xuanzhuo@linux.alibaba.com Signed-off-by: Jakub Kicinski --- include/linux/virtio.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 306137a15d07..13b3f55abca3 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -56,6 +56,17 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq, void *ctx, gfp_t gfp); +int virtqueue_add_inbuf_premapped(struct virtqueue *vq, + struct scatterlist *sg, unsigned int num, + void *data, + void *ctx, + gfp_t gfp); + +int virtqueue_add_outbuf_premapped(struct virtqueue *vq, + struct scatterlist *sg, unsigned int num, + void *data, + gfp_t gfp); + int virtqueue_add_sgs(struct virtqueue *vq, struct scatterlist *sgs[], unsigned int out_sgs, -- cgit v1.2.3 From 880ebcbe06636c42cfb328039581e177c6cd6ba5 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Tue, 12 Nov 2024 09:29:22 +0800 Subject: virtio_ring: remove API virtqueue_set_dma_premapped Now, this API is useless. remove it. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Link: https://patch.msgid.link/20241112012928.102478-8-xuanzhuo@linux.alibaba.com Signed-off-by: Jakub Kicinski --- include/linux/virtio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 13b3f55abca3..338e0f5efb4b 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -93,8 +93,6 @@ bool virtqueue_enable_cb(struct virtqueue *vq); unsigned virtqueue_enable_cb_prepare(struct virtqueue *vq); -int virtqueue_set_dma_premapped(struct virtqueue *_vq); - bool virtqueue_poll(struct virtqueue *vq, unsigned); bool virtqueue_enable_cb_delayed(struct virtqueue *vq); -- cgit v1.2.3 From 665745f274870c921020f610e2c99a3b1613519b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 18 Oct 2024 17:47:52 +0300 Subject: PCI/bwctrl: Re-add BW notification portdrv as PCIe BW controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This mostly reverts the commit b4c7d2076b4e ("PCI/LINK: Remove bandwidth notification"). An upcoming commit extends this driver building PCIe bandwidth controller on top of it. PCIe bandwidth notifications were first added in the commit e8303bb7a75c ("PCI/LINK: Report degraded links via link bandwidth notification") but later had to be removed. The significant changes compared with the old bandwidth notification driver include: 1) Don't print the notifications into kernel log, just keep the Link Speed cached in struct pci_bus updated. While somewhat unfortunate, the log spam was the source of complaints that eventually lead to the removal of the bandwidth notifications driver (see the links below for further information). 2) Besides the Link Bandwidth Management Interrupt, also enable Link Autonomous Bandwidth Interrupt to cover the other source of bandwidth changes. 3) Handle Link Speed updates robustly. Refresh the cached Link Speed when enabling Bandwidth Notification Interrupts, and solve the race between Link Speed read and LBMS/LABS update in pcie_bwnotif_irq_thread(). 4) Use concurrency safe LNKCTL RMW operations. 5) The driver is now called PCIe bwctrl (bandwidth controller) instead of just bandwidth notifications because of increased scope and functionality within the driver. 6) Coexist with the Target Link Speed quirk in pcie_failed_link_retrain(). Provide LBMS counting API for it. 7) Tweaks to variable/functions names for consistency and length reasons. Bandwidth Notifications enable the cur_bus_speed in the struct pci_bus to keep track PCIe Link Speed changes. [bhelgaas: This is based on previous work by Alexandru Gagniuc ; see e8303bb7a75c ("PCI/LINK: Report degraded links via link bandwidth notification")] Link: https://lore.kernel.org/r/20241018144755.7875-7-ilpo.jarvinen@linux.intel.com Link: https://lore.kernel.org/all/20190429185611.121751-1-helgaas@kernel.org/ Link: https://lore.kernel.org/linux-pci/20190501142942.26972-1-keith.busch@intel.com/ Link: https://lore.kernel.org/linux-pci/20200115221008.GA191037@google.com/ Suggested-by: Lukas Wunner # Building bwctrl on top of bwnotif Signed-off-by: Ilpo Järvinen [bhelgaas: squash fix to drop IRQF_ONESHOT and convert to hardirq handler: https://lore.kernel.org/r/20241115165717.15233-1-ilpo.jarvinen@linux.intel.com] Signed-off-by: Bjorn Helgaas Tested-by: Stefan Wahren Reviewed-by: Jonathan Cameron --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 99c6fa30d25b..579b28833429 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -313,6 +313,7 @@ struct pci_vpd { }; struct irq_affinity; +struct pcie_bwctrl_data; struct pcie_link_state; struct pci_sriov; struct pci_p2pdma; @@ -502,6 +503,7 @@ struct pci_dev { unsigned int dpc_rp_extensions:1; u8 dpc_rp_log_size; #endif + struct pcie_bwctrl_data *link_bwctrl; #ifdef CONFIG_PCI_ATS union { struct pci_sriov *sriov; /* PF: SR-IOV info */ -- cgit v1.2.3 From de9a6c8d5dbfedb5eb3722c822da0490f6a59a45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 18 Oct 2024 17:47:53 +0300 Subject: PCI/bwctrl: Add pcie_set_target_speed() to set PCIe Link Speed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, PCIe Link Speeds are adjusted by custom code rather than in a common function provided in PCI core. The PCIe bandwidth controller (bwctrl) introduces an in-kernel API, pcie_set_target_speed(), to set PCIe Link Speed. Convert Target Speed quirk to use the new API. The Target Speed quirk runs very early when bwctrl is not yet probed for a Port and can also run later when bwctrl is already setup for the Port, which requires the per port mutex (set_speed_mutex) to be only taken if the bwctrl setup is already complete. The new API is also intended to be used in an upcoming commit that adds a thermal cooling device to throttle PCIe bandwidth when thermal thresholds are reached. The PCIe bandwidth control procedure is as follows. The highest speed supported by the Port and the PCIe device which is not higher than the requested speed is selected and written into the Target Link Speed in the Link Control 2 Register. Then bandwidth controller retrains the PCIe Link. Bandwidth Notifications enable the cur_bus_speed in the struct pci_bus to keep track PCIe Link Speed changes. While Bandwidth Notifications should also be generated when bandwidth controller alters the PCIe Link Speed, a few platforms do not deliver LMBS interrupt after Link Training as expected. Thus, after changing the Link Speed, bandwidth controller makes additional read for the Link Status Register to ensure cur_bus_speed is consistent with the new PCIe Link Speed. Link: https://lore.kernel.org/r/20241018144755.7875-8-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen [bhelgaas: squash devm_mutex_init() error checking from https://lore.kernel.org/r/20241030163139.2111689-1-andriy.shevchenko@linux.intel.com, drop export of pcie_set_target_speed()] Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- include/linux/pci.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 579b28833429..4f02d9f91399 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1797,9 +1797,19 @@ static inline int pci_irqd_intx_xlate(struct irq_domain *d, #ifdef CONFIG_PCIEPORTBUS extern bool pcie_ports_disabled; extern bool pcie_ports_native; + +int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req, + bool use_lt); #else #define pcie_ports_disabled true #define pcie_ports_native false + +static inline int pcie_set_target_speed(struct pci_dev *port, + enum pci_bus_speed speed_req, + bool use_lt) +{ + return -EOPNOTSUPP; +} #endif #define PCIE_LINK_STATE_L0S (BIT(0) | BIT(1)) /* Upstr/dwnstr L0s */ -- cgit v1.2.3 From d278b098282d1327f6e1be82aacb18457a4d244d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 18 Oct 2024 17:47:54 +0300 Subject: thermal: Add PCIe cooling driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a thermal cooling driver to provide path to access PCIe bandwidth controller using the usual thermal interfaces. A cooling device is instantiated for controllable PCIe Ports from the bwctrl service driver. If registering the cooling device fails, allow bwctrl's probe to succeed regardless. As cdev in that case contains IS_ERR() pseudo "pointer", clean that up inside the probe function so the remove side doesn't need to suddenly make an odd looking IS_ERR() check. The thermal side state 0 means no throttling, i.e., maximum supported PCIe Link Speed. Link: https://lore.kernel.org/r/20241018144755.7875-9-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen [bhelgaas: dropped data->cdev test per https://lore.kernel.org/r/ZzRm1SJTwEMRsAr8@wunner.de] Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Acked-by: Rafael J. Wysocki # From the cooling device interface perspective --- include/linux/pci-bwctrl.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/linux/pci-bwctrl.h (limited to 'include') diff --git a/include/linux/pci-bwctrl.h b/include/linux/pci-bwctrl.h new file mode 100644 index 000000000000..cee07127455b --- /dev/null +++ b/include/linux/pci-bwctrl.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * PCIe bandwidth controller + * + * Copyright (C) 2023-2024 Intel Corporation + */ + +#ifndef LINUX_PCI_BWCTRL_H +#define LINUX_PCI_BWCTRL_H + +#include + +struct thermal_cooling_device; + +#ifdef CONFIG_PCIE_THERMAL +struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port); +void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev); +#else +static inline struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port) +{ + return NULL; +} +static inline void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev) +{ +} +#endif + +#endif -- cgit v1.2.3 From d7a516c6eeae29144649f1c3f586fa580ec9e040 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 15 Nov 2024 21:46:02 +0100 Subject: compiler.h: Fix undefined BUILD_BUG_ON_ZERO() defines __must_be_array() and __must_be_cstr() and both expand to BUILD_BUG_ON_ZERO(), but defines BUILD_BUG_ON_ZERO(). Including in would create a cyclic dependency as already includes . Fix that by defining __BUILD_BUG_ON_ZERO_MSG() in and using that for __must_be_array() and __must_be_cstr(). Signed-off-by: Philipp Reisner Link: https://lore.kernel.org/r/20241115204602.249590-1-philipp.reisner@linbit.com Signed-off-by: Kees Cook --- include/linux/compiler.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 4d4e23b6e3e7..469a64dd6495 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -239,11 +239,18 @@ static inline void *offset_to_ptr(const int *off) #endif /* __ASSEMBLY__ */ +#ifdef __CHECKER__ +#define __BUILD_BUG_ON_ZERO_MSG(e, msg) (0) +#else /* __CHECKER__ */ +#define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);})) +#endif /* __CHECKER__ */ + /* &a[0] degrades to a pointer: a different type from an array */ -#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +#define __must_be_array(a) __BUILD_BUG_ON_ZERO_MSG(__same_type((a), &(a)[0]), "must be array") /* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */ -#define __must_be_cstr(p) BUILD_BUG_ON_ZERO(__annotated(p, nonstring)) +#define __must_be_cstr(p) \ + __BUILD_BUG_ON_ZERO_MSG(__annotated(p, nonstring), "must be cstr (NUL-terminated)") /* * This returns a constant expression while determining if an argument is -- cgit v1.2.3 From accdd51dc74ff65b7b7be1961b11723d228fbbbd Mon Sep 17 00:00:00 2001 From: Philo Lu Date: Thu, 14 Nov 2024 18:52:04 +0800 Subject: net/udp: Add a new struct for hash2 slot Preparing for udp 4-tuple hash (uhash4 for short). To implement uhash4 without cache line missing when lookup, hslot2 is used to record the number of hashed sockets in hslot4. Thus adding a new struct udp_hslot_main with field hash4_cnt, which is used by hash2. The new struct is used to avoid doubling the size of udp_hslot. Before uhash4 lookup, firstly checking hash4_cnt to see if there are hashed sks in hslot4. Because hslot2 is always used in lookup, there is no cache line miss. Related helpers are updated, and use the helpers as possible. uhash4 is implemented in following patches. Signed-off-by: Philo Lu Acked-by: Willem de Bruijn Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/udp.h | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index 61222545ab1c..62a7207e65f2 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -50,7 +50,7 @@ struct udp_skb_cb { #define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb)) /** - * struct udp_hslot - UDP hash slot + * struct udp_hslot - UDP hash slot used by udp_table.hash * * @head: head of list of sockets * @count: number of sockets in 'head' list @@ -60,7 +60,22 @@ struct udp_hslot { struct hlist_head head; int count; spinlock_t lock; -} __attribute__((aligned(2 * sizeof(long)))); +} __aligned(2 * sizeof(long)); + +/** + * struct udp_hslot_main - UDP hash slot used by udp_table.hash2 + * + * @hslot: basic hash slot + * @hash4_cnt: number of sockets in hslot4 of the same + * (local port, local address) + */ +struct udp_hslot_main { + struct udp_hslot hslot; /* must be the first member */ +#if !IS_ENABLED(CONFIG_BASE_SMALL) + u32 hash4_cnt; +#endif +} __aligned(2 * sizeof(long)); +#define UDP_HSLOT_MAIN(__hslot) ((struct udp_hslot_main *)(__hslot)) /** * struct udp_table - UDP table @@ -72,7 +87,7 @@ struct udp_hslot { */ struct udp_table { struct udp_hslot *hash; - struct udp_hslot *hash2; + struct udp_hslot_main *hash2; unsigned int mask; unsigned int log; }; @@ -84,6 +99,7 @@ static inline struct udp_hslot *udp_hashslot(struct udp_table *table, { return &table->hash[udp_hashfn(net, num, table->mask)]; } + /* * For secondary hash, net_hash_mix() is performed before calling * udp_hashslot2(), this explains difference with udp_hashslot() @@ -91,8 +107,22 @@ static inline struct udp_hslot *udp_hashslot(struct udp_table *table, static inline struct udp_hslot *udp_hashslot2(struct udp_table *table, unsigned int hash) { - return &table->hash2[hash & table->mask]; + return &table->hash2[hash & table->mask].hslot; +} + +#if IS_ENABLED(CONFIG_BASE_SMALL) +static inline void udp_table_hash4_init(struct udp_table *table) +{ +} +#else /* !CONFIG_BASE_SMALL */ + +/* Must be called with table->hash2 initialized */ +static inline void udp_table_hash4_init(struct udp_table *table) +{ + for (int i = 0; i <= table->mask; i++) + table->hash2[i].hash4_cnt = 0; } +#endif /* CONFIG_BASE_SMALL */ extern struct proto udp_prot; -- cgit v1.2.3 From dab78a1745ab3c6001e1e4d50a9d09efef8e260d Mon Sep 17 00:00:00 2001 From: Philo Lu Date: Thu, 14 Nov 2024 18:52:05 +0800 Subject: net/udp: Add 4-tuple hash list basis Add a new hash list, hash4, in udp table. It will be used to implement 4-tuple hash for connected udp sockets. This patch adds the hlist to table, and implements helpers and the initialization. 4-tuple hash is implemented in the following patch. hash4 uses hlist_nulls to avoid moving wrongly onto another hlist due to concurrent rehash, because rehash() can happen with lookup(). Co-developed-by: Cambda Zhu Signed-off-by: Cambda Zhu Co-developed-by: Fred Chen Signed-off-by: Fred Chen Co-developed-by: Yubing Qiu Signed-off-by: Yubing Qiu Signed-off-by: Philo Lu Acked-by: Willem de Bruijn Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/udp.h | 11 +++++++ include/net/udp.h | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 93 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/udp.h b/include/linux/udp.h index 3eb3f2b9a2a0..0807e21cfec9 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -56,6 +56,12 @@ struct udp_sock { int pending; /* Any pending frames ? */ __u8 encap_type; /* Is this an Encapsulation socket? */ +#if !IS_ENABLED(CONFIG_BASE_SMALL) + /* For UDP 4-tuple hash */ + __u16 udp_lrpa_hash; + struct hlist_nulls_node udp_lrpa_node; +#endif + /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -206,6 +212,11 @@ static inline void udp_allow_gso(struct sock *sk) #define udp_portaddr_for_each_entry_rcu(__sk, list) \ hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node) +#if !IS_ENABLED(CONFIG_BASE_SMALL) +#define udp_lrpa_for_each_entry_rcu(__up, node, list) \ + hlist_nulls_for_each_entry_rcu(__up, node, list, udp_lrpa_node) +#endif + #define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE) #endif /* _LINUX_UDP_H */ diff --git a/include/net/udp.h b/include/net/udp.h index 62a7207e65f2..edb669967130 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -50,14 +50,21 @@ struct udp_skb_cb { #define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb)) /** - * struct udp_hslot - UDP hash slot used by udp_table.hash + * struct udp_hslot - UDP hash slot used by udp_table.hash/hash4 * * @head: head of list of sockets + * @nulls_head: head of list of sockets, only used by hash4 * @count: number of sockets in 'head' list * @lock: spinlock protecting changes to head/count */ struct udp_hslot { - struct hlist_head head; + union { + struct hlist_head head; + /* hash4 uses hlist_nulls to avoid moving wrongly onto another + * hlist, because rehash() can happen with lookup(). + */ + struct hlist_nulls_head nulls_head; + }; int count; spinlock_t lock; } __aligned(2 * sizeof(long)); @@ -82,12 +89,17 @@ struct udp_hslot_main { * * @hash: hash table, sockets are hashed on (local port) * @hash2: hash table, sockets are hashed on (local port, local address) + * @hash4: hash table, connected sockets are hashed on + * (local port, local address, remote port, remote address) * @mask: number of slots in hash tables, minus 1 * @log: log2(number of slots in hash table) */ struct udp_table { struct udp_hslot *hash; struct udp_hslot_main *hash2; +#if !IS_ENABLED(CONFIG_BASE_SMALL) + struct udp_hslot *hash4; +#endif unsigned int mask; unsigned int log; }; @@ -114,13 +126,80 @@ static inline struct udp_hslot *udp_hashslot2(struct udp_table *table, static inline void udp_table_hash4_init(struct udp_table *table) { } + +static inline struct udp_hslot *udp_hashslot4(struct udp_table *table, + unsigned int hash) +{ + BUILD_BUG(); + return NULL; +} + +static inline bool udp_hashed4(const struct sock *sk) +{ + return false; +} + +static inline unsigned int udp_hash4_slot_size(void) +{ + return 0; +} + +static inline bool udp_has_hash4(const struct udp_hslot *hslot2) +{ + return false; +} + +static inline void udp_hash4_inc(struct udp_hslot *hslot2) +{ +} + +static inline void udp_hash4_dec(struct udp_hslot *hslot2) +{ +} #else /* !CONFIG_BASE_SMALL */ /* Must be called with table->hash2 initialized */ static inline void udp_table_hash4_init(struct udp_table *table) { - for (int i = 0; i <= table->mask; i++) + table->hash4 = (void *)(table->hash2 + (table->mask + 1)); + for (int i = 0; i <= table->mask; i++) { table->hash2[i].hash4_cnt = 0; + + INIT_HLIST_NULLS_HEAD(&table->hash4[i].nulls_head, i); + table->hash4[i].count = 0; + spin_lock_init(&table->hash4[i].lock); + } +} + +static inline struct udp_hslot *udp_hashslot4(struct udp_table *table, + unsigned int hash) +{ + return &table->hash4[hash & table->mask]; +} + +static inline bool udp_hashed4(const struct sock *sk) +{ + return !hlist_nulls_unhashed(&udp_sk(sk)->udp_lrpa_node); +} + +static inline unsigned int udp_hash4_slot_size(void) +{ + return sizeof(struct udp_hslot); +} + +static inline bool udp_has_hash4(const struct udp_hslot *hslot2) +{ + return UDP_HSLOT_MAIN(hslot2)->hash4_cnt; +} + +static inline void udp_hash4_inc(struct udp_hslot *hslot2) +{ + UDP_HSLOT_MAIN(hslot2)->hash4_cnt++; +} + +static inline void udp_hash4_dec(struct udp_hslot *hslot2) +{ + UDP_HSLOT_MAIN(hslot2)->hash4_cnt--; } #endif /* CONFIG_BASE_SMALL */ -- cgit v1.2.3 From 78c91ae2c6deb5d236a5a93ff2995cdd05514380 Mon Sep 17 00:00:00 2001 From: Philo Lu Date: Thu, 14 Nov 2024 18:52:06 +0800 Subject: ipv4/udp: Add 4-tuple hash for connected socket Currently, the udp_table has two hash table, the port hash and portaddr hash. Usually for UDP servers, all sockets have the same local port and addr, so they are all on the same hash slot within a reuseport group. In some applications, UDP servers use connect() to manage clients. In particular, when firstly receiving from an unseen 4 tuple, a new socket is created and connect()ed to the remote addr:port, and then the fd is used exclusively by the client. Once there are connected sks in a reuseport group, udp has to score all sks in the same hash2 slot to find the best match. This could be inefficient with a large number of connections, resulting in high softirq overhead. To solve the problem, this patch implement 4-tuple hash for connected udp sockets. During connect(), hash4 slot is updated, as well as a corresponding counter, hash4_cnt, in hslot2. In __udp4_lib_lookup(), hslot4 will be searched firstly if the counter is non-zero. Otherwise, hslot2 is used like before. Note that only connected sockets enter this hash4 path, while un-connected ones are not affected. hlist_nulls is used for hash4, because we probably move to another hslot wrongly when lookup with concurrent rehash. Then we check nulls at the list end to see if we should restart lookup. Because udp does not use SLAB_TYPESAFE_BY_RCU, we don't need to touch sk_refcnt when lookup. Stress test results (with 1 cpu fully used) are shown below, in pps: (1) _un-connected_ socket as server [a] w/o hash4: 1,825176 [b] w/ hash4: 1,831750 (+0.36%) (2) 500 _connected_ sockets as server [c] w/o hash4: 290860 (only 16% of [a]) [d] w/ hash4: 1,889658 (+3.1% compared with [b]) With hash4, compute_score is skipped when lookup, so [d] is slightly better than [b]. Co-developed-by: Cambda Zhu Signed-off-by: Cambda Zhu Co-developed-by: Fred Chen Signed-off-by: Fred Chen Co-developed-by: Yubing Qiu Signed-off-by: Yubing Qiu Signed-off-by: Philo Lu Acked-by: Willem de Bruijn Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/udp.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index edb669967130..feb06c0e48fb 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -302,13 +302,27 @@ static inline int udp_lib_hash(struct sock *sk) } void udp_lib_unhash(struct sock *sk); -void udp_lib_rehash(struct sock *sk, u16 new_hash); +void udp_lib_rehash(struct sock *sk, u16 new_hash, u16 new_hash4); static inline void udp_lib_close(struct sock *sk, long timeout) { sk_common_release(sk); } +/* hash4 routines shared between UDPv4/6 */ +#if IS_ENABLED(CONFIG_BASE_SMALL) +static inline void udp_lib_hash4(struct sock *sk, u16 hash) +{ +} + +static inline void udp4_hash4(struct sock *sk) +{ +} +#else /* !CONFIG_BASE_SMALL */ +void udp_lib_hash4(struct sock *sk, u16 hash); +void udp4_hash4(struct sock *sk); +#endif /* CONFIG_BASE_SMALL */ + int udp_lib_get_port(struct sock *sk, unsigned short snum, unsigned int hash2_nulladdr); -- cgit v1.2.3 From 1b29a730ef8b6fd3aa3e11c2f6d409cf201cd913 Mon Sep 17 00:00:00 2001 From: Philo Lu Date: Thu, 14 Nov 2024 18:52:07 +0800 Subject: ipv6/udp: Add 4-tuple hash for connected socket Implement ipv6 udp hash4 like that in ipv4. The major difference is that the hash value should be calculated with udp6_ehashfn(). Besides, ipv4-mapped ipv6 address is handled before hash() and rehash(). Export udp_ehashfn because now we use it in udpv6 rehash. Core procedures of hash/unhash/rehash are same as ipv4, and udpv4 and udpv6 share the same udptable, so some functions in ipv4 hash4 can also be shared. Co-developed-by: Cambda Zhu Signed-off-by: Cambda Zhu Co-developed-by: Fred Chen Signed-off-by: Fred Chen Co-developed-by: Yubing Qiu Signed-off-by: Yubing Qiu Signed-off-by: Philo Lu Acked-by: Willem de Bruijn Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/udp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index feb06c0e48fb..6e89520e100d 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -303,6 +303,8 @@ static inline int udp_lib_hash(struct sock *sk) void udp_lib_unhash(struct sock *sk); void udp_lib_rehash(struct sock *sk, u16 new_hash, u16 new_hash4); +u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, + const __be32 faddr, const __be16 fport); static inline void udp_lib_close(struct sock *sk, long timeout) { -- cgit v1.2.3 From a06e4a93067cd8f55a74638d45146ddde76574f2 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 13 Nov 2024 09:32:15 +1100 Subject: rtc: m48t59: Use platform_data struct for year offset value Instead of hard-coded values and ifdefs, store the year offset in the platform_data struct. Tested-by: Daniel Palmer Reviewed-by: Geert Uytterhoeven Signed-off-by: Finn Thain Tested-by: Andreas Larsson Acked-by: Andreas Larsson Link: https://lore.kernel.org/r/665c3526184a8d0c4a6373297d8e7d9a12591d8b.1731450735.git.fthain@linux-m68k.org Signed-off-by: Alexandre Belloni --- include/linux/rtc/m48t59.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/rtc/m48t59.h b/include/linux/rtc/m48t59.h index 9465d5405fe2..373ba77071c6 100644 --- a/include/linux/rtc/m48t59.h +++ b/include/linux/rtc/m48t59.h @@ -56,6 +56,9 @@ struct m48t59_plat_data { void __iomem *ioaddr; /* offset to RTC registers, automatically set according to the type */ unsigned int offset; + + /* YY digits (in RTC) are offset, i.e. year is 1900 + yy_offset + YY */ + int yy_offset; }; #endif /* _LINUX_RTC_M48T59_H_ */ -- cgit v1.2.3 From c750629caeca01979da3403f4bebecda88713233 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 18 Nov 2024 15:14:34 +0000 Subject: io_uring: remove io_uring_cqwait_reg_arg A separate wait argument registration API was removed, also delete leftover uapi definitions. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/143b6a53591badac23632d3e6fa3e5db4b342ee2.1731942445.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 4418d0192959..aac9a4f8fa9a 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -873,20 +873,6 @@ enum { IORING_REG_WAIT_TS = (1U << 0), }; -/* - * Argument for IORING_REGISTER_CQWAIT_REG, registering a region of - * struct io_uring_reg_wait that can be indexed when io_uring_enter(2) is - * called rather than pass in a wait argument structure separately. - */ -struct io_uring_cqwait_reg_arg { - __u32 flags; - __u32 struct_size; - __u32 nr_entries; - __u32 pad; - __u64 user_addr; - __u64 pad2[3]; -}; - /* * Argument for io_uring_enter(2) with * IORING_GETEVENTS | IORING_ENTER_EXT_ARG_REG set, where the actual argument -- cgit v1.2.3 From 03854920c39c62b88c0b540c92cf35746d059af2 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 6 Oct 2024 02:19:52 +0100 Subject: libceph: Remove unused ceph_pagelist functions ceph_pagelist_truncate() and ceph_pagelist_set_cursor() have been unused since commit 39be95e9c8c0 ("ceph: ceph_pagelist_append might sleep while atomic") Remove them. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/pagelist.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h index 5dead8486fd8..879bec0863aa 100644 --- a/include/linux/ceph/pagelist.h +++ b/include/linux/ceph/pagelist.h @@ -17,12 +17,6 @@ struct ceph_pagelist { refcount_t refcnt; }; -struct ceph_pagelist_cursor { - struct ceph_pagelist *pl; /* pagelist, for error checking */ - struct list_head *page_lru; /* page in list */ - size_t room; /* room remaining to reset to */ -}; - struct ceph_pagelist *ceph_pagelist_alloc(gfp_t gfp_flags); extern void ceph_pagelist_release(struct ceph_pagelist *pl); @@ -33,12 +27,6 @@ extern int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space); extern int ceph_pagelist_free_reserve(struct ceph_pagelist *pl); -extern void ceph_pagelist_set_cursor(struct ceph_pagelist *pl, - struct ceph_pagelist_cursor *c); - -extern int ceph_pagelist_truncate(struct ceph_pagelist *pl, - struct ceph_pagelist_cursor *c); - static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v) { __le64 ev = cpu_to_le64(v); -- cgit v1.2.3 From ee1eb8ccaab8cc2ef4bda8e11a40409ee20f6405 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 6 Oct 2024 02:19:53 +0100 Subject: libceph: Remove unused pagevec functions ceph_copy_user_to_page_vector() has been unused since 2013's commit e8344e668915 ("ceph: Implement writev/pwritev for sync operation.") ceph_copy_to_page_vector() has been unused since 2012's commit 913d2fdcf605 ("rbd: always pass ops array to rbd_req_sync_op()") Remove them. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 15fb566d3f46..733e7f93db66 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -317,12 +317,6 @@ extern void ceph_release_page_vector(struct page **pages, int num_pages); extern void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); -extern int ceph_copy_user_to_page_vector(struct page **pages, - const void __user *data, - loff_t off, size_t len); -extern void ceph_copy_to_page_vector(struct page **pages, - const void *data, - loff_t off, size_t len); extern void ceph_copy_from_page_vector(struct page **pages, void *data, loff_t off, size_t len); -- cgit v1.2.3 From 32844fd72b879d02f1f6b4394025349d31a09fd3 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 6 Oct 2024 02:19:54 +0100 Subject: libceph: Remove unused ceph_osdc_watch_check ceph_osdc_watch_check() has been unused since it was added in commit b07d3c4bd727 ("libceph: support for checking on status of watch") Remove it. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index d7941478158c..d55b30057a45 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -626,8 +626,6 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, u32 timeout, struct page ***preply_pages, size_t *preply_len); -int ceph_osdc_watch_check(struct ceph_osd_client *osdc, - struct ceph_osd_linger_request *lreq); int ceph_osdc_list_watchers(struct ceph_osd_client *osdc, struct ceph_object_id *oid, struct ceph_object_locator *oloc, -- cgit v1.2.3 From 979c6342f9c0a48696a6420f14f9dd409591657f Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 15 Nov 2024 13:41:21 -0800 Subject: nvme-pci: add support for sgl metadata Supporting this mode allows creating and merging multi-segment metadata requests that wouldn't be possible otherwise. It also allows directly using user space requests that straddle physically discontiguous pages. Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 0a6e22038ce3..5873ce859cc8 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -389,6 +389,7 @@ enum { NVME_CTRL_CTRATT_PREDICTABLE_LAT = 1 << 5, NVME_CTRL_CTRATT_NAMESPACE_GRANULARITY = 1 << 7, NVME_CTRL_CTRATT_UUID_LIST = 1 << 9, + NVME_CTRL_SGLS_MSDS = 1 << 19, }; struct nvme_lbaf { -- cgit v1.2.3 From 6399a0db8cd61eedbfb4b7809a4f4699157a9bf8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 13 Nov 2024 13:57:04 -0800 Subject: nvme: define the remaining used sgls constants This provides a little more context when reading the code than hardcoded magic numbers. Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 5873ce859cc8..2baf9a80b470 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -389,7 +389,11 @@ enum { NVME_CTRL_CTRATT_PREDICTABLE_LAT = 1 << 5, NVME_CTRL_CTRATT_NAMESPACE_GRANULARITY = 1 << 7, NVME_CTRL_CTRATT_UUID_LIST = 1 << 9, + NVME_CTRL_SGLS_BYTE_ALIGNED = 1, + NVME_CTRL_SGLS_DWORD_ALIGNED = 2, + NVME_CTRL_SGLS_KSDBDS = 1 << 2, NVME_CTRL_SGLS_MSDS = 1 << 19, + NVME_CTRL_SGLS_SAOS = 1 << 20, }; struct nvme_lbaf { -- cgit v1.2.3 From 28b6acd75e3cefbe746ec7402c7ff4fdb114f327 Mon Sep 17 00:00:00 2001 From: Rick Wertenbroek Date: Thu, 14 Nov 2024 17:10:32 +0100 Subject: PCI: endpoint: Fix pci_epc_map map_size kerneldoc string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because some endpoint controllers have requirements on the alignment of the controller physical memory address that must be used to map a RC PCI address region, the map PCI start address is not necessarily the desired PCI base address to be mapped. This can result in map_pci_addr being lower than pci_addr as documented. This results in map_size covering the range map_pci_addr..pci_addr+pci_size. The old text had the pci_addr twice instead of map_pci_addr..pci_addr, so replace the erroneous kerneldoc string to reflect the actual range. Link: https://lore.kernel.org/r/20241114161032.3046202-1-rick.wertenbroek@gmail.com Signed-off-by: Rick Wertenbroek Signed-off-by: Krzysztof Wilczyński --- include/linux/pci-epc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index de8cc3658220..e818e3fdcded 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -40,7 +40,7 @@ pci_epc_interface_string(enum pci_epc_interface_type type) * @map_pci_addr: RC PCI address used as the first address mapped (may be lower * than @pci_addr) * @map_size: size of the controller memory needed for mapping the RC PCI address - * range @pci_addr..@pci_addr+@pci_size + * range @map_pci_addr..@pci_addr+@pci_size * @phys_base: base physical address of the allocated EPC memory for mapping the * RC PCI address range * @phys_addr: physical address at which @pci_addr is mapped -- cgit v1.2.3 From 2f746e40e9baae878f8193e09d8f6d601dce42bb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2024 09:36:27 -0400 Subject: lockd: Remove unused typedef Clean up: Looks like the last usage of this typedef was removed by commit 026fec7e7c47 ("sunrpc: properly type pc_decode callbacks") in 2017. Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/lockd/xdr.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 80cca9426761..17d53165d9f2 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -73,8 +73,6 @@ struct nlm_args { u32 fsm_mode; }; -typedef struct nlm_args nlm_args; - /* * Generic lockd result */ -- cgit v1.2.3 From 8994a512e2598c0bf1b6e9ece1e8292976b0dd65 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2024 09:36:30 -0400 Subject: lockd: Remove unused parameter to nlmsvc_testlock() The nlm_cookie parameter has been unused since commit 09802fd2a8ca ("lockd: rip out deferred lock handling from testlock codepath"). Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/lockd/lockd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 61c4b9c41904..c8f0f9458f2c 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -278,9 +278,9 @@ __be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, struct nlm_host *, struct nlm_lock *, int, struct nlm_cookie *, int); __be32 nlmsvc_unlock(struct net *net, struct nlm_file *, struct nlm_lock *); -__be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *, - struct nlm_host *, struct nlm_lock *, - struct nlm_lock *, struct nlm_cookie *); +__be32 nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, + struct nlm_host *host, struct nlm_lock *lock, + struct nlm_lock *conflock); __be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *); void nlmsvc_retry_blocked(struct svc_rqst *rqstp); void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, -- cgit v1.2.3 From c840b8e1f039e90f97ca55525667eb961422f86c Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 13 Nov 2024 22:59:38 -0500 Subject: nfs_common: must not hold RCU while calling nfsd_file_put_local Move holding the RCU from nfs_to_nfsd_file_put_local to nfs_to_nfsd_net_put. It is the call to nfs_to->nfsd_serv_put that requires the RCU anyway (the puts for nfsd_file and netns were combined to avoid an extra indirect reference but that micro-optimization isn't possible now). This fixes xfstests generic/013 and it triggering: "Voluntary context switch within RCU read-side critical section!" [ 143.545738] Call Trace: [ 143.546206] [ 143.546625] ? show_regs+0x6d/0x80 [ 143.547267] ? __warn+0x91/0x140 [ 143.547951] ? rcu_note_context_switch+0x496/0x5d0 [ 143.548856] ? report_bug+0x193/0x1a0 [ 143.549557] ? handle_bug+0x63/0xa0 [ 143.550214] ? exc_invalid_op+0x1d/0x80 [ 143.550938] ? asm_exc_invalid_op+0x1f/0x30 [ 143.551736] ? rcu_note_context_switch+0x496/0x5d0 [ 143.552634] ? wakeup_preempt+0x62/0x70 [ 143.553358] __schedule+0xaa/0x1380 [ 143.554025] ? _raw_spin_unlock_irqrestore+0x12/0x40 [ 143.554958] ? try_to_wake_up+0x1fe/0x6b0 [ 143.555715] ? wake_up_process+0x19/0x20 [ 143.556452] schedule+0x2e/0x120 [ 143.557066] schedule_preempt_disabled+0x19/0x30 [ 143.557933] rwsem_down_read_slowpath+0x24d/0x4a0 [ 143.558818] ? xfs_efi_item_format+0x50/0xc0 [xfs] [ 143.559894] down_read+0x4e/0xb0 [ 143.560519] xlog_cil_commit+0x1b2/0xbc0 [xfs] [ 143.561460] ? _raw_spin_unlock+0x12/0x30 [ 143.562212] ? xfs_inode_item_precommit+0xc7/0x220 [xfs] [ 143.563309] ? xfs_trans_run_precommits+0x69/0xd0 [xfs] [ 143.564394] __xfs_trans_commit+0xb5/0x330 [xfs] [ 143.565367] xfs_trans_roll+0x48/0xc0 [xfs] [ 143.566262] xfs_defer_trans_roll+0x57/0x100 [xfs] [ 143.567278] xfs_defer_finish_noroll+0x27a/0x490 [xfs] [ 143.568342] xfs_defer_finish+0x1a/0x80 [xfs] [ 143.569267] xfs_bunmapi_range+0x4d/0xb0 [xfs] [ 143.570208] xfs_itruncate_extents_flags+0x13d/0x230 [xfs] [ 143.571353] xfs_free_eofblocks+0x12e/0x190 [xfs] [ 143.572359] xfs_file_release+0x12d/0x140 [xfs] [ 143.573324] __fput+0xe8/0x2d0 [ 143.573922] __fput_sync+0x1d/0x30 [ 143.574574] nfsd_filp_close+0x33/0x60 [nfsd] [ 143.575430] nfsd_file_free+0x96/0x150 [nfsd] [ 143.576274] nfsd_file_put+0xf7/0x1a0 [nfsd] [ 143.577104] nfsd_file_put_local+0x18/0x30 [nfsd] [ 143.578070] nfs_close_local_fh+0x101/0x110 [nfs_localio] [ 143.579079] __put_nfs_open_context+0xc9/0x180 [nfs] [ 143.580031] nfs_file_clear_open_context+0x4a/0x60 [nfs] [ 143.581038] nfs_file_release+0x3e/0x60 [nfs] [ 143.581879] __fput+0xe8/0x2d0 [ 143.582464] __fput_sync+0x1d/0x30 [ 143.583108] __x64_sys_close+0x41/0x80 [ 143.583823] x64_sys_call+0x189a/0x20d0 [ 143.584552] do_syscall_64+0x64/0x170 [ 143.585240] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 143.586185] RIP: 0033:0x7f3c5153efd7 Fixes: 65f2a5c36635 ("nfs_common: fix race in NFS calls to nfsd_file_put_local() and nfsd_serv_put()") Signed-off-by: Mike Snitzer Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/nfslocalio.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 3982fea79919..9202f4b24343 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -55,7 +55,7 @@ struct nfsd_localio_operations { const struct cred *, const struct nfs_fh *, const fmode_t); - void (*nfsd_file_put_local)(struct nfsd_file *); + struct net *(*nfsd_file_put_local)(struct nfsd_file *); struct file *(*nfsd_file_file)(struct nfsd_file *); } ____cacheline_aligned; @@ -66,7 +66,7 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *, struct rpc_clnt *, const struct cred *, const struct nfs_fh *, const fmode_t); -static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) +static inline void nfs_to_nfsd_net_put(struct net *net) { /* * Once reference to nfsd_serv is dropped, NFSD could be @@ -74,10 +74,22 @@ static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) * by always taking RCU. */ rcu_read_lock(); - nfs_to->nfsd_file_put_local(localio); + nfs_to->nfsd_serv_put(net); rcu_read_unlock(); } +static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) +{ + /* + * Must not hold RCU otherwise nfsd_file_put() can easily trigger: + * "Voluntary context switch within RCU read-side critical section!" + * by scheduling deep in underlying filesystem (e.g. XFS). + */ + struct net *net = nfs_to->nfsd_file_put_local(localio); + + nfs_to_nfsd_net_put(net); +} + #else /* CONFIG_NFS_LOCALIO */ static inline void nfsd_localio_ops_init(void) { -- cgit v1.2.3 From 1cfb5e57886aa69a992ff0ebd32e4651eb0fc995 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 15 Nov 2024 12:43:03 -0800 Subject: Revert "net: ethtool: Avoid thousands of -Wflex-array-member-not-at-end warnings" This reverts commit 3bd9b9abdf1563a22041b7255baea6d449902f1a. We cannot use the new tagged struct group because it throws C++ errors even under "extern C". Signed-off-by: Kees Cook Link: https://patch.msgid.link/20241115204308.3821419-1-kees@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 299280c94d07..b8b935b52603 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -211,7 +211,7 @@ void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id); * fields, but they are allowed to overwrite them (will be ignored). */ struct ethtool_link_ksettings { - struct ethtool_link_settings_hdr base; + struct ethtool_link_settings base; struct { __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); -- cgit v1.2.3 From ebda123fe703f492d7d557a4da00888ddec4779e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 15 Nov 2024 12:43:04 -0800 Subject: Revert "UAPI: ethtool: Use __struct_group() in struct ethtool_link_settings" This reverts commit 43d3487035e9a86fad952de4240a518614240d43. We cannot use tagged struct groups in UAPI because C++ will throw syntax errors even under "extern C". Signed-off-by: Kees Cook Link: https://patch.msgid.link/20241115204308.3821419-2-kees@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index fc1f54b065f9..c405ed63acfa 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2511,24 +2511,21 @@ enum ethtool_reset_flags { * autonegotiation; 0 if unknown or not applicable. Read-only. */ struct ethtool_link_settings { - /* New members MUST be added within the __struct_group() macro below. */ - __struct_group(ethtool_link_settings_hdr, hdr, /* no attrs */, - __u32 cmd; - __u32 speed; - __u8 duplex; - __u8 port; - __u8 phy_address; - __u8 autoneg; - __u8 mdio_support; - __u8 eth_tp_mdix; - __u8 eth_tp_mdix_ctrl; - __s8 link_mode_masks_nwords; - __u8 transceiver; - __u8 master_slave_cfg; - __u8 master_slave_state; - __u8 rate_matching; - __u32 reserved[7]; - ); + __u32 cmd; + __u32 speed; + __u8 duplex; + __u8 port; + __u8 phy_address; + __u8 autoneg; + __u8 mdio_support; + __u8 eth_tp_mdix; + __u8 eth_tp_mdix_ctrl; + __s8 link_mode_masks_nwords; + __u8 transceiver; + __u8 master_slave_cfg; + __u8 master_slave_state; + __u8 rate_matching; + __u32 reserved[7]; __u32 link_mode_masks[]; /* layout of link_mode_masks fields: * __u32 map_supported[link_mode_masks_nwords]; -- cgit v1.2.3 From 96c677fca54a28fcfea4dbab9c1f2530bd0a08d1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 15 Nov 2024 12:43:05 -0800 Subject: UAPI: ethtool: Avoid flex-array in struct ethtool_link_settings struct ethtool_link_settings tends to be used as a header for other structures that have trailing bytes[1], but has a trailing flexible array itself. Using this overlapped with other structures leads to ambiguous object sizing in the compiler, so we want to avoid such situations (which have caused real bugs in the past). Detecting this can be done with -Wflex-array-member-not-at-end, which will need to be enabled globally. Using a tagged struct_group() to create a new ethtool_link_settings_hdr structure isn't possible as it seems we cannot use the tagged variant of struct_group() due to syntax issues from C++'s perspective (even within "extern C")[2]. Instead, we can just leave the offending member defined in UAPI and remove it from the kernel's view of the structure, as Linux doesn't actually use this member at all. There is also no change in size since it was already a flexible array that didn't contribute to size returned by any use of sizeof(). Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/lkml/20241109100213.262a2fa0@kernel.org/ [2] Link: https://lore.kernel.org/lkml/0bc2809fe2a6c11dd4c8a9a10d9bd65cccdb559b.1730238285.git.gustavoars@kernel.org/ [1] Signed-off-by: Kees Cook Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20241115204308.3821419-3-kees@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index c405ed63acfa..7e1b3820f91f 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2526,12 +2526,19 @@ struct ethtool_link_settings { __u8 master_slave_state; __u8 rate_matching; __u32 reserved[7]; +#ifndef __KERNEL__ + /* Linux builds with -Wflex-array-member-not-at-end but does + * not use the "link_mode_masks" member. Leave it defined for + * userspace for now, and when userspace wants to start using + * -Wfamnae, we'll need a new solution. + */ __u32 link_mode_masks[]; /* layout of link_mode_masks fields: * __u32 map_supported[link_mode_masks_nwords]; * __u32 map_advertising[link_mode_masks_nwords]; * __u32 map_lp_advertising[link_mode_masks_nwords]; */ +#endif }; /** -- cgit v1.2.3 From a57d5a72f8dec7db8a79d0016fb0a3bdecc82b56 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 18 Nov 2024 03:15:18 -0800 Subject: netpoll: Use rcu_access_pointer() in netpoll_poll_lock The ndev->npinfo pointer in netpoll_poll_lock() is RCU-protected but is being accessed directly for a NULL check. While no RCU read lock is held in this context, we should still use proper RCU primitives for consistency and correctness. Replace the direct NULL check with rcu_access_pointer(), which is the appropriate primitive when only checking for NULL without dereferencing the pointer. This function provides the necessary ordering guarantees without requiring RCU read-side protection. Fixes: bea3348eef27 ("[NET]: Make NAPI polling independent of struct net_device objects.") Signed-off-by: Breno Leitao Reviewed-by: Michal Kubiak Link: https://patch.msgid.link/20241118-netpoll_rcu-v1-2-a1888dcb4a02@debian.org Signed-off-by: Jakub Kicinski --- include/linux/netpoll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index cd4e28db0cbd..959a4daacea1 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -72,7 +72,7 @@ static inline void *netpoll_poll_lock(struct napi_struct *napi) { struct net_device *dev = napi->dev; - if (dev && dev->npinfo) { + if (dev && rcu_access_pointer(dev->npinfo)) { int owner = smp_processor_id(); while (cmpxchg(&napi->poll_owner, -1, owner) != -1) -- cgit v1.2.3 From 50f42c489528566ea2d8a9561ee54748b0441d51 Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Wed, 26 Jun 2024 09:49:10 -0400 Subject: ceph: correct ceph_mds_cap_item field name The issue_seq is sent with bulk cap releases, not the current sequence number. See also ceph.git commit 655cddb7c9f3 ("include/ceph_fs: correct ceph_mds_cap_item field name"). Link: https://tracker.ceph.com/issues/66704 Signed-off-by: Patrick Donnelly Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index ee1d0e5f9789..4ff3ad5e9210 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -822,7 +822,7 @@ struct ceph_mds_cap_release { struct ceph_mds_cap_item { __le64 ino; __le64 cap_id; - __le32 migrate_seq, seq; + __le32 migrate_seq, issue_seq; } __attribute__ ((packed)); #define CEPH_MDS_LEASE_REVOKE 1 /* mds -> client */ -- cgit v1.2.3 From 8b41ac43c7bb902786eae09cc23bcc9964ef2386 Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Wed, 26 Jun 2024 12:57:27 -0400 Subject: ceph: correct ceph_mds_cap_peer field name The peer seq is used as the issue_seq. Use that name for consistency. See also ceph.git commit 1da6ef237fc7 ("include/ceph_fs: correct ceph_mds_cap_peer field name"). Link: https://tracker.ceph.com/issues/66704 Signed-off-by: Patrick Donnelly Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 4ff3ad5e9210..2d7d86f0290d 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -808,7 +808,7 @@ struct ceph_mds_caps { struct ceph_mds_cap_peer { __le64 cap_id; - __le32 seq; + __le32 issue_seq; __le32 mseq; __le32 mds; __u8 flags; -- cgit v1.2.3 From 46fd48ab3ea3eb3bb215684bd66ea3d260b091a9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Nov 2024 08:26:02 +0100 Subject: block: return unsigned int from bdev_io_min The underlying limit is defined as an unsigned int, so return that from bdev_io_min as well. Fixes: ac481c20ef8f ("block: Topology ioctls") Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: John Garry Link: https://lore.kernel.org/r/20241119072602.1059488-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a1fd0ddce5cf..195db38fda16 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1261,7 +1261,7 @@ static inline unsigned int queue_io_min(const struct request_queue *q) return q->limits.io_min; } -static inline int bdev_io_min(struct block_device *bdev) +static inline unsigned int bdev_io_min(struct block_device *bdev) { return queue_io_min(bdev_get_queue(bdev)); } -- cgit v1.2.3 From f06e108a3dc53c0f5234d18de0bd224753db5019 Mon Sep 17 00:00:00 2001 From: Jan Hendrik Farr Date: Tue, 29 Oct 2024 15:00:36 +0100 Subject: Compiler Attributes: disable __counted_by for clang < 19.1.3 This patch disables __counted_by for clang versions < 19.1.3 because of the two issues listed below. It does this by introducing CONFIG_CC_HAS_COUNTED_BY. 1. clang < 19.1.2 has a bug that can lead to __bdos returning 0: https://github.com/llvm/llvm-project/pull/110497 2. clang < 19.1.3 has a bug that can lead to __bdos being off by 4: https://github.com/llvm/llvm-project/pull/112636 Fixes: c8248faf3ca2 ("Compiler Attributes: counted_by: Adjust name and identifier expansion") Cc: stable@vger.kernel.org # 6.6.x: 16c31dd7fdf6: Compiler Attributes: counted_by: bump min gcc version Cc: stable@vger.kernel.org # 6.6.x: 2993eb7a8d34: Compiler Attributes: counted_by: fixup clang URL Cc: stable@vger.kernel.org # 6.6.x: 231dc3f0c936: lkdtm/bugs: Improve warning message for compilers without counted_by support Cc: stable@vger.kernel.org # 6.6.x Reported-by: Nathan Chancellor Closes: https://lore.kernel.org/all/20240913164630.GA4091534@thelio-3990X/ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202409260949.a1254989-oliver.sang@intel.com Link: https://lore.kernel.org/all/Zw8iawAF5W2uzGuh@archlinux/T/#m204c09f63c076586a02d194b87dffc7e81b8de7b Suggested-by: Nathan Chancellor Signed-off-by: Jan Hendrik Farr Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Reviewed-by: Miguel Ojeda Reviewed-by: Thorsten Blum Link: https://lore.kernel.org/r/20241029140036.577804-2-kernel@jfarr.cc Signed-off-by: Kees Cook --- include/linux/compiler_attributes.h | 13 ------------- include/linux/compiler_types.h | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 32284cd26d52..c16d4199bf92 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -94,19 +94,6 @@ # define __copy(symbol) #endif -/* - * Optional: only supported since gcc >= 15 - * Optional: only supported since clang >= 18 - * - * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 - * clang: https://github.com/llvm/llvm-project/pull/76348 - */ -#if __has_attribute(__counted_by__) -# define __counted_by(member) __attribute__((__counted_by__(member))) -#else -# define __counted_by(member) -#endif - /* * Optional: not supported by gcc * Optional: only supported since clang >= 14.0 diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 1a957ea2f4fe..639be0f30b45 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -323,6 +323,25 @@ struct ftrace_likely_data { #define __no_sanitize_or_inline __always_inline #endif +/* + * Optional: only supported since gcc >= 15 + * Optional: only supported since clang >= 18 + * + * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 + * clang: https://github.com/llvm/llvm-project/pull/76348 + * + * __bdos on clang < 19.1.2 can erroneously return 0: + * https://github.com/llvm/llvm-project/pull/110497 + * + * __bdos on clang < 19.1.3 can be off by 4: + * https://github.com/llvm/llvm-project/pull/112636 + */ +#ifdef CONFIG_CC_HAS_COUNTED_BY +# define __counted_by(member) __attribute__((__counted_by__(member))) +#else +# define __counted_by(member) +#endif + /* * Apply __counted_by() when the Endianness matches to increase test coverage. */ -- cgit v1.2.3 From d7f36dc446e894e0f57b5f05c5628f03c5f9e2d2 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 18 Nov 2024 10:50:15 +0000 Subject: block: Support atomic writes limits for stacked devices Allow stacked devices to support atomic writes by aggregating the minimum capability of all bottom devices. Flag BLK_FEAT_ATOMIC_WRITES_STACKED is set for stacked devices which have been enabled to support atomic writes. Some things to note on the implementation: - For simplicity, all bottom devices must have same atomic write boundary value (if any) - The atomic write boundary must be a power-of-2 already, but this restriction could be relaxed. Furthermore, it is now required that the chunk sectors for a top device must be aligned with this boundary. - If a bottom device atomic write unit min/max are not aligned with the top device chunk sectors, the top device atomic write unit min/max are reduced to a value which works for the chunk sectors. Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20241118105018.1870052-3-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 195db38fda16..31867de88213 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -333,6 +333,10 @@ typedef unsigned int __bitwise blk_features_t; #define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \ ((__force blk_features_t)(1u << 15)) +/* stacked device can/does support atomic writes */ +#define BLK_FEAT_ATOMIC_WRITES_STACKED \ + ((__force blk_features_t)(1u << 16)) + /* * Flags automatically inherited when stacking limits. */ -- cgit v1.2.3 From 9cf9f2e70bea4e66a2c8b8c4743489beb21258a8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 15 Nov 2024 22:00:25 +0100 Subject: cpuidle: Change :enter_dead() driver callback return type to void After a previous change, cpuidle_play_dead(), which is the only caller of idle state :enter_dead() callbacks, ignores their return values, so they may as well be void. Suggested-by: Peter Zijlstra Signed-off-by: Rafael J. Wysocki Reviewed-by: Gautham R. Shenoy Reviewed-by: Mario Limonciello Link: https://patch.msgid.link/2285569.iZASKD2KPV@rjwysocki.net --- include/linux/cpuidle.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 3183aeb7f5b4..a9ee4fe55dcf 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -61,7 +61,7 @@ struct cpuidle_state { struct cpuidle_driver *drv, int index); - int (*enter_dead) (struct cpuidle_device *dev, int index); + void (*enter_dead) (struct cpuidle_device *dev, int index); /* * CPUs execute ->enter_s2idle with the local tick or entire timekeeping -- cgit v1.2.3 From 5017ec667b48464a301d34db9b63fa5ab8765d49 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 11 Nov 2024 13:54:09 +0300 Subject: kunit: skb: add gfp to kernel doc for kunit_zalloc_skb() Kuan-Wei Chiu pointed out that the kernel doc for kunit_zalloc_skb() needs to include the @gfp information. Add it. Reported-by: Kuan-Wei Chiu Closes: https://lore.kernel.org/all/Zy+VIXDPuU613fFd@visitorckw-System-Product-Name/ Signed-off-by: Dan Carpenter Reviewed-by: Kuan-Wei Chiu Reviewed-by: David Gow Signed-off-by: Shuah Khan --- include/kunit/skbuff.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/kunit/skbuff.h b/include/kunit/skbuff.h index 345e1e8f0312..07784694357c 100644 --- a/include/kunit/skbuff.h +++ b/include/kunit/skbuff.h @@ -20,8 +20,9 @@ static void kunit_action_kfree_skb(void *p) * kunit_zalloc_skb() - Allocate and initialize a resource managed skb. * @test: The test case to which the skb belongs * @len: size to allocate + * @gfp: allocation flags * - * Allocate a new struct sk_buff with GFP_KERNEL, zero fill the give length + * Allocate a new struct sk_buff with gfp flags, zero fill the given length * and add it as a resource to the kunit test for automatic cleanup. * * Returns: newly allocated SKB, or %NULL on error -- cgit v1.2.3 From 5a9d1b83e5334915c651604648c20a9fc64d47a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Nov 2024 17:09:18 +0100 Subject: block: return unsigned int from bdev_io_opt The underlying limit is defined as an unsigned int, so return that from bdev_io_opt as well. Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20241119160932.1327864-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 31867de88213..97c89c0c1398 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1275,7 +1275,7 @@ static inline unsigned int queue_io_opt(const struct request_queue *q) return q->limits.io_opt; } -static inline int bdev_io_opt(struct block_device *bdev) +static inline unsigned int bdev_io_opt(struct block_device *bdev) { return queue_io_opt(bdev_get_queue(bdev)); } -- cgit v1.2.3 From ed5db174cf39374215934f21b04639a7a1513023 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Nov 2024 17:09:19 +0100 Subject: block: return unsigned int from queue_dma_alignment The underlying limit is defined as an unsigned int, so return that from queue_dma_alignment as well. Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20241119160932.1327864-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 97c89c0c1398..d8de261c2b99 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1421,7 +1421,7 @@ static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector) return is_seq; } -static inline int queue_dma_alignment(const struct request_queue *q) +static inline unsigned int queue_dma_alignment(const struct request_queue *q) { return q->limits.dma_alignment; } -- cgit v1.2.3 From e769489a54401d0c89555f7ad8672038b5c2b767 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Nov 2024 17:09:20 +0100 Subject: block: return unsigned int from blk_lim_dma_alignment_and_pad The underlying limits are defined as unsigned int, so return that from blk_lim_dma_alignment_and_pad as well. Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20241119160932.1327864-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d8de261c2b99..d0d8190429c8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1462,7 +1462,8 @@ static inline bool bdev_iter_is_aligned(struct block_device *bdev, bdev_logical_block_size(bdev) - 1); } -static inline int blk_lim_dma_alignment_and_pad(struct queue_limits *lim) +static inline unsigned int +blk_lim_dma_alignment_and_pad(struct queue_limits *lim) { return lim->dma_alignment | lim->dma_pad_mask; } -- cgit v1.2.3 From da77d9b23700708d0d22a4407d32a8755a3596e8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Nov 2024 17:09:21 +0100 Subject: block: return bool from blk_rq_aligned blk_rq_aligned returns a boolean condition, don't mascquerade it as int. Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20241119160932.1327864-5-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d0d8190429c8..5270117d54ac 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1468,7 +1468,7 @@ blk_lim_dma_alignment_and_pad(struct queue_limits *lim) return lim->dma_alignment | lim->dma_pad_mask; } -static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, +static inline bool blk_rq_aligned(struct request_queue *q, unsigned long addr, unsigned int len) { unsigned int alignment = blk_lim_dma_alignment_and_pad(&q->limits); -- cgit v1.2.3 From e888810bc4f471f85989a0991aff28d2ac9f783b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Nov 2024 17:09:22 +0100 Subject: block: remove a duplicate definition for bdev_read_only bdev_read_only is already defined as an inline function in blkdev.h. Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20241119160932.1327864-6-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5270117d54ac..8b4e4692e7fb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1586,7 +1586,6 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev); } -int bdev_read_only(struct block_device *bdev); int set_blocksize(struct file *file, int size); int lookup_bdev(const char *pathname, dev_t *dev); -- cgit v1.2.3 From 766a71ef65bb217ed8bf1c068ac14c7d3c15d487 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Nov 2024 17:09:23 +0100 Subject: block: return bool from get_disk_ro and bdev_read_only get_disk_ro and bdev_read_only return boolean conditions, don't masquerade them as int. Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20241119160932.1327864-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8b4e4692e7fb..08a727b40816 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -779,13 +779,13 @@ static inline void bdev_clear_flag(struct block_device *bdev, unsigned flag) atomic_andnot(flag, &bdev->__bd_flags); } -static inline int get_disk_ro(struct gendisk *disk) +static inline bool get_disk_ro(struct gendisk *disk) { return bdev_test_flag(disk->part0, BD_READ_ONLY) || test_bit(GD_READ_ONLY, &disk->state); } -static inline int bdev_read_only(struct block_device *bdev) +static inline bool bdev_read_only(struct block_device *bdev) { return bdev_test_flag(bdev, BD_READ_ONLY) || get_disk_ro(bdev->bd_disk); } -- cgit v1.2.3 From e7619f2a2f8f9b10feb784ec6b8ea5320ad3b97e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Nov 2024 16:57:56 +0000 Subject: KVM: arm64: vgic: Kill VGIC_MAX_PRIVATE definition VGIC_MAX_PRIVATE is a pretty useless definition, and is better replaced with VGIC_NR_PRIVATE_IRQS. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241117165757.247686-4-maz@kernel.org Signed-off-by: Oliver Upton --- include/kvm/arm_vgic.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f5172549f9ba..3a8ccfda34d2 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -26,7 +26,6 @@ #define VGIC_NR_SGIS 16 #define VGIC_NR_PPIS 16 #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) -#define VGIC_MAX_PRIVATE (VGIC_NR_PRIVATE_IRQS - 1) #define VGIC_MAX_SPI 1019 #define VGIC_MAX_RESERVED 1023 #define VGIC_MIN_LPI 8192 -- cgit v1.2.3 From f46b9cdb22f7a167c36b6bcddaef7e8aee2598fa Mon Sep 17 00:00:00 2001 From: David Wei Date: Wed, 20 Nov 2024 14:14:52 -0800 Subject: io_uring: limit local tw done Instead of eagerly running all available local tw, limit the amount of local tw done to the max of IO_LOCAL_TW_DEFAULT_MAX (20) or wait_nr. The value of 20 is chosen as a reasonable heuristic to allow enough work batching but also keep latency down. Add a retry_llist that maintains a list of local tw that couldn't be done in time. No synchronisation is needed since it is only modified within the task context. Signed-off-by: David Wei Link: https://lore.kernel.org/r/20241120221452.3762588-3-dw@davidwei.uk Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index aa5f5ea98076..3e934feb3187 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -335,6 +335,7 @@ struct io_ring_ctx { */ struct { struct llist_head work_llist; + struct llist_head retry_llist; unsigned long check_cq; atomic_t cq_wait_nr; atomic_t cq_timeouts; -- cgit v1.2.3 From b88cbaaa6fa109c2eb455d8fe2a318de0d197ea2 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 15 Nov 2024 15:44:27 -0600 Subject: PCI/pwrctrl: Rename pwrctl files to pwrctrl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To slightly reduce confusion between "pwrctl" (the power controller and power sequencing framework) and "bwctrl" (the bandwidth controller), rename "pwrctl" to "pwrctrl" so they use the same "ctrl" suffix. Rename drivers/pci/pwrctl/ to drivers/pci/pwrctrl/, including the related MAINTAINERS, include file (include/linux/pci-pwrctl.h), Makefile, and Kconfig changes. This is the minimal rename of files only. A subsequent commit will rename functions and data structures. Link: https://lore.kernel.org/r/20241115214428.2061153-2-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Signed-off-by: Krzysztof Wilczyński Acked-by: Krzysztof Wilczyński --- include/linux/pci-pwrctl.h | 54 --------------------------------------------- include/linux/pci-pwrctrl.h | 54 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 54 deletions(-) delete mode 100644 include/linux/pci-pwrctl.h create mode 100644 include/linux/pci-pwrctrl.h (limited to 'include') diff --git a/include/linux/pci-pwrctl.h b/include/linux/pci-pwrctl.h deleted file mode 100644 index 0d23dddf59ec..000000000000 --- a/include/linux/pci-pwrctl.h +++ /dev/null @@ -1,54 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2024 Linaro Ltd. - */ - -#ifndef __PCI_PWRCTL_H__ -#define __PCI_PWRCTL_H__ - -#include -#include - -struct device; -struct device_link; - -/* - * This is a simple framework for solving the issue of PCI devices that require - * certain resources (regulators, GPIOs, clocks) to be enabled before the - * device can actually be detected on the PCI bus. - * - * The idea is to reuse the platform bus to populate OF nodes describing the - * PCI device and its resources, let these platform devices probe and enable - * relevant resources and then trigger a rescan of the PCI bus allowing for the - * same device (with a second associated struct device) to be registered with - * the PCI subsystem. - * - * To preserve a correct hierarchy for PCI power management and device reset, - * we create a device link between the power control platform device (parent) - * and the supplied PCI device (child). - */ - -/** - * struct pci_pwrctl - PCI device power control context. - * @dev: Address of the power controlling device. - * - * An object of this type must be allocated by the PCI power control device and - * passed to the pwrctl subsystem to trigger a bus rescan and setup a device - * link with the device once it's up. - */ -struct pci_pwrctl { - struct device *dev; - - /* Private: don't use. */ - struct notifier_block nb; - struct device_link *link; - struct work_struct work; -}; - -void pci_pwrctl_init(struct pci_pwrctl *pwrctl, struct device *dev); -int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl); -void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl); -int devm_pci_pwrctl_device_set_ready(struct device *dev, - struct pci_pwrctl *pwrctl); - -#endif /* __PCI_PWRCTL_H__ */ diff --git a/include/linux/pci-pwrctrl.h b/include/linux/pci-pwrctrl.h new file mode 100644 index 000000000000..0d23dddf59ec --- /dev/null +++ b/include/linux/pci-pwrctrl.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Linaro Ltd. + */ + +#ifndef __PCI_PWRCTL_H__ +#define __PCI_PWRCTL_H__ + +#include +#include + +struct device; +struct device_link; + +/* + * This is a simple framework for solving the issue of PCI devices that require + * certain resources (regulators, GPIOs, clocks) to be enabled before the + * device can actually be detected on the PCI bus. + * + * The idea is to reuse the platform bus to populate OF nodes describing the + * PCI device and its resources, let these platform devices probe and enable + * relevant resources and then trigger a rescan of the PCI bus allowing for the + * same device (with a second associated struct device) to be registered with + * the PCI subsystem. + * + * To preserve a correct hierarchy for PCI power management and device reset, + * we create a device link between the power control platform device (parent) + * and the supplied PCI device (child). + */ + +/** + * struct pci_pwrctl - PCI device power control context. + * @dev: Address of the power controlling device. + * + * An object of this type must be allocated by the PCI power control device and + * passed to the pwrctl subsystem to trigger a bus rescan and setup a device + * link with the device once it's up. + */ +struct pci_pwrctl { + struct device *dev; + + /* Private: don't use. */ + struct notifier_block nb; + struct device_link *link; + struct work_struct work; +}; + +void pci_pwrctl_init(struct pci_pwrctl *pwrctl, struct device *dev); +int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl); +void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl); +int devm_pci_pwrctl_device_set_ready(struct device *dev, + struct pci_pwrctl *pwrctl); + +#endif /* __PCI_PWRCTL_H__ */ -- cgit v1.2.3 From 3f925cd6287401bbc9d568f56d796a69c8bd292a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 15 Nov 2024 15:44:28 -0600 Subject: PCI/pwrctrl: Rename pwrctrl functions and structures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename pwrctrl functions and structures from "pwrctl" to "pwrctrl" to match the similar file renames. Link: https://lore.kernel.org/r/20241115214428.2061153-3-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Signed-off-by: Krzysztof Wilczyński Acked-by: Krzysztof Wilczyński --- include/linux/pci-pwrctrl.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/pci-pwrctrl.h b/include/linux/pci-pwrctrl.h index 0d23dddf59ec..7d439b0675e9 100644 --- a/include/linux/pci-pwrctrl.h +++ b/include/linux/pci-pwrctrl.h @@ -3,8 +3,8 @@ * Copyright (C) 2024 Linaro Ltd. */ -#ifndef __PCI_PWRCTL_H__ -#define __PCI_PWRCTL_H__ +#ifndef __PCI_PWRCTRL_H__ +#define __PCI_PWRCTRL_H__ #include #include @@ -29,14 +29,14 @@ struct device_link; */ /** - * struct pci_pwrctl - PCI device power control context. + * struct pci_pwrctrl - PCI device power control context. * @dev: Address of the power controlling device. * * An object of this type must be allocated by the PCI power control device and - * passed to the pwrctl subsystem to trigger a bus rescan and setup a device + * passed to the pwrctrl subsystem to trigger a bus rescan and setup a device * link with the device once it's up. */ -struct pci_pwrctl { +struct pci_pwrctrl { struct device *dev; /* Private: don't use. */ @@ -45,10 +45,10 @@ struct pci_pwrctl { struct work_struct work; }; -void pci_pwrctl_init(struct pci_pwrctl *pwrctl, struct device *dev); -int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl); -void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl); -int devm_pci_pwrctl_device_set_ready(struct device *dev, - struct pci_pwrctl *pwrctl); +void pci_pwrctrl_init(struct pci_pwrctrl *pwrctrl, struct device *dev); +int pci_pwrctrl_device_set_ready(struct pci_pwrctrl *pwrctrl); +void pci_pwrctrl_device_unset_ready(struct pci_pwrctrl *pwrctrl); +int devm_pci_pwrctrl_device_set_ready(struct device *dev, + struct pci_pwrctrl *pwrctrl); -#endif /* __PCI_PWRCTL_H__ */ +#endif /* __PCI_PWRCTRL_H__ */ -- cgit v1.2.3 From 3273d8ad947dea925a65a78ca29e5351c960c801 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 8 Nov 2024 09:25:54 +0800 Subject: f2fs: fix to do cast in F2FS_{BLK_TO_BYTES, BTYES_TO_BLK} to avoid overflow It missed to cast variable to unsigned long long type before bit shift, which will cause overflow, fix it. Fixes: f7ef9b83b583 ("f2fs: introduce macros to convert bytes and blocks in f2fs") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index b0b821edfd97..3b2ad444c002 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -24,10 +24,10 @@ #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ #define COMPRESS_ADDR ((block_t)-2) /* used as compressed data flag */ -#define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS) -#define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS) +#define F2FS_BYTES_TO_BLK(bytes) ((unsigned long long)(bytes) >> F2FS_BLKSIZE_BITS) +#define F2FS_BLK_TO_BYTES(blk) ((unsigned long long)(blk) << F2FS_BLKSIZE_BITS) #define F2FS_BLK_END_BYTES(blk) (F2FS_BLK_TO_BYTES(blk + 1) - 1) -#define F2FS_BLK_ALIGN(x) (F2FS_BYTES_TO_BLK((x) + F2FS_BLKSIZE - 1)) +#define F2FS_BLK_ALIGN(x) (F2FS_BYTES_TO_BLK((x) + F2FS_BLKSIZE - 1)) /* 0, 1(node nid), 2(meta nid) are reserved node id */ #define F2FS_RESERVED_NODE_NUM 3 -- cgit v1.2.3 From 77569f785c8624fa4189795fb52e635a973672e5 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Fri, 8 Nov 2024 09:25:56 +0800 Subject: f2fs: fix to adjust appropriate length for fiemap If user give a file size as "length" parameter for fiemap operations, but if this size is non-block size aligned, it will show 2 segments fiemap results even this whole file is contiguous on disk, such as the following results: ./f2fs_io fiemap 0 19034 ylog/analyzer.py Fiemap: offset = 0 len = 19034 logical addr. physical addr. length flags 0 0000000000000000 0000000020baa000 0000000000004000 00001000 1 0000000000004000 0000000020bae000 0000000000001000 00001001 after this patch: ./f2fs_io fiemap 0 19034 ylog/analyzer.py Fiemap: offset = 0 len = 19034 logical addr. physical addr. length flags 0 0000000000000000 00000000315f3000 0000000000005000 00001001 Signed-off-by: Zhiguo Niu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 3b2ad444c002..c24f8bc01045 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -24,6 +24,7 @@ #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ #define COMPRESS_ADDR ((block_t)-2) /* used as compressed data flag */ +#define F2FS_BLKSIZE_MASK (F2FS_BLKSIZE - 1) #define F2FS_BYTES_TO_BLK(bytes) ((unsigned long long)(bytes) >> F2FS_BLKSIZE_BITS) #define F2FS_BLK_TO_BYTES(blk) ((unsigned long long)(blk) << F2FS_BLKSIZE_BITS) #define F2FS_BLK_END_BYTES(blk) (F2FS_BLK_TO_BYTES(blk + 1) - 1) -- cgit v1.2.3 From 7d2f9f870f26798699585f96fa7a2a2cab38dc02 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Wed, 20 Nov 2024 15:10:07 +0800 Subject: nvme: introduce change ptpl and iekey definition This is for the next tuning pr code more readble patch, make linux/nvme.h's changes separately. Signed-off-by: Guixin Liu Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 2baf9a80b470..13377dde4527 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -2171,4 +2171,13 @@ enum nvme_pr_release_action { NVME_PR_RELEASE_ACT_CLEAR = 1, }; +enum nvme_pr_change_ptpl { + NVME_PR_CPTPL_NO_CHANGE = 0, + NVME_PR_CPTPL_RESV = 1 << 30, + NVME_PR_CPTPL_CLEARED = 2 << 30, + NVME_PR_CPTPL_PERSIST = 3 << 30, +}; + +#define NVME_PR_IGNORE_KEY (1 << 3) + #endif /* _LINUX_NVME_H */ -- cgit v1.2.3 From 64214c2b95364d26cdff045d8bbefd37380edbe1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 14 Nov 2024 15:55:30 -0400 Subject: iommu: Add ops->domain_alloc_nested() It turns out all the drivers that are using this immediately call into another function, so just make that function directly into the op. This makes paging=NULL for domain_alloc_user and we can remove the argument in the next patch. The function mirrors the similar op in the viommu that allocates a nested domain on top of the viommu's nesting parent. This version supports cases where a viommu is not being used. Link: https://patch.msgid.link/r/1-v1-c252ebdeb57b+329-iommu_paging_flags_jgg@nvidia.com Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d6aaaec3caf4..0472cc124519 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -559,15 +559,13 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, * the caller iommu_domain_alloc() returns. * @domain_alloc_user: Allocate an iommu domain corresponding to the input * parameters as defined in include/uapi/linux/iommufd.h. - * Upon success, if the @user_data is valid and the @parent - * points to a kernel-managed domain, the new domain must be - * IOMMU_DOMAIN_NESTED type; otherwise, the @parent must be - * NULL while the @user_data can be optionally provided, the + * The @user_data can be optionally provided, the * new domain must support __IOMMU_DOMAIN_PAGING. * Upon failure, ERR_PTR must be returned. * @domain_alloc_paging: Allocate an iommu_domain that can be used for * UNMANAGED, DMA, and DMA_FQ domain types. * @domain_alloc_sva: Allocate an iommu_domain for Shared Virtual Addressing. + * @domain_alloc_nested: Allocate an iommu_domain for nested translation. * @probe_device: Add device to iommu driver handling * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU @@ -622,6 +620,9 @@ struct iommu_ops { struct iommu_domain *(*domain_alloc_paging)(struct device *dev); struct iommu_domain *(*domain_alloc_sva)(struct device *dev, struct mm_struct *mm); + struct iommu_domain *(*domain_alloc_nested)( + struct device *dev, struct iommu_domain *parent, u32 flags, + const struct iommu_user_data *user_data); struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); -- cgit v1.2.3 From d53764723ecd639a0cc0c5ad24146847fc09f78d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 14 Nov 2024 15:55:31 -0400 Subject: iommu: Rename ops->domain_alloc_user() to domain_alloc_paging_flags() Now that the main domain allocating path is calling this function it doesn't make sense to leave it named _user. Change the name to alloc_paging_flags() to mirror the new iommu_paging_domain_alloc_flags() function. A driver should implement only one of ops->domain_alloc_paging() or ops->domain_alloc_paging_flags(). The former is a simpler interface with less boiler plate that the majority of drivers use. The latter is for drivers with a greater feature set (PASID, multiple page table support, advanced iommufd support, nesting, etc). Additional patches will be needed to achieve this. Link: https://patch.msgid.link/r/2-v1-c252ebdeb57b+329-iommu_paging_flags_jgg@nvidia.com Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0472cc124519..1e3308e89996 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -557,13 +557,17 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, * @domain_alloc: allocate and return an iommu domain if success. Otherwise * NULL is returned. The domain is not fully initialized until * the caller iommu_domain_alloc() returns. - * @domain_alloc_user: Allocate an iommu domain corresponding to the input - * parameters as defined in include/uapi/linux/iommufd.h. - * The @user_data can be optionally provided, the - * new domain must support __IOMMU_DOMAIN_PAGING. - * Upon failure, ERR_PTR must be returned. + * @domain_alloc_paging_flags: Allocate an iommu domain corresponding to the + * input parameters as defined in + * include/uapi/linux/iommufd.h. The @user_data can be + * optionally provided, the new domain must support + * __IOMMU_DOMAIN_PAGING. Upon failure, ERR_PTR must be + * returned. * @domain_alloc_paging: Allocate an iommu_domain that can be used for - * UNMANAGED, DMA, and DMA_FQ domain types. + * UNMANAGED, DMA, and DMA_FQ domain types. This is the + * same as invoking domain_alloc_paging_flags() with + * @flags=0, @user_data=NULL. A driver should implement + * only one of the two ops. * @domain_alloc_sva: Allocate an iommu_domain for Shared Virtual Addressing. * @domain_alloc_nested: Allocate an iommu_domain for nested translation. * @probe_device: Add device to iommu driver handling @@ -614,8 +618,8 @@ struct iommu_ops { /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); - struct iommu_domain *(*domain_alloc_user)( - struct device *dev, u32 flags, struct iommu_domain *parent, + struct iommu_domain *(*domain_alloc_paging_flags)( + struct device *dev, u32 flags, const struct iommu_user_data *user_data); struct iommu_domain *(*domain_alloc_paging)(struct device *dev); struct iommu_domain *(*domain_alloc_sva)(struct device *dev, -- cgit v1.2.3 From 9d8a2b033db179bef9b6b5bad492f611a0fe89b7 Mon Sep 17 00:00:00 2001 From: Miao Wang Date: Thu, 21 Nov 2024 22:25:21 +0800 Subject: ACPI: introduce acpi_arch_init() To avoid arch-specific code in general ACPI initialization flow, introduce a weak symbol acpi_arch_init(). Currently, arm64 and riscv can utillize this to insert their arch-specific flow. In the future, other architectures can also have a chance to define their own arch-specific ACPI initialization process if necessary. Reviewed-by: Sunil V L Reviewed-by: Sudeep Holla Acked-by: Hanjun Guo Signed-off-by: Miao Wang Link: https://patch.msgid.link/20241121-intro-acpi-arch-init-v4-1-b1fb517e7d8b@gmail.com [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 7dd24acd9ffe..05f39fbfa485 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1530,17 +1530,7 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu) } #endif -#ifdef CONFIG_ARM64 -void acpi_arm_init(void); -#else -static inline void acpi_arm_init(void) { } -#endif - -#ifdef CONFIG_RISCV -void acpi_riscv_init(void); -#else -static inline void acpi_riscv_init(void) { } -#endif +void acpi_arch_init(void); #ifdef CONFIG_ACPI_PCC void acpi_init_pcc(void); -- cgit v1.2.3 From 0172afefbfbdd8987787c926b40b68400bd1c3d1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Nov 2024 21:28:49 +0100 Subject: tracing: Record task flag NEED_RESCHED_LAZY. The scheduler added NEED_RESCHED_LAZY scheduling. Record this state as part of trace flags and expose it in the need_resched field. Record and expose NEED_RESCHED_LAZY. [bigeasy: Commit description, documentation bits.] Cc: Peter Zijlstra Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20241122202849.7DfYpJR0@linutronix.de Reviewed-by: Ankur Arora Reviewed-by: Steven Rostedt (Google) Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 016b29a56c87..2a5df5b62cfc 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -184,6 +184,7 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, + TRACE_FLAG_NEED_RESCHED_LAZY = 0x02, TRACE_FLAG_NEED_RESCHED = 0x04, TRACE_FLAG_HARDIRQ = 0x08, TRACE_FLAG_SOFTIRQ = 0x10, -- cgit v1.2.3 From 306d40aa53b671ea72c3bf272f85ccb6c1b0bc65 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 23 Nov 2024 10:30:27 -0500 Subject: tracing: Move it_func[0] comment to the relevant context When introducing __DO_TRACE_CALL(), the iteration over it_func moved from __DO_TRACE() to __tracepoint_iter_##_name(), but the comment relevant for this iterator was left in its original location. Move the comment to the relevant context. Fixes: d25e37d89dd2 ("tracepoint: Optimize using static_call()") Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Cc: Jordan Rife Link: https://lore.kernel.org/20241123153031.2884933-2-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 425123e921ac..d390e8cabf02 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -210,9 +210,6 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #endif /* CONFIG_HAVE_STATIC_CALL */ /* - * it_func[0] is never NULL because there is at least one element in the array - * when the array itself is non NULL. - * * With @syscall=0, the tracepoint callback array dereference is * protected by disabling preemption. * With @syscall=1, the tracepoint callback array dereference is @@ -316,6 +313,9 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * We have no guarantee that gcc and the linker won't up-align the tracepoint * structures, so we create an array of pointers that will be used for iteration * on the tracepoints. + * + * it_func[0] is never NULL because there is at least one element in the array + * when the array itself is non NULL. */ #define __DEFINE_TRACE_EXT(_name, _ext, proto, args) \ static const char __tpstrtab_##_name[] \ -- cgit v1.2.3 From 89c7e17f303e2d56d50a162bb65d786d3a43963e Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 23 Nov 2024 10:30:28 -0500 Subject: tracing: Remove __idx variable from __DO_TRACE Since the removal of SRCU-protected tracepoints, the __idx variable in __DO_TRACE is unused. Remove this variable. Fixes: 48bcda684823 ("tracing: Remove definition of trace_*_rcuidle()") Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Cc: Jordan Rife Link: https://lore.kernel.org/20241123153031.2884933-3-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index d390e8cabf02..867f3c1ac7dc 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -218,8 +218,6 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) */ #define __DO_TRACE(name, args, cond, syscall) \ do { \ - int __maybe_unused __idx = 0; \ - \ if (!(cond)) \ return; \ \ -- cgit v1.2.3 From 7c565a4d4e437034755a06b7d61361add3b98882 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 23 Nov 2024 10:30:29 -0500 Subject: rcupdate_trace: Define rcu_tasks_trace lock guard Define a rcu_tasks_trace lock guard for use by the syscall enter/exit tracepoints. Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Cc: Jordan Rife Cc: linux-trace-kernel@vger.kernel.org Link: https://lore.kernel.org/20241123153031.2884933-4-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/linux/rcupdate_trace.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index eda493200663..e6c44eb428ab 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -10,6 +10,7 @@ #include #include +#include extern struct lockdep_map rcu_trace_lock_map; @@ -98,4 +99,8 @@ static inline void rcu_read_lock_trace(void) { BUG(); } static inline void rcu_read_unlock_trace(void) { BUG(); } #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ +DEFINE_LOCK_GUARD_0(rcu_tasks_trace, + rcu_read_lock_trace(), + rcu_read_unlock_trace()) + #endif /* __LINUX_RCUPDATE_TRACE_H */ -- cgit v1.2.3 From 98bf0fbb65226809a8df4d1948c5b6cf0c91590f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 23 Nov 2024 10:30:30 -0500 Subject: tracing: Remove conditional locking from __DO_TRACE() Remove conditional locking by moving the __DO_TRACE() code into trace_##name(). When the faultable syscall tracepoints were implemented, __DO_TRACE() had a rcuidle argument which selected between SRCU and preempt disable. Therefore, the RCU tasks trace protection for faultable syscall tracepoints was introduced using the same pattern. At that point, it did not appear obvious that this feedback from Linus [1] applied here as well, because the __DO_TRACE() modification was extending a pre-existing pattern. Shortly before pulling the faultable syscall tracepoints modifications, Steven removed the rcuidle argument and SRCU protection scheme entirely from tracepoint.h: commit 48bcda684823 ("tracing: Remove definition of trace_*_rcuidle()") This required a rebase of the faultable syscall tracepoints series, which missed a perfect opportunity to integrate the prior recommendation from Linus. In response to the pull request, Linus pointed out [2] that he was not pleased by the implementation, expecting this to be fixed in a follow up patch series. Move __DO_TRACE() code into trace_##name() within each of __DECLARE_TRACE() and __DECLARE_TRACE_SYSCALL(). Use a scoped guard to guard the preempt disable notrace and RCU tasks trace critical sections. Link: https://lore.kernel.org/all/CAHk-=wggDLDeTKbhb5hh--x=-DQd69v41137M72m6NOTmbD-cw@mail.gmail.com/ [1] Link: https://lore.kernel.org/lkml/CAHk-=witPrLcu22dZ93VCyRQonS7+-dFYhQbna=KBa-TAhayMw@mail.gmail.com/ [2] Fixes: a363d27cdbc2 ("tracing: Allow system call tracepoints to handle page faults") Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Cc: Jordan Rife Cc: linux-trace-kernel@vger.kernel.org Link: https://lore.kernel.org/20241123153031.2884933-5-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 45 ++++++++++++--------------------------------- 1 file changed, 12 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 867f3c1ac7dc..832f49b56b1f 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -209,31 +209,6 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __DO_TRACE_CALL(name, args) __traceiter_##name(NULL, args) #endif /* CONFIG_HAVE_STATIC_CALL */ -/* - * With @syscall=0, the tracepoint callback array dereference is - * protected by disabling preemption. - * With @syscall=1, the tracepoint callback array dereference is - * protected by Tasks Trace RCU, which allows probes to handle page - * faults. - */ -#define __DO_TRACE(name, args, cond, syscall) \ - do { \ - if (!(cond)) \ - return; \ - \ - if (syscall) \ - rcu_read_lock_trace(); \ - else \ - preempt_disable_notrace(); \ - \ - __DO_TRACE_CALL(name, TP_ARGS(args)); \ - \ - if (syscall) \ - rcu_read_unlock_trace(); \ - else \ - preempt_enable_notrace(); \ - } while (0) - /* * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the @@ -282,10 +257,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), cond, PARAMS(data_proto)) \ static inline void trace_##name(proto) \ { \ - if (static_branch_unlikely(&__tracepoint_##name.key)) \ - __DO_TRACE(name, \ - TP_ARGS(args), \ - TP_CONDITION(cond), 0); \ + if (static_branch_unlikely(&__tracepoint_##name.key)) { \ + if (cond) { \ + scoped_guard(preempt_notrace) \ + __DO_TRACE_CALL(name, TP_ARGS(args)); \ + } \ + } \ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ WARN_ONCE(!rcu_is_watching(), \ "RCU not watching for tracepoint"); \ @@ -297,10 +274,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) static inline void trace_##name(proto) \ { \ might_fault(); \ - if (static_branch_unlikely(&__tracepoint_##name.key)) \ - __DO_TRACE(name, \ - TP_ARGS(args), \ - TP_CONDITION(cond), 1); \ + if (static_branch_unlikely(&__tracepoint_##name.key)) { \ + if (cond) { \ + scoped_guard(rcu_tasks_trace) \ + __DO_TRACE_CALL(name, TP_ARGS(args)); \ + } \ + } \ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ WARN_ONCE(!rcu_is_watching(), \ "RCU not watching for tracepoint"); \ -- cgit v1.2.3 From ef0d4186083127d2f99ed04e051fd94ba061d253 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 23 Nov 2024 10:30:31 -0500 Subject: tracing: Remove cond argument from __DECLARE_TRACE_SYSCALL Syscall tracepoints do not require a "cond" argument, because they are meant to be used only for sys_enter and sys_exit instrumentation, which don't require condition evaluation. Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Cc: Jordan Rife Cc: linux-trace-kernel@vger.kernel.org Link: https://lore.kernel.org/20241123153031.2884933-6-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 832f49b56b1f..b2633a72e871 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -220,7 +220,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * site if it is not watching, as it will need to be active when the * tracepoint is enabled. */ -#define __DECLARE_TRACE_COMMON(name, proto, args, cond, data_proto) \ +#define __DECLARE_TRACE_COMMON(name, proto, args, data_proto) \ extern int __traceiter_##name(data_proto); \ DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name); \ extern struct tracepoint __tracepoint_##name; \ @@ -254,7 +254,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) } #define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ - __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), cond, PARAMS(data_proto)) \ + __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \ static inline void trace_##name(proto) \ { \ if (static_branch_unlikely(&__tracepoint_##name.key)) { \ @@ -269,18 +269,16 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) } \ } -#define __DECLARE_TRACE_SYSCALL(name, proto, args, cond, data_proto) \ - __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), cond, PARAMS(data_proto)) \ +#define __DECLARE_TRACE_SYSCALL(name, proto, args, data_proto) \ + __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \ static inline void trace_##name(proto) \ { \ might_fault(); \ if (static_branch_unlikely(&__tracepoint_##name.key)) { \ - if (cond) { \ - scoped_guard(rcu_tasks_trace) \ - __DO_TRACE_CALL(name, TP_ARGS(args)); \ - } \ + scoped_guard(rcu_tasks_trace) \ + __DO_TRACE_CALL(name, TP_ARGS(args)); \ } \ - if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ + if (IS_ENABLED(CONFIG_LOCKDEP)) { \ WARN_ONCE(!rcu_is_watching(), \ "RCU not watching for tracepoint"); \ } \ @@ -363,7 +361,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #else /* !TRACEPOINTS_ENABLED */ -#define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ +#define __DECLARE_TRACE_COMMON(name, proto, args, data_proto) \ static inline void trace_##name(proto) \ { } \ static inline int \ @@ -387,7 +385,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) return false; \ } -#define __DECLARE_TRACE_SYSCALL __DECLARE_TRACE +#define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ + __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) + +#define __DECLARE_TRACE_SYSCALL(name, proto, args, data_proto) \ + __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) #define DEFINE_TRACE_FN(name, reg, unreg, proto, args) #define DEFINE_TRACE_SYSCALL(name, reg, unreg, proto, args) @@ -453,7 +455,6 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define DECLARE_TRACE_SYSCALL(name, proto, args) \ __DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args), \ - cpu_online(raw_smp_processor_id()), \ PARAMS(void *__data, proto)) #define TRACE_EVENT_FLAGS(event, flag) -- cgit v1.2.3 From 7f9e19f207be0c534d517d65e01417ba968cdd34 Mon Sep 17 00:00:00 2001 From: Adam Young Date: Wed, 20 Nov 2024 14:02:14 -0500 Subject: mailbox: pcc: Check before sending MCTP PCC response ACK Type 4 PCC channels have an option to send back a response to the platform when they are done processing the request. The flag to indicate whether or not to respond is inside the message body, and thus is not available to the pcc mailbox. If the flag is not set, still set command completion bit after processing message. In order to read the flag, this patch maps the shared buffer to virtual memory. To avoid duplication of mapping the shared buffer is then made available to be used by the driver that uses the mailbox. Signed-off-by: Adam Young Cc: Sudeep Holla Signed-off-by: Jassi Brar --- include/acpi/pcc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/acpi/pcc.h b/include/acpi/pcc.h index 9b373d172a77..699c1a37b8e7 100644 --- a/include/acpi/pcc.h +++ b/include/acpi/pcc.h @@ -12,6 +12,7 @@ struct pcc_mbox_chan { struct mbox_chan *mchan; u64 shmem_base_addr; + void __iomem *shmem; u64 shmem_size; u32 latency; u32 max_access_rate; @@ -31,11 +32,13 @@ struct pcc_mbox_chan { #define PCC_CMD_COMPLETION_NOTIFY BIT(0) #define MAX_PCC_SUBSPACES 256 +#define PCC_ACK_FLAG_MASK 0x1 #ifdef CONFIG_PCC extern struct pcc_mbox_chan * pcc_mbox_request_channel(struct mbox_client *cl, int subspace_id); extern void pcc_mbox_free_channel(struct pcc_mbox_chan *chan); +extern int pcc_mbox_ioremap(struct mbox_chan *chan); #else static inline struct pcc_mbox_chan * pcc_mbox_request_channel(struct mbox_client *cl, int subspace_id) @@ -43,6 +46,10 @@ pcc_mbox_request_channel(struct mbox_client *cl, int subspace_id) return ERR_PTR(-ENODEV); } static inline void pcc_mbox_free_channel(struct pcc_mbox_chan *chan) { } +static inline int pcc_mbox_ioremap(struct mbox_chan *chan) +{ + return 0; +}; #endif #endif /* _PCC_H */ -- cgit v1.2.3 From 2bd9b57d04df417f31ef54448477c212fcdd14fc Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 25 Nov 2024 09:25:14 -0500 Subject: tracing: Use guard() rather than scoped_guard() Using scoped_guard() in the implementation of trace_##name() adds an unnecessary level of indentation. Cc: Steven Rostedt Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Cc: Jordan Rife Link: https://lore.kernel.org/20241125142514.2897143-1-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index b2633a72e871..e398f6e43f61 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -259,8 +259,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) { \ if (static_branch_unlikely(&__tracepoint_##name.key)) { \ if (cond) { \ - scoped_guard(preempt_notrace) \ - __DO_TRACE_CALL(name, TP_ARGS(args)); \ + guard(preempt_notrace)(); \ + __DO_TRACE_CALL(name, TP_ARGS(args)); \ } \ } \ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ @@ -275,8 +275,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) { \ might_fault(); \ if (static_branch_unlikely(&__tracepoint_##name.key)) { \ - scoped_guard(rcu_tasks_trace) \ - __DO_TRACE_CALL(name, TP_ARGS(args)); \ + guard(rcu_tasks_trace)(); \ + __DO_TRACE_CALL(name, TP_ARGS(args)); \ } \ if (IS_ENABLED(CONFIG_LOCKDEP)) { \ WARN_ONCE(!rcu_is_watching(), \ -- cgit v1.2.3 From 222974c6ec9d901f7ad13bbe6e505ec1f1d822d4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 25 Nov 2024 18:45:33 -0800 Subject: iommu: remove stale declaration left over by a merge conflict The merge commit ae3325f752ef ("Merge branches 'arm/smmu', 'mediatek', 's390', 'ti/omap', 'riscv' and 'core' into next") left a stale declaration of 'iommu_present()' even though the 'core' branch that was merged had removed the function (and the declaration). Remove it for real. Reported-by: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Joerg Roedel Signed-off-by: Linus Torvalds --- include/linux/iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d6aaaec3caf4..301e97d745c1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -841,7 +841,6 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) }; } -extern bool iommu_present(const struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); extern bool iommu_group_has_isolated_msi(struct iommu_group *group); struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev, unsigned int flags); -- cgit v1.2.3 From 0043ecea2399ffc8bfd99ed9dbbe766e7c79293c Mon Sep 17 00:00:00 2001 From: Rong Xu Date: Sat, 2 Nov 2024 10:51:10 -0700 Subject: vmlinux.lds.h: Adjust symbol ordering in text output section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the -ffunction-sections compiler option is enabled, each function is placed in a separate section named .text.function_name rather than putting all functions in a single .text section. However, using -function-sections can cause problems with the linker script. The comments included in include/asm-generic/vmlinux.lds.h note these issues.: “TEXT_MAIN here will match .text.fixup and .text.unlikely if dead code elimination is enabled, so these sections should be converted to use ".." first.” It is unclear whether there is a straightforward method for converting a suffix to "..". This patch modifies the order of subsections within the text output section. Specifically, it changes current order: .text.hot, .text, .text_unlikely, .text.unknown, .text.asan to the new order: .text.asan, .text.unknown, .text_unlikely, .text.hot, .text Here is the rationale behind the new layout: The majority of the code resides in three sections: .text.hot, .text, and .text.unlikely, with .text.unknown containing a negligible amount. .text.asan is only generated in ASAN builds. The primary goal is to group code segments based on their execution frequency (hotness). First, we want to place .text.hot adjacent to .text. Since we cannot put .text.hot after .text (Due to constraints with -ffunction-sections, placing .text.hot after .text is problematic), we need to put .text.hot before .text. Then it comes to .text.unlikely, we cannot put it after .text (same -ffunction-sections issue) . Therefore, we position .text.unlikely before .text.hot. .text.unknown and .tex.asan follow the same logic. This revised ordering effectively reverses the original arrangement (for .text.unlikely, .text.unknown, and .tex.asan), maintaining a similar level of affinity between sections. It also places .text.hot section at the beginning of a page to better utilize the TLB entry. Note that the limitation arises because the linker script employs glob patterns instead of regular expressions for string matching. While there is a method to maintain the current order using complex patterns, this significantly complicates the pattern and increases the likelihood of errors. This patch also changes vmlinux.lds.S for the sparc64 architecture to accommodate specific symbol placement requirements. Co-developed-by: Han Shen Signed-off-by: Han Shen Signed-off-by: Rong Xu Suggested-by: Sriraman Tallam Suggested-by: Krzysztof Pszeniczny Tested-by: Yonghong Song Tested-by: Yabin Cui Tested-by: Nathan Chancellor Reviewed-by: Kees Cook Signed-off-by: Masahiro Yamada --- include/asm-generic/vmlinux.lds.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index eeadbaeccf88..fd901951549c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -553,19 +553,24 @@ * .text section. Map to function alignment to avoid address changes * during second ld run in second ld pass when generating System.map * - * TEXT_MAIN here will match .text.fixup and .text.unlikely if dead - * code elimination is enabled, so these sections should be converted - * to use ".." first. + * TEXT_MAIN here will match symbols with a fixed pattern (for example, + * .text.hot or .text.unlikely) if dead code elimination or + * function-section is enabled. Match these symbols first before + * TEXT_MAIN to ensure they are grouped together. + * + * Also placing .text.hot section at the beginning of a page, this + * would help the TLB performance. */ #define TEXT_TEXT \ ALIGN_FUNCTION(); \ + *(.text.asan.* .text.tsan.*) \ + *(.text.unknown .text.unknown.*) \ + *(.text.unlikely .text.unlikely.*) \ + . = ALIGN(PAGE_SIZE); \ *(.text.hot .text.hot.*) \ *(TEXT_MAIN .text.fixup) \ - *(.text.unlikely .text.unlikely.*) \ - *(.text.unknown .text.unknown.*) \ NOINSTR_TEXT \ - *(.ref.text) \ - *(.text.asan.* .text.tsan.*) + *(.ref.text) /* sched.text is aling to function alignment to secure we have same -- cgit v1.2.3 From db0b2991ae1aac5ca985ec6fd8ff9bd9b2126c9b Mon Sep 17 00:00:00 2001 From: Rong Xu Date: Sat, 2 Nov 2024 10:51:11 -0700 Subject: vmlinux.lds.h: Add markers for text_unlikely and text_hot sections Add markers like __hot_text_start, __hot_text_end, __unlikely_text_start, and __unlikely_text_end which will be included in System.map. These markers indicate how the compiler groups functions, providing valuable information to developers about the layout and optimization of the code. Co-developed-by: Han Shen Signed-off-by: Han Shen Signed-off-by: Rong Xu Suggested-by: Sriraman Tallam Tested-by: Yonghong Song Tested-by: Yabin Cui Tested-by: Nathan Chancellor Reviewed-by: Kees Cook Signed-off-by: Masahiro Yamada --- include/asm-generic/vmlinux.lds.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index fd901951549c..e02973f3b418 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -549,6 +549,16 @@ __cpuidle_text_end = .; \ __noinstr_text_end = .; +#define TEXT_UNLIKELY \ + __unlikely_text_start = .; \ + *(.text.unlikely .text.unlikely.*) \ + __unlikely_text_end = .; + +#define TEXT_HOT \ + __hot_text_start = .; \ + *(.text.hot .text.hot.*) \ + __hot_text_end = .; + /* * .text section. Map to function alignment to avoid address changes * during second ld run in second ld pass when generating System.map @@ -565,9 +575,9 @@ ALIGN_FUNCTION(); \ *(.text.asan.* .text.tsan.*) \ *(.text.unknown .text.unknown.*) \ - *(.text.unlikely .text.unlikely.*) \ + TEXT_UNLIKELY \ . = ALIGN(PAGE_SIZE); \ - *(.text.hot .text.hot.*) \ + TEXT_HOT \ *(TEXT_MAIN .text.fixup) \ NOINSTR_TEXT \ *(.ref.text) -- cgit v1.2.3 From 0847420f5e499a7ab518942fff71482179290163 Mon Sep 17 00:00:00 2001 From: Rong Xu Date: Sat, 2 Nov 2024 10:51:12 -0700 Subject: AutoFDO: Enable -ffunction-sections for the AutoFDO build Enable -ffunction-sections by default for the AutoFDO build. With -ffunction-sections, the compiler places each function in its own section named .text.function_name instead of placing all functions in the .text section. In the AutoFDO build, this allows the linker to utilize profile information to reorganize functions for improved utilization of iCache and iTLB. Co-developed-by: Han Shen Signed-off-by: Han Shen Signed-off-by: Rong Xu Suggested-by: Sriraman Tallam Tested-by: Yonghong Song Tested-by: Yabin Cui Tested-by: Nathan Chancellor Reviewed-by: Kees Cook Signed-off-by: Masahiro Yamada --- include/asm-generic/vmlinux.lds.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index e02973f3b418..bd64fdedabd2 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -95,18 +95,25 @@ * With LTO_CLANG, the linker also splits sections by default, so we need * these macros to combine the sections during the final link. * + * With AUTOFDO_CLANG, by default, the linker splits text sections and + * regroups functions into subsections. + * * RODATA_MAIN is not used because existing code already defines .rodata.x * sections to be brought in with rodata. */ -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \ +defined(CONFIG_AUTOFDO_CLANG) #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* +#else +#define TEXT_MAIN .text +#endif +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..L* .bss..compoundliteral* #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* #else -#define TEXT_MAIN .text #define DATA_MAIN .data #define SDATA_MAIN .sdata #define RODATA_MAIN .rodata -- cgit v1.2.3 From 2fd65f7afd5a73b685a1651cb651ade120b53e15 Mon Sep 17 00:00:00 2001 From: Rong Xu Date: Sat, 2 Nov 2024 10:51:13 -0700 Subject: AutoFDO: Enable machine function split optimization for AutoFDO Enable the machine function split optimization for AutoFDO in Clang. Machine function split (MFS) is a pass in the Clang compiler that splits a function into hot and cold parts. The linker groups all cold blocks across functions together. This decreases hot code fragmentation and improves iCache and iTLB utilization. MFS requires a profile so this is enabled only for the AutoFDO builds. Co-developed-by: Han Shen Signed-off-by: Han Shen Signed-off-by: Rong Xu Suggested-by: Sriraman Tallam Suggested-by: Krzysztof Pszeniczny Tested-by: Yonghong Song Tested-by: Yabin Cui Tested-by: Nathan Chancellor Reviewed-by: Kees Cook Signed-off-by: Masahiro Yamada --- include/asm-generic/vmlinux.lds.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index bd64fdedabd2..8a0bb3946cf0 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -556,6 +556,11 @@ defined(CONFIG_AUTOFDO_CLANG) __cpuidle_text_end = .; \ __noinstr_text_end = .; +#define TEXT_SPLIT \ + __split_text_start = .; \ + *(.text.split .text.split.[0-9a-zA-Z_]*) \ + __split_text_end = .; + #define TEXT_UNLIKELY \ __unlikely_text_start = .; \ *(.text.unlikely .text.unlikely.*) \ @@ -582,6 +587,7 @@ defined(CONFIG_AUTOFDO_CLANG) ALIGN_FUNCTION(); \ *(.text.asan.* .text.tsan.*) \ *(.text.unknown .text.unknown.*) \ + TEXT_SPLIT \ TEXT_UNLIKELY \ . = ALIGN(PAGE_SIZE); \ TEXT_HOT \ @@ -589,7 +595,6 @@ defined(CONFIG_AUTOFDO_CLANG) NOINSTR_TEXT \ *(.ref.text) - /* sched.text is aling to function alignment to secure we have same * address even at second ld pass when generating System.map */ #define SCHED_TEXT \ -- cgit v1.2.3 From d5dc95836147f2e25b134c0ca3a0bc1a5867ea29 Mon Sep 17 00:00:00 2001 From: Rong Xu Date: Sat, 2 Nov 2024 10:51:14 -0700 Subject: kbuild: Add Propeller configuration for kernel build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the build support for using Clang's Propeller optimizer. Like AutoFDO, Propeller uses hardware sampling to gather information about the frequency of execution of different code paths within a binary. This information is then used to guide the compiler's optimization decisions, resulting in a more efficient binary. The support requires a Clang compiler LLVM 19 or later, and the create_llvm_prof tool (https://github.com/google/autofdo/releases/tag/v0.30.1). This commit is limited to x86 platforms that support PMU features like LBR on Intel machines and AMD Zen3 BRS. Here is an example workflow for building an AutoFDO+Propeller optimized kernel: 1) Build the kernel on the host machine, with AutoFDO and Propeller build config CONFIG_AUTOFDO_CLANG=y CONFIG_PROPELLER_CLANG=y then $ make LLVM=1 CLANG_AUTOFDO_PROFILE=” is the profile collected when doing a non-Propeller AutoFDO build. This step builds a kernel that has the same optimization level as AutoFDO, plus a metadata section that records basic block information. This kernel image runs as fast as an AutoFDO optimized kernel. 2) Install the kernel on test/production machines. 3) Run the load tests. The '-c' option in perf specifies the sample event period. We suggest using a suitable prime number, like 500009, for this purpose. For Intel platforms: $ perf record -e BR_INST_RETIRED.NEAR_TAKEN:k -a -N -b -c \ -o -- For AMD platforms: The supported system are: Zen3 with BRS, or Zen4 with amd_lbr_v2 # To see if Zen3 support LBR: $ cat proc/cpuinfo | grep " brs" # To see if Zen4 support LBR: $ cat proc/cpuinfo | grep amd_lbr_v2 # If the result is yes, then collect the profile using: $ perf record --pfm-events RETIRED_TAKEN_BRANCH_INSTRUCTIONS:k -a \ -N -b -c -o -- 4) (Optional) Download the raw perf file to the host machine. 5) Generate Propeller profile: $ create_llvm_prof --binary= --profile= \ --format=propeller --propeller_output_module_name \ --out=_cc_profile.txt \ --propeller_symorder=_ld_profile.txt “create_llvm_prof” is the profile conversion tool, and a prebuilt binary for linux can be found on https://github.com/google/autofdo/releases/tag/v0.30.1 (can also build from source). "" can be something like "/home/user/dir/any_string". This command generates a pair of Propeller profiles: "_cc_profile.txt" and "_ld_profile.txt". 6) Rebuild the kernel using the AutoFDO and Propeller profile files. CONFIG_AUTOFDO_CLANG=y CONFIG_PROPELLER_CLANG=y and $ make LLVM=1 CLANG_AUTOFDO_PROFILE= \ CLANG_PROPELLER_PROFILE_PREFIX= Co-developed-by: Han Shen Signed-off-by: Han Shen Signed-off-by: Rong Xu Suggested-by: Sriraman Tallam Suggested-by: Krzysztof Pszeniczny Suggested-by: Nick Desaulniers Suggested-by: Stephane Eranian Tested-by: Yonghong Song Tested-by: Nathan Chancellor Reviewed-by: Kees Cook Signed-off-by: Masahiro Yamada --- include/asm-generic/vmlinux.lds.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8a0bb3946cf0..c995474e4c64 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -95,14 +95,14 @@ * With LTO_CLANG, the linker also splits sections by default, so we need * these macros to combine the sections during the final link. * - * With AUTOFDO_CLANG, by default, the linker splits text sections and - * regroups functions into subsections. + * With AUTOFDO_CLANG and PROPELLER_CLANG, by default, the linker splits + * text sections and regroups functions into subsections. * * RODATA_MAIN is not used because existing code already defines .rodata.x * sections to be brought in with rodata. */ #if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \ -defined(CONFIG_AUTOFDO_CLANG) +defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* #else #define TEXT_MAIN .text -- cgit v1.2.3 From bb43a59944f45e89aa158740b8a16ba8f0b0fa2b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 7 Nov 2024 01:14:40 +0900 Subject: Rename .data.unlikely to .data..unlikely Commit 7ccaba5314ca ("consolidate WARN_...ONCE() static variables") was intended to collect all .data.unlikely sections into one chunk. However, this has not worked when CONFIG_LD_DEAD_CODE_DATA_ELIMINATION or CONFIG_LTO_CLANG is enabled, because .data.unlikely matches the .data.[0-9a-zA-Z_]* pattern in the DATA_MAIN macro. Commit cb87481ee89d ("kbuild: linker script do not match C names unless LD_DEAD_CODE_DATA_ELIMINATION is configured") was introduced to suppress the issue for the default CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=n case, providing a minimal fix for stable backporting. We were aware this did not address the issue for CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y. The plan was to apply correct fixes and then revert cb87481ee89d. [1] Seven years have passed since then, yet the #ifdef workaround remains in place. Using a ".." separator in the section name fixes the issue for CONFIG_LD_DEAD_CODE_DATA_ELIMINATION and CONFIG_LTO_CLANG. [1]: https://lore.kernel.org/linux-kbuild/CAK7LNASck6BfdLnESxXUeECYL26yUDm0cwRZuM4gmaWUkxjL5g@mail.gmail.com/ Fixes: cb87481ee89d ("kbuild: linker script do not match C names unless LD_DEAD_CODE_DATA_ELIMINATION is configured") Signed-off-by: Masahiro Yamada --- include/asm-generic/vmlinux.lds.h | 2 +- include/linux/rcupdate.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c995474e4c64..3c9dc1fd094d 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -357,7 +357,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) *(.data..decrypted) \ *(.ref.data) \ *(.data..shared_aligned) /* percpu related */ \ - *(.data.unlikely) \ + *(.data..unlikely) \ __start_once = .; \ *(.data.once) \ __end_once = .; \ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 58d84c59f3dd..48e5c03df1dd 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -401,7 +401,7 @@ static inline int debug_lockdep_rcu_enabled(void) */ #define RCU_LOCKDEP_WARN(c, s) \ do { \ - static bool __section(".data.unlikely") __warned; \ + static bool __section(".data..unlikely") __warned; \ if (debug_lockdep_rcu_enabled() && (c) && \ debug_lockdep_rcu_enabled() && !__warned) { \ __warned = true; \ -- cgit v1.2.3 From dbefa1f31a91670c9e7dac9b559625336206466f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 7 Nov 2024 01:14:41 +0900 Subject: Rename .data.once to .data..once to fix resetting WARN*_ONCE Commit b1fca27d384e ("kernel debug: support resetting WARN*_ONCE") added support for clearing the state of once warnings. However, it is not functional when CONFIG_LD_DEAD_CODE_DATA_ELIMINATION or CONFIG_LTO_CLANG is enabled, because .data.once matches the .data.[0-9a-zA-Z_]* pattern in the DATA_MAIN macro. Commit cb87481ee89d ("kbuild: linker script do not match C names unless LD_DEAD_CODE_DATA_ELIMINATION is configured") was introduced to suppress the issue for the default CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=n case, providing a minimal fix for stable backporting. We were aware this did not address the issue for CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y. The plan was to apply correct fixes and then revert cb87481ee89d. [1] Seven years have passed since then, yet the #ifdef workaround remains in place. Meanwhile, commit b1fca27d384e introduced the .data.once section, and commit dc5723b02e52 ("kbuild: add support for Clang LTO") extended the #ifdef. Using a ".." separator in the section name fixes the issue for CONFIG_LD_DEAD_CODE_DATA_ELIMINATION and CONFIG_LTO_CLANG. [1]: https://lore.kernel.org/linux-kbuild/CAK7LNASck6BfdLnESxXUeECYL26yUDm0cwRZuM4gmaWUkxjL5g@mail.gmail.com/ Fixes: b1fca27d384e ("kernel debug: support resetting WARN*_ONCE") Fixes: dc5723b02e52 ("kbuild: add support for Clang LTO") Signed-off-by: Masahiro Yamada --- include/asm-generic/vmlinux.lds.h | 2 +- include/linux/mmdebug.h | 6 +++--- include/linux/once.h | 4 ++-- include/linux/once_lite.h | 2 +- include/net/net_debug.h | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 3c9dc1fd094d..54504013c749 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -359,7 +359,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) *(.data..shared_aligned) /* percpu related */ \ *(.data..unlikely) \ __start_once = .; \ - *(.data.once) \ + *(.data..once) \ __end_once = .; \ STRUCT_ALIGN(); \ *(__tracepoints) \ diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 39a7714605a7..d7cb1e5ecbda 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -46,7 +46,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi); } \ } while (0) #define VM_WARN_ON_ONCE_PAGE(cond, page) ({ \ - static bool __section(".data.once") __warned; \ + static bool __section(".data..once") __warned; \ int __ret_warn_once = !!(cond); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ @@ -66,7 +66,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi); unlikely(__ret_warn); \ }) #define VM_WARN_ON_ONCE_FOLIO(cond, folio) ({ \ - static bool __section(".data.once") __warned; \ + static bool __section(".data..once") __warned; \ int __ret_warn_once = !!(cond); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ @@ -77,7 +77,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi); unlikely(__ret_warn_once); \ }) #define VM_WARN_ON_ONCE_MM(cond, mm) ({ \ - static bool __section(".data.once") __warned; \ + static bool __section(".data..once") __warned; \ int __ret_warn_once = !!(cond); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ diff --git a/include/linux/once.h b/include/linux/once.h index bc714d414448..30346fcdc799 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -46,7 +46,7 @@ void __do_once_sleepable_done(bool *done, struct static_key_true *once_key, #define DO_ONCE(func, ...) \ ({ \ bool ___ret = false; \ - static bool __section(".data.once") ___done = false; \ + static bool __section(".data..once") ___done = false; \ static DEFINE_STATIC_KEY_TRUE(___once_key); \ if (static_branch_unlikely(&___once_key)) { \ unsigned long ___flags; \ @@ -64,7 +64,7 @@ void __do_once_sleepable_done(bool *done, struct static_key_true *once_key, #define DO_ONCE_SLEEPABLE(func, ...) \ ({ \ bool ___ret = false; \ - static bool __section(".data.once") ___done = false; \ + static bool __section(".data..once") ___done = false; \ static DEFINE_STATIC_KEY_TRUE(___once_key); \ if (static_branch_unlikely(&___once_key)) { \ ___ret = __do_once_sleepable_start(&___done); \ diff --git a/include/linux/once_lite.h b/include/linux/once_lite.h index b7bce4983638..27de7bc32a06 100644 --- a/include/linux/once_lite.h +++ b/include/linux/once_lite.h @@ -12,7 +12,7 @@ #define __ONCE_LITE_IF(condition) \ ({ \ - static bool __section(".data.once") __already_done; \ + static bool __section(".data..once") __already_done; \ bool __ret_cond = !!(condition); \ bool __ret_once = false; \ \ diff --git a/include/net/net_debug.h b/include/net/net_debug.h index 1e74684cbbdb..4a79204c8d30 100644 --- a/include/net/net_debug.h +++ b/include/net/net_debug.h @@ -27,7 +27,7 @@ void netdev_info(const struct net_device *dev, const char *format, ...); #define netdev_level_once(level, dev, fmt, ...) \ do { \ - static bool __section(".data.once") __print_once; \ + static bool __section(".data..once") __print_once; \ \ if (!__print_once) { \ __print_once = true; \ -- cgit v1.2.3 From 81de291d86b704de1809cfb06672902d003cf3a3 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 6 Nov 2024 17:33:28 +0800 Subject: of: dynamic: Add of_changeset_update_prop_string Add a helper function to add string property updates to an OF changeset. This is similar to of_changeset_add_prop_string(), but instead of adding the property (and failing if it exists), it will update the property. This shall be used later in the DT hardware prober. Signed-off-by: Chen-Yu Tsai Reviewed-by: Rob Herring (Arm) Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Wolfram Sang --- include/linux/of.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/of.h b/include/linux/of.h index 086a60f3b8a6..d0307e3b093d 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1651,6 +1651,10 @@ static inline int of_changeset_add_prop_u32(struct of_changeset *ocs, return of_changeset_add_prop_u32_array(ocs, np, prop_name, &val, 1); } +int of_changeset_update_prop_string(struct of_changeset *ocs, + struct device_node *np, + const char *prop_name, const char *str); + int of_changeset_add_prop_bool(struct of_changeset *ocs, struct device_node *np, const char *prop_name); -- cgit v1.2.3 From 1fcc67e3a354865775355eafec1fb061a755c971 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 6 Nov 2024 17:33:29 +0800 Subject: of: base: Add for_each_child_of_node_with_prefix() There are cases where drivers would go through child device nodes and operate on only the ones whose node name starts with a given prefix. Provide a helper for these users. This will mainly be used in a subsequent patch that implements a hardware component prober for I2C busses. Signed-off-by: Chen-Yu Tsai Reviewed-by: Rob Herring (Arm) Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Wolfram Sang --- include/linux/of.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/of.h b/include/linux/of.h index d0307e3b093d..f921786cb8ac 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -289,6 +289,9 @@ extern struct device_node *of_get_parent(const struct device_node *node); extern struct device_node *of_get_next_parent(struct device_node *node); extern struct device_node *of_get_next_child(const struct device_node *node, struct device_node *prev); +extern struct device_node *of_get_next_child_with_prefix(const struct device_node *node, + struct device_node *prev, + const char *prefix); extern struct device_node *of_get_next_available_child( const struct device_node *node, struct device_node *prev); extern struct device_node *of_get_next_reserved_child( @@ -1468,6 +1471,12 @@ static inline int of_property_read_s32(const struct device_node *np, child != NULL; \ child = of_get_next_child(parent, child)) +#define for_each_child_of_node_with_prefix(parent, child, prefix) \ + for (struct device_node *child __free(device_node) = \ + of_get_next_child_with_prefix(parent, NULL, prefix); \ + child != NULL; \ + child = of_get_next_child_with_prefix(parent, child, prefix)) + #define for_each_available_child_of_node(parent, child) \ for (child = of_get_next_available_child(parent, NULL); child != NULL; \ child = of_get_next_available_child(parent, child)) -- cgit v1.2.3 From 157ce8f381efe264933e9366db828d845bade3a1 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 6 Nov 2024 17:33:30 +0800 Subject: i2c: Introduce OF component probe function Some devices are designed and manufactured with some components having multiple drop-in replacement options. These components are often connected to the mainboard via ribbon cables, having the same signals and pin assignments across all options. These may include the display panel and touchscreen on laptops and tablets, and the trackpad on laptops. Sometimes which component option is used in a particular device can be detected by some firmware provided identifier, other times that information is not available, and the kernel has to try to probe each device. This change attempts to make the "probe each device" case cleaner. The current approach is to have all options added and enabled in the device tree. The kernel would then bind each device and run each driver's probe function. This works, but has been broken before due to the introduction of asynchronous probing, causing multiple instances requesting "shared" resources, such as pinmuxes, GPIO pins, interrupt lines, at the same time, with only one instance succeeding. Work arounds for these include moving the pinmux to the parent I2C controller, using GPIO hogs or pinmux settings to keep the GPIO pins in some fixed configuration, and requesting the interrupt line very late. Such configurations can be seen on the MT8183 Krane Chromebook tablets, and the Qualcomm sc8280xp-based Lenovo Thinkpad 13S. Instead of this delicate dance between drivers and device tree quirks, this change introduces a simple I2C component probe function. For a given class of devices on the same I2C bus, it will go through all of them, doing a simple I2C read transfer and see which one of them responds. It will then enable the device that responds. This requires some minor modifications in the existing device tree. The status for all the device nodes for the component options must be set to "fail-needs-probe". This makes it clear that some mechanism is needed to enable one of them, and also prevents the prober and device drivers running at the same time. Signed-off-by: Chen-Yu Tsai Reviewed-by: Andy Shevchenko Reviewed-by: Douglas Anderson Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Wolfram Sang --- include/linux/i2c-of-prober.h | 75 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 include/linux/i2c-of-prober.h (limited to 'include') diff --git a/include/linux/i2c-of-prober.h b/include/linux/i2c-of-prober.h new file mode 100644 index 000000000000..e7e052ac9e48 --- /dev/null +++ b/include/linux/i2c-of-prober.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Definitions for the Linux I2C OF component prober + * + * Copyright (C) 2024 Google LLC + */ + +#ifndef _LINUX_I2C_OF_PROBER_H +#define _LINUX_I2C_OF_PROBER_H + +#include + +struct device; +struct device_node; + +/** + * struct i2c_of_probe_ops - I2C OF component prober callbacks + * + * A set of callbacks to be used by i2c_of_probe_component(). + * + * All callbacks are optional. Callbacks are called only once per run, and are + * used in the order they are defined in this structure. + * + * All callbacks that have return values shall return %0 on success, + * or a negative error number on failure. + * + * The @dev parameter passed to the callbacks is the same as @dev passed to + * i2c_of_probe_component(). It should only be used for dev_printk() calls + * and nothing else, especially not managed device resource (devres) APIs. + */ +struct i2c_of_probe_ops { + /** + * @enable: Retrieve and enable resources so that the components respond to probes. + * + * It is OK for this callback to return -EPROBE_DEFER since the intended use includes + * retrieving resources and enables them. Resources should be reverted to their initial + * state and released before returning if this fails. + */ + int (*enable)(struct device *dev, struct device_node *bus_node, void *data); + + /** + * @cleanup_early: Release exclusive resources prior to calling probe() on a + * detected component. + * + * Only called if a matching component is actually found. If none are found, + * resources that would have been released in this callback should be released in + * @free_resourcs_late instead. + */ + void (*cleanup_early)(struct device *dev, void *data); + + /** + * @cleanup: Opposite of @enable to balance refcounts and free resources after probing. + * + * Should check if resources were already freed by @cleanup_early. + */ + void (*cleanup)(struct device *dev, void *data); +}; + +/** + * struct i2c_of_probe_cfg - I2C OF component prober configuration + * @ops: Callbacks for the prober to use. + * @type: A string to match the device node name prefix to probe for. + */ +struct i2c_of_probe_cfg { + const struct i2c_of_probe_ops *ops; + const char *type; +}; + +#if IS_ENABLED(CONFIG_OF_DYNAMIC) + +int i2c_of_probe_component(struct device *dev, const struct i2c_of_probe_cfg *cfg, void *ctx); + +#endif /* IS_ENABLED(CONFIG_OF_DYNAMIC) */ + +#endif /* _LINUX_I2C_OF_PROBER_H */ -- cgit v1.2.3 From 897261149d255d03fc90bec6782e3835cacbfdde Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 6 Nov 2024 17:33:31 +0800 Subject: i2c: of-prober: Add simple helpers for regulator support Add helpers to do regulator management for the I2C OF component prober. Components that the prober intends to probe likely require their regulator supplies be enabled, and GPIOs be toggled to enable them or bring them out of reset before they will respond to probe attempts. GPIOs will be handled in the next patch. The assumption is that the same class of components to be probed are always connected in the same fashion with the same regulator supply and GPIO. The names may vary due to binding differences, but the physical layout does not change. This set of helpers supports at most one regulator supply. The user must specify the node from which the supply is retrieved. The supply name and the amount of time to wait after the supply is enabled are also given by the user. Signed-off-by: Chen-Yu Tsai Reviewed-by: Douglas Anderson Reviewed-by: Andy Shevchenko Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Wolfram Sang --- include/linux/i2c-of-prober.h | 44 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include') diff --git a/include/linux/i2c-of-prober.h b/include/linux/i2c-of-prober.h index e7e052ac9e48..df95aa6ad90e 100644 --- a/include/linux/i2c-of-prober.h +++ b/include/linux/i2c-of-prober.h @@ -70,6 +70,50 @@ struct i2c_of_probe_cfg { int i2c_of_probe_component(struct device *dev, const struct i2c_of_probe_cfg *cfg, void *ctx); +/** + * DOC: I2C OF component prober simple helpers + * + * Components such as trackpads are commonly connected to a devices baseboard + * with a 6-pin ribbon cable. That gives at most one voltage supply and one + * GPIO (commonly a "enable" or "reset" line) besides the I2C bus, interrupt + * pin, and common ground. Touchscreens, while integrated into the display + * panel's connection, typically have the same set of connections. + * + * A simple set of helpers are provided here for use with the I2C OF component + * prober. This implementation targets such components, allowing for at most + * one regulator supply. + * + * The following helpers are provided: + * * i2c_of_probe_simple_enable() + * * i2c_of_probe_simple_cleanup() + */ + +/** + * struct i2c_of_probe_simple_opts - Options for simple I2C component prober callbacks + * @res_node_compatible: Compatible string of device node to retrieve resources from. + * @supply_name: Name of regulator supply. + * @post_power_on_delay_ms: Delay after regulators are powered on. Passed to msleep(). + */ +struct i2c_of_probe_simple_opts { + const char *res_node_compatible; + const char *supply_name; + unsigned int post_power_on_delay_ms; +}; + +struct regulator; + +struct i2c_of_probe_simple_ctx { + /* public: provided by user before helpers are used. */ + const struct i2c_of_probe_simple_opts *opts; + /* private: internal fields for helpers. */ + struct regulator *supply; +}; + +int i2c_of_probe_simple_enable(struct device *dev, struct device_node *bus_node, void *data); +void i2c_of_probe_simple_cleanup(struct device *dev, void *data); + +extern struct i2c_of_probe_ops i2c_of_probe_simple_ops; + #endif /* IS_ENABLED(CONFIG_OF_DYNAMIC) */ #endif /* _LINUX_I2C_OF_PROBER_H */ -- cgit v1.2.3 From 39b415f84654892003cebb7c026b7daa3380610b Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 6 Nov 2024 17:33:32 +0800 Subject: i2c: of-prober: Add GPIO support to simple helpers Add GPIO support to the simple helpers for the I2C OF component prober. Components that the prober intends to probe likely require their regulator supplies be enabled, and GPIOs be toggled to enable them or bring them out of reset before they will respond to probe attempts. Regulator supplies were handled in the previous patch. The assumption is that the same class of components to be probed are always connected in the same fashion with the same regulator supply and GPIO. The names may vary due to binding differences, but the physical layout does not change. This supports at most one GPIO pin. The user must specify the GPIO name, the polarity, and the amount of time to wait after the GPIO is toggled. Devices with more than one GPIO pin likely require specific power sequencing beyond what generic code can easily support. Signed-off-by: Chen-Yu Tsai Reviewed-by: Douglas Anderson Reviewed-by: Andy Shevchenko Tested-by: Andrey Skvortsov Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Wolfram Sang --- include/linux/i2c-of-prober.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/i2c-of-prober.h b/include/linux/i2c-of-prober.h index df95aa6ad90e..bb6d47f50ee5 100644 --- a/include/linux/i2c-of-prober.h +++ b/include/linux/i2c-of-prober.h @@ -9,6 +9,7 @@ #define _LINUX_I2C_OF_PROBER_H #include +#include struct device; struct device_node; @@ -85,6 +86,7 @@ int i2c_of_probe_component(struct device *dev, const struct i2c_of_probe_cfg *cf * * The following helpers are provided: * * i2c_of_probe_simple_enable() + * * i2c_of_probe_simple_cleanup_early() * * i2c_of_probe_simple_cleanup() */ @@ -92,14 +94,31 @@ int i2c_of_probe_component(struct device *dev, const struct i2c_of_probe_cfg *cf * struct i2c_of_probe_simple_opts - Options for simple I2C component prober callbacks * @res_node_compatible: Compatible string of device node to retrieve resources from. * @supply_name: Name of regulator supply. + * @gpio_name: Name of GPIO. NULL if no GPIO line is used. Empty string ("") if GPIO + * line is unnamed. * @post_power_on_delay_ms: Delay after regulators are powered on. Passed to msleep(). + * @post_gpio_config_delay_ms: Delay after GPIO is configured. Passed to msleep(). + * @gpio_assert_to_enable: %true if GPIO should be asserted, i.e. set to logical high, + * to enable the component. + * + * This describes power sequences common for the class of components supported by the + * simple component prober: + * * @gpio_name is configured to the non-active setting according to @gpio_assert_to_enable. + * * @supply_name regulator supply is enabled. + * * Wait for @post_power_on_delay_ms to pass. + * * @gpio_name is configured to the active setting according to @gpio_assert_to_enable. + * * Wait for @post_gpio_config_delay_ms to pass. */ struct i2c_of_probe_simple_opts { const char *res_node_compatible; const char *supply_name; + const char *gpio_name; unsigned int post_power_on_delay_ms; + unsigned int post_gpio_config_delay_ms; + bool gpio_assert_to_enable; }; +struct gpio_desc; struct regulator; struct i2c_of_probe_simple_ctx { @@ -107,9 +126,11 @@ struct i2c_of_probe_simple_ctx { const struct i2c_of_probe_simple_opts *opts; /* private: internal fields for helpers. */ struct regulator *supply; + struct gpio_desc *gpiod; }; int i2c_of_probe_simple_enable(struct device *dev, struct device_node *bus_node, void *data); +void i2c_of_probe_simple_cleanup_early(struct device *dev, void *data); void i2c_of_probe_simple_cleanup(struct device *dev, void *data); extern struct i2c_of_probe_ops i2c_of_probe_simple_ops; -- cgit v1.2.3 From 054a9cd395a79f4a5595f2cd24542e9bca8723c8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 20 Nov 2024 08:56:52 +0900 Subject: modpost: rename alias symbol for MODULE_DEVICE_TABLE() This commit renames the alias symbol, __mod____device_table to __mod_device_table____. This change simplifies the code slightly, as there is no longer a need to check both the prefix and suffix. Signed-off-by: Masahiro Yamada --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 88ecc5e9f523..3dd79a3d0cbf 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -247,7 +247,7 @@ extern void cleanup_module(void); #ifdef MODULE /* Creates an alias so file2alias.c can find device table. */ #define MODULE_DEVICE_TABLE(type, name) \ -extern typeof(name) __mod_##type##__##name##_device_table \ +extern typeof(name) __mod_device_table__##type##__##name \ __attribute__ ((unused, alias(__stringify(name)))) #else /* !MODULE */ #define MODULE_DEVICE_TABLE(type, name) -- cgit v1.2.3 From 49b2b973325add467270e906d4d794aa6679b38b Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Tue, 19 Nov 2024 14:31:43 +0100 Subject: net: Comment copy_from_sockptr() explaining its behaviour copy_from_sockptr() has a history of misuse. Add a comment explaining that the function follows API of copy_from_user(), i.e. returns 0 for success, or number of bytes not copied on error. Signed-off-by: Michal Luczaj Signed-off-by: Paolo Abeni --- include/linux/sockptr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index 195debe2b1db..3e6c8e9d67ae 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -53,6 +53,8 @@ static inline int copy_from_sockptr_offset(void *dst, sockptr_t src, /* Deprecated. * This is unsafe, unless caller checked user provided optlen. * Prefer copy_safe_from_sockptr() instead. + * + * Returns 0 for success, or number of bytes not copied on error. */ static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size) { -- cgit v1.2.3 From e2668c34b7e1a2288ea0a97ccf3cd12e2870ca18 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 23 Nov 2024 14:50:12 +0000 Subject: net: phy: fix phy_ethtool_set_eee() incorrectly enabling LPI When phy_ethtool_set_eee_noneg() detects a change in the LPI parameters, it attempts to update phylib state and trigger the link to cycle so the MAC sees the updated parameters. However, in doing so, it sets phydev->enable_tx_lpi depending on whether the EEE configuration allows the MAC to generate LPI without taking into account the result of negotiation. This can be demonstrated with a 1000base-T FD interface by: # ethtool --set-eee eno0 advertise 8 # cause EEE to be not negotiated # ethtool --set-eee eno0 tx-lpi off # ethtool --set-eee eno0 tx-lpi on This results in being true, despite EEE not having been negotiated and: # ethtool --show-eee eno0 EEE status: enabled - inactive Tx LPI: 250 (us) Supported EEE link modes: 100baseT/Full 1000baseT/Full Advertised EEE link modes: 100baseT/Full 1000baseT/Full Fix this by keeping track of whether EEE was negotiated via a new eee_active member in struct phy_device, and include this state in the decision whether phydev->enable_tx_lpi should be set. Fixes: 3e43b903da04 ("net: phy: Immediately call adjust_link if only tx_lpi_enabled changes") Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tErSe-005RhB-2R@rmk-PC.armlinux.org.uk Signed-off-by: Paolo Abeni --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 77c6d6451638..563c46205685 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -602,6 +602,7 @@ struct macsec_ops; * @supported_eee: supported PHY EEE linkmodes * @advertising_eee: Currently advertised EEE linkmodes * @enable_tx_lpi: When True, MAC should transmit LPI to PHY + * @eee_active: phylib private state, indicating that EEE has been negotiated * @eee_cfg: User configuration of EEE * @lp_advertising: Current link partner advertised linkmodes * @host_interfaces: PHY interface modes supported by host @@ -723,6 +724,7 @@ struct phy_device { /* Energy efficient ethernet modes which should be prohibited */ __ETHTOOL_DECLARE_LINK_MODE_MASK(eee_broken_modes); bool enable_tx_lpi; + bool eee_active; struct eee_config eee_cfg; /* Host supported PHY interface types. Should be ignored if empty. */ -- cgit v1.2.3 From 4d17c25eaf5d8b95d70726e6946e8eb94619e139 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 21 Nov 2024 16:29:31 +0100 Subject: delay: Fix ndelay() spuriously treated as udelay() A recent rework on delay functions wrongly ended up calling __udelay() instead of __ndelay() for nanosecond delays, increasing those by 1000. As a result hangs have been observed on boot Restore the right function calls. Fixes: 19e2d91d8cb1 ("delay: Rework udelay and ndelay") Reported-by: Chen-Yu Tsai Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Tested-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/all/20241121152931.51884-1-frederic@kernel.org --- include/asm-generic/delay.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/delay.h b/include/asm-generic/delay.h index 76cf237b6e4c..03b0ec7afca6 100644 --- a/include/asm-generic/delay.h +++ b/include/asm-generic/delay.h @@ -75,11 +75,11 @@ static __always_inline void ndelay(unsigned long nsec) { if (__builtin_constant_p(nsec)) { if (nsec >= DELAY_CONST_MAX) - __bad_udelay(); + __bad_ndelay(); else __const_udelay(nsec * NDELAY_CONST_MULT); } else { - __udelay(nsec); + __ndelay(nsec); } } #define ndelay(x) ndelay(x) -- cgit v1.2.3 From f69e63756f7822fcdad8a34f9967e8b243e883ee Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 2 Oct 2024 18:31:47 +0100 Subject: printf: Remove unused 'bprintf' bprintf() is unused. Remove it. It was added in the commit 4370aa4aa753 ("vsprintf: add binary printf") but as far as I can see was never used, unlike the other two functions in that patch. Link: https://lore.kernel.org/20241002173147.210107-1-linux@treblig.org Reviewed-by: Andy Shevchenko Acked-by: Petr Mladek Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Steven Rostedt (Google) --- include/linux/string.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index 0dd27afcfaf7..493ac4862c77 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -335,7 +335,6 @@ int __sysfs_match_string(const char * const *array, size_t n, const char *s); #ifdef CONFIG_BINARY_PRINTF int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf); -int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4); #endif extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, -- cgit v1.2.3