From 7efa84b5cdd6d473c7e80912638fca9d7167f202 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 4 Apr 2025 15:10:02 -0700 Subject: compiler-gcc.h: Introduce __diag_GCC_all It is not possible disabling a diagnostic for all versions of GCC without hard coding the minimum supported version at the site, as the GCC specific macros require a minimum version to disable the warning for: __diag_ignore(GCC, 5, ...); __diag_ignore_all() does not solve this issue because it disables a diagnostic for all versions of both GCC and clang, not just one or the other. Introduce __diag_GCC_all so that developers can write __diag_ignore(GCC, all, ...); to disable a particular diagnostic for all versions of GCC, while not affecting clang. Closes: https://lore.kernel.org/r/CAHk-=wgfX9nBGE0Ap9GjhOy7Mn=RSy=rx0MvqfYFFDx31KJXqQ@mail.gmail.com Signed-off-by: Nathan Chancellor Tested-by: Andy Shevchenko Reviewed-by: Petr Mladek Link: https://patch.msgid.link/20250404-vsprintf-convert-pragmas-to-__diag-v1-1-5d6c5c55b2bd@kernel.org Signed-off-by: Petr Mladek --- include/linux/compiler-gcc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index c9b58188ec61..c75a222880f9 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -127,6 +127,8 @@ #define __diag_GCC_8(s) #endif +#define __diag_GCC_all(s) __diag(s) + #define __diag_ignore_all(option, comment) \ __diag(__diag_GCC_ignore option) -- cgit v1.2.3 From 4e591b890afa0cbc3479f3b88fa7dc1d28972761 Mon Sep 17 00:00:00 2001 From: Thierry Bultel Date: Thu, 15 May 2025 16:18:17 +0200 Subject: dt-bindings: clock: renesas,cpg-mssr: Document RZ/T2H support Document RZ/T2H (a.k.a. r9a09g077) cpg-mssr (Clock Pulse Generator) binding. Reviewed-by: "Rob Herring (Arm)" Signed-off-by: Thierry Bultel Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20250515141828.43444-3-thierry.bultel.yh@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- .../dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h | 27 ++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h (limited to 'include') diff --git a/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h b/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h new file mode 100644 index 000000000000..1b22fe88dec7 --- /dev/null +++ b/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + * + * Copyright (C) 2025 Renesas Electronics Corp. + */ + +#ifndef __DT_BINDINGS_CLOCK_RENESAS_R9A09G077_CPG_H__ +#define __DT_BINDINGS_CLOCK_RENESAS_R9A09G077_CPG_H__ + +#include + +/* R9A09G077 CPG Core Clocks */ +#define R9A09G077_CLK_CA55C0 0 +#define R9A09G077_CLK_CA55C1 1 +#define R9A09G077_CLK_CA55C2 2 +#define R9A09G077_CLK_CA55C3 3 +#define R9A09G077_CLK_CA55S 4 +#define R9A09G077_CLK_CR52_CPU0 5 +#define R9A09G077_CLK_CR52_CPU1 6 +#define R9A09G077_CLK_CKIO 7 +#define R9A09G077_CLK_PCLKAH 8 +#define R9A09G077_CLK_PCLKAM 9 +#define R9A09G077_CLK_PCLKAL 10 +#define R9A09G077_CLK_PCLKGPTL 11 +#define R9A09G077_CLK_PCLKH 12 +#define R9A09G077_CLK_PCLKM 13 + +#endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G077_CPG_H__ */ -- cgit v1.2.3 From edfc4c8a1edffa6849e19ffade1be8dd824989d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Tue, 3 Jun 2025 17:45:27 +0200 Subject: cgroup: Drop sock_cgroup_classid() dummy implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The semantic of returning 0 is unclear when !CONFIG_CGROUP_NET_CLASSID. Since there are no callers of sock_cgroup_classid() with that config anymore we can undefine the helper at all and enforce all (future) callers to handle cases when !CONFIG_CGROUP_NET_CLASSID. Signed-off-by: Michal Koutný Link: https://lore.kernel.org/r/Z_52r_v9-3JUzDT7@calendula/ Acked-by: Tejun Heo Reviewed-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index e61687d5e496..cd7f093e34cd 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -898,14 +898,12 @@ static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd) #endif } +#ifdef CONFIG_CGROUP_NET_CLASSID static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd) { -#ifdef CONFIG_CGROUP_NET_CLASSID return READ_ONCE(skcd->classid); -#else - return 0; -#endif } +#endif static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, u16 prioidx) @@ -915,13 +913,13 @@ static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, #endif } +#ifdef CONFIG_CGROUP_NET_CLASSID static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd, u32 classid) { -#ifdef CONFIG_CGROUP_NET_CLASSID WRITE_ONCE(skcd->classid, classid); -#endif } +#endif #else /* CONFIG_SOCK_CGROUP_DATA */ -- cgit v1.2.3 From 49b393af3130c7712c7e8f215f4126c9a8060fa6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 4 Jun 2025 10:21:38 +0200 Subject: perf: Add comment to enum perf_event_state Better describe the event states. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Leo Yan Link: https://lkml.kernel.org/r/20250604135801.GK38114@noisy.programming.kicks-ass.net --- include/linux/perf_event.h | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 52dc7cfab0e0..ec9d96025683 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -635,8 +635,46 @@ struct perf_addr_filter_range { unsigned long size; }; -/** - * enum perf_event_state - the states of an event: +/* + * The normal states are: + * + * ACTIVE --. + * ^ | + * | | + * sched_{in,out}() | + * | | + * v | + * ,---> INACTIVE --+ <-. + * | | | + * | {dis,en}able() + * sched_in() | | + * | OFF <--' --+ + * | | + * `---> ERROR ------' + * + * That is: + * + * sched_in: INACTIVE -> {ACTIVE,ERROR} + * sched_out: ACTIVE -> INACTIVE + * disable: {ACTIVE,INACTIVE} -> OFF + * enable: {OFF,ERROR} -> INACTIVE + * + * Where {OFF,ERROR} are disabled states. + * + * Then we have the {EXIT,REVOKED,DEAD} states which are various shades of + * defunct events: + * + * - EXIT means task that the even was assigned to died, but child events + * still live, and further children can still be created. But the event + * itself will never be active again. It can only transition to + * {REVOKED,DEAD}; + * + * - REVOKED means the PMU the event was associated with is gone; all + * functionality is stopped but the event is still alive. Can only + * transition to DEAD; + * + * - DEAD event really is DYING tearing down state and freeing bits. + * */ enum perf_event_state { PERF_EVENT_STATE_DEAD = -5, -- cgit v1.2.3 From 2fe1c59347369fa856ae259e2fac3c8c8dd9d335 Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Tue, 3 Jun 2025 23:43:07 +0800 Subject: bpf: Add cookie to raw_tp bpf_link_info After commit 68ca5d4eebb8 ("bpf: support BPF cookie in raw tracepoint (raw_tp, tp_btf) programs"), we can show the cookie in bpf_link_info like kprobe etc. Signed-off-by: Tao Chen Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20250603154309.3063644-1-chen.dylane@linux.dev --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 85180e4aaa5a..f1160ebbf526 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6651,6 +6651,8 @@ struct bpf_link_info { struct { __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */ __u32 tp_name_len; /* in/out: tp_name buffer len */ + __u32 :32; + __u64 cookie; } raw_tracepoint; struct { __u32 attach_type; -- cgit v1.2.3 From 267be32b0a7b70cc777f8a46f0904c92c0521d89 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 4 Jun 2025 02:07:22 +0000 Subject: ASoC: remove component->id No one is using component->id. One idea is we can re-use it as serial number for component. But we have no usage, so far. Let's just remove it for now. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/877c1suuna.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-component.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h index 61534ac0edd1..2caa807c6249 100644 --- a/include/sound/soc-component.h +++ b/include/sound/soc-component.h @@ -206,7 +206,6 @@ struct snd_soc_component_driver { struct snd_soc_component { const char *name; - int id; const char *name_prefix; struct device *dev; struct snd_soc_card *card; -- cgit v1.2.3 From d7181a2d43cffb19f1e5c19f6d2328f190c87d70 Mon Sep 17 00:00:00 2001 From: Martijn de Gouw Date: Sun, 25 May 2025 09:18:20 +0200 Subject: dt-bindings: regulator: add pca9450: Add regulator-allowed-modes Make the PWM mode on the buck controllers configurable from devicetree. Some boards require forced PWM mode to keep the supply ripple within acceptable limits under light load conditions. Signed-off-by: Martijn de Gouw Acked-by: Conor Dooley Link: https://patch.msgid.link/20250525071823.819342-1-martijn.de.gouw@prodrive-technologies.com Signed-off-by: Mark Brown --- include/dt-bindings/regulator/nxp,pca9450-regulator.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/dt-bindings/regulator/nxp,pca9450-regulator.h (limited to 'include') diff --git a/include/dt-bindings/regulator/nxp,pca9450-regulator.h b/include/dt-bindings/regulator/nxp,pca9450-regulator.h new file mode 100644 index 000000000000..08434caef429 --- /dev/null +++ b/include/dt-bindings/regulator/nxp,pca9450-regulator.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Device Tree binding constants for the NXP PCA9450A/B/C PMIC regulators + */ + +#ifndef _DT_BINDINGS_REGULATORS_NXP_PCA9450_H +#define _DT_BINDINGS_REGULATORS_NXP_PCA9450_H + +/* + * Buck mode constants which may be used in devicetree properties (eg. + * regulator-initial-mode, regulator-allowed-modes). + * See the manufacturer's datasheet for more information on these modes. + */ + +#define PCA9450_BUCK_MODE_AUTO 0 +#define PCA9450_BUCK_MODE_FORCE_PWM 1 + +#endif -- cgit v1.2.3 From dc38441890ec0c54d032395ea9c365a4307185fa Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Fri, 16 May 2025 11:26:21 +0300 Subject: iio: backend: add support for filter config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add backend support for digital filter type selection. This setting can be adjusted within the IP cores interfacing devices. The IP core can be configured based on the state of the actual digital filter configuration of the part. Reviewed-by: Nuno Sá Signed-off-by: Antoniu Miclaus Link: https://patch.msgid.link/20250516082630.8236-2-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index e59d909cb659..8a1690f21318 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -76,6 +76,14 @@ enum iio_backend_interface_type { IIO_BACKEND_INTERFACE_MAX }; +enum iio_backend_filter_type { + IIO_BACKEND_FILTER_TYPE_DISABLED, + IIO_BACKEND_FILTER_TYPE_SINC1, + IIO_BACKEND_FILTER_TYPE_SINC5, + IIO_BACKEND_FILTER_TYPE_SINC5_PLUS_COMP, + IIO_BACKEND_FILTER_TYPE_MAX +}; + /** * struct iio_backend_ops - operations structure for an iio_backend * @enable: Enable backend. @@ -101,6 +109,7 @@ enum iio_backend_interface_type { * @read_raw: Read a channel attribute from a backend device * @debugfs_print_chan_status: Print channel status into a buffer. * @debugfs_reg_access: Read or write register value of backend. + * @filter_type_set: Set filter type. * @ddr_enable: Enable interface DDR (Double Data Rate) mode. * @ddr_disable: Disable interface DDR (Double Data Rate) mode. * @data_stream_enable: Enable data stream. @@ -153,6 +162,8 @@ struct iio_backend_ops { size_t len); int (*debugfs_reg_access)(struct iio_backend *back, unsigned int reg, unsigned int writeval, unsigned int *readval); + int (*filter_type_set)(struct iio_backend *back, + enum iio_backend_filter_type type); int (*ddr_enable)(struct iio_backend *back); int (*ddr_disable)(struct iio_backend *back); int (*data_stream_enable)(struct iio_backend *back); @@ -195,6 +206,8 @@ int iio_backend_data_sample_trigger(struct iio_backend *back, int devm_iio_backend_request_buffer(struct device *dev, struct iio_backend *back, struct iio_dev *indio_dev); +int iio_backend_filter_type_set(struct iio_backend *back, + enum iio_backend_filter_type type); int iio_backend_ddr_enable(struct iio_backend *back); int iio_backend_ddr_disable(struct iio_backend *back); int iio_backend_data_stream_enable(struct iio_backend *back); -- cgit v1.2.3 From 995fd6e002b0d1ac435faed68005585906467e92 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Fri, 16 May 2025 11:26:22 +0300 Subject: iio: backend: add support for data alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add backend support for staring the capture synchronization. When activated, it initates a proccess that aligns the sample's most significant bit (MSB) based solely on the captured data, without considering any other external signals. Reviewed-by: Nuno Sá Signed-off-by: Antoniu Miclaus Link: https://patch.msgid.link/20250516082630.8236-3-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 8a1690f21318..c579eb523466 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -110,6 +110,7 @@ enum iio_backend_filter_type { * @debugfs_print_chan_status: Print channel status into a buffer. * @debugfs_reg_access: Read or write register value of backend. * @filter_type_set: Set filter type. + * @interface_data_align: Perform the data alignment process. * @ddr_enable: Enable interface DDR (Double Data Rate) mode. * @ddr_disable: Disable interface DDR (Double Data Rate) mode. * @data_stream_enable: Enable data stream. @@ -164,6 +165,7 @@ struct iio_backend_ops { unsigned int writeval, unsigned int *readval); int (*filter_type_set)(struct iio_backend *back, enum iio_backend_filter_type type); + int (*interface_data_align)(struct iio_backend *back, u32 timeout_us); int (*ddr_enable)(struct iio_backend *back); int (*ddr_disable)(struct iio_backend *back); int (*data_stream_enable)(struct iio_backend *back); @@ -208,6 +210,7 @@ int devm_iio_backend_request_buffer(struct device *dev, struct iio_dev *indio_dev); int iio_backend_filter_type_set(struct iio_backend *back, enum iio_backend_filter_type type); +int iio_backend_interface_data_align(struct iio_backend *back, u32 timeout_us); int iio_backend_ddr_enable(struct iio_backend *back); int iio_backend_ddr_disable(struct iio_backend *back); int iio_backend_data_stream_enable(struct iio_backend *back); -- cgit v1.2.3 From 5ef4cc6d2414d115cf09a6a33c3155f12e58a27d Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Fri, 16 May 2025 11:26:23 +0300 Subject: iio: backend: add support for number of lanes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add iio backend support for number of lanes to be enabled. Reviewed-by: Nuno Sá Signed-off-by: Antoniu Miclaus Link: https://patch.msgid.link/20250516082630.8236-4-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index c579eb523466..1f528fbd9d11 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -111,6 +111,7 @@ enum iio_backend_filter_type { * @debugfs_reg_access: Read or write register value of backend. * @filter_type_set: Set filter type. * @interface_data_align: Perform the data alignment process. + * @num_lanes_set: Set the number of lanes enabled. * @ddr_enable: Enable interface DDR (Double Data Rate) mode. * @ddr_disable: Disable interface DDR (Double Data Rate) mode. * @data_stream_enable: Enable data stream. @@ -166,6 +167,7 @@ struct iio_backend_ops { int (*filter_type_set)(struct iio_backend *back, enum iio_backend_filter_type type); int (*interface_data_align)(struct iio_backend *back, u32 timeout_us); + int (*num_lanes_set)(struct iio_backend *back, unsigned int num_lanes); int (*ddr_enable)(struct iio_backend *back); int (*ddr_disable)(struct iio_backend *back); int (*data_stream_enable)(struct iio_backend *back); @@ -211,6 +213,7 @@ int devm_iio_backend_request_buffer(struct device *dev, int iio_backend_filter_type_set(struct iio_backend *back, enum iio_backend_filter_type type); int iio_backend_interface_data_align(struct iio_backend *back, u32 timeout_us); +int iio_backend_num_lanes_set(struct iio_backend *back, unsigned int num_lanes); int iio_backend_ddr_enable(struct iio_backend *back); int iio_backend_ddr_disable(struct iio_backend *back); int iio_backend_data_stream_enable(struct iio_backend *back); -- cgit v1.2.3 From 342c52dde2f031add61ddeaced9c100f88e04d09 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Fri, 6 Jun 2025 16:19:17 +0200 Subject: iio: core: add ADC delay calibration definition ADCs as ad7606 implement a phase calibration as a delay. Add such definition, needed for ad7606. Signed-off-by: Angelo Dureghello Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20250606-wip-bl-ad7606-calibration-v9-2-6e014a1f92a2@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index d89982c98368..ad2761efcc83 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -69,6 +69,7 @@ enum iio_chan_info_enum { IIO_CHAN_INFO_CALIBAMBIENT, IIO_CHAN_INFO_ZEROPOINT, IIO_CHAN_INFO_TROUGH, + IIO_CHAN_INFO_CONVDELAY, }; #endif /* _IIO_TYPES_H_ */ -- cgit v1.2.3 From 4c46a471be12216347ba707f8eadadbf5d68e698 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 3 Jun 2025 16:38:53 +0530 Subject: firmware: arm_ffa: Fix the missing entry in struct ffa_indirect_msg_hdr As per the spec, one 32 bit reserved entry is missing here, add it. Signed-off-by: Viresh Kumar Fixes: 910cc1acc9b4 ("firmware: arm_ffa: Add support for passing UUID in FFA_MSG_SEND2") Reviewed-by: Bertrand Marquis Message-Id: <28a624fbf416975de4fbe08cfbf7c2db89cb630e.1748948911.git.viresh.kumar@linaro.org> Signed-off-by: Sudeep Holla --- include/linux/arm_ffa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 5bded24dc24f..e1634897e159 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -283,6 +283,7 @@ struct ffa_indirect_msg_hdr { u32 offset; u32 send_recv_id; u32 size; + u32 res1; uuid_t uuid; }; -- cgit v1.2.3 From de1c831a7898f164c1c2703c6b2b9e4fb4bebefc Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 15 Apr 2025 10:02:33 -0700 Subject: slab: Decouple slab_debug and no_hash_pointers Some system owners use slab_debug=FPZ (or similar) as a hardening option, but do not want to be forced into having kernel addresses exposed due to the implicit "no_hash_pointers" boot param setting.[1] Introduce the "hash_pointers" boot param, which defaults to "auto" (the current behavior), but also includes "always" (forcing on hashing even when "slab_debug=..." is defined), and "never". The existing "no_hash_pointers" boot param becomes an alias for "hash_pointers=never". This makes it possible to boot with "slab_debug=FPZ hash_pointers=always". Link: https://github.com/KSPP/linux/issues/368 [1] Fixes: 792702911f58 ("slub: force on no_hash_pointers when slub_debug is enabled") Co-developed-by: Sergio Perez Gonzalez Signed-off-by: Sergio Perez Gonzalez Acked-by: Vlastimil Babka Acked-by: David Rientjes Reviewed-by: Bagas Sanjaya Signed-off-by: Kees Cook Reviewed-by: Harry Yoo Acked-by: Rafael Aquini Tested-by: Petr Mladek Reviewed-by: Petr Mladek Link: https://patch.msgid.link/20250415170232.it.467-kees@kernel.org [kees@kernel.org: Add note about hash_pointers into slab_debug kernel parameter documentation.] Signed-off-by: Petr Mladek --- include/linux/sprintf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h index 51cab2def9ec..521bb2cd2648 100644 --- a/include/linux/sprintf.h +++ b/include/linux/sprintf.h @@ -22,7 +22,7 @@ __scanf(2, 0) int vsscanf(const char *, const char *, va_list); /* These are for specific cases, do not use without real need */ extern bool no_hash_pointers; -int no_hash_pointers_enable(char *str); +void hash_pointers_finalize(bool slub_debug); /* Used for Rust formatting ('%pA') */ char *rust_fmt_argument(char *buf, char *end, const void *ptr); -- cgit v1.2.3 From 78b2d9908b42ea70e42f00af5db08ad514727a45 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 5 May 2025 13:14:22 -0700 Subject: net: intel: rename 'hena' to 'hashcfg' for clarity i40e, ice, and iAVF all use 'hena' as a shorthand for the "hash enable" configuration. This comes originally from the X710 datasheet 'xxQF_HENA' registers. In the context of the registers the meaning is fairly clear. However, on its own, hena is a weird name that can be more difficult to understand. This is especially true in ice. The E810 hardware doesn't even have registers with HENA in the name. Replace the shorthand 'hena' with 'hashcfg'. This makes it clear the variables deal with the Hash configuration, not just a single boolean on/off for all hashing. Do not update the register names. These come directly from the datasheet for X710 and X722, and it is more important that the names can be searched. Suggested-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Signed-off-by: Jacob Keller Tested-by: Rafal Romanowski Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index cf0afa60e4a7..362d1cdc8cd8 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -132,8 +132,8 @@ enum virtchnl_ops { VIRTCHNL_OP_RELEASE_RDMA_IRQ_MAP = VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP, VIRTCHNL_OP_CONFIG_RSS_KEY = 23, VIRTCHNL_OP_CONFIG_RSS_LUT = 24, - VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25, - VIRTCHNL_OP_SET_RSS_HENA = 26, + VIRTCHNL_OP_GET_RSS_HASHCFG_CAPS = 25, + VIRTCHNL_OP_SET_RSS_HASHCFG = 26, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING = 27, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING = 28, VIRTCHNL_OP_REQUEST_QUEUES = 29, @@ -974,18 +974,18 @@ struct virtchnl_rss_lut { VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_rss_lut); #define virtchnl_rss_lut_LEGACY_SIZEOF 6 -/* VIRTCHNL_OP_GET_RSS_HENA_CAPS - * VIRTCHNL_OP_SET_RSS_HENA - * VF sends these messages to get and set the hash filter enable bits for RSS. +/* VIRTCHNL_OP_GET_RSS_HASHCFG_CAPS + * VIRTCHNL_OP_SET_RSS_HASHCFG + * VF sends these messages to get and set the hash filter configuration for RSS. * By default, the PF sets these to all possible traffic types that the * hardware supports. The VF can query this value if it wants to change the * traffic types that are hashed by the hardware. */ -struct virtchnl_rss_hena { - u64 hena; +struct virtchnl_rss_hashcfg { + u64 hashcfg; }; -VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hena); +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hashcfg); /* Type of RSS algorithm */ enum virtchnl_rss_algorithm { @@ -1779,10 +1779,10 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_CONFIG_RSS_HFUNC: valid_len = sizeof(struct virtchnl_rss_hfunc); break; - case VIRTCHNL_OP_GET_RSS_HENA_CAPS: + case VIRTCHNL_OP_GET_RSS_HASHCFG_CAPS: break; - case VIRTCHNL_OP_SET_RSS_HENA: - valid_len = sizeof(struct virtchnl_rss_hena); + case VIRTCHNL_OP_SET_RSS_HASHCFG: + valid_len = sizeof(struct virtchnl_rss_hashcfg); break; case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING: case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING: -- cgit v1.2.3 From 141d0c9037ca57dac2d2c4e5d3c21521aa70ff12 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 5 May 2025 13:14:23 -0700 Subject: net: intel: move RSS packet classifier types to libie The Intel i40e, iavf, and ice drivers all include a definition of the packet classifier filter types used to program RSS hash enable bits. For i40e, these bits are used for both the PF and VF to configure the PFQF_HENA and VFQF_HENA registers. For ice and iAVF, these bits are used to communicate the desired hash enable filter over virtchnl via its struct virtchnl_rss_hashena. The virtchnl.h header makes no mention of where the bit definitions reside. Maintaining a separate copy of these bits across three drivers is cumbersome. Move the definition to libie as a new pctype.h header file. Each driver can include this, and drop its own definition. The ice implementation also defined a ICE_AVF_FLOW_FIELD_INVALID, intending to use this to indicate when there were no hash enable bits set. This is confusing, since the enumeration is using bit positions. A value of 0 *should* indicate the first bit. Instead, rewrite the code that uses ICE_AVF_FLOW_FIELD_INVALID to just check if the avf_hash is zero. From context this should be clear that we're checking if none of the bits are set. The values are kept as bit positions instead of encoding the BIT_ULL directly into their value. While most users will simply use BIT_ULL immediately, i40e uses the macros both with BIT_ULL and test_bit/set_bit calls. Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Reviewed-by: Aleksandr Loktionov Signed-off-by: Jacob Keller Tested-by: Rafal Romanowski Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 1 + include/linux/net/intel/libie/pctype.h | 41 ++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 include/linux/net/intel/libie/pctype.h (limited to 'include') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 362d1cdc8cd8..5be1881abbb6 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -982,6 +982,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_rss_lut); * traffic types that are hashed by the hardware. */ struct virtchnl_rss_hashcfg { + /* Bits defined by enum libie_filter_pctype */ u64 hashcfg; }; diff --git a/include/linux/net/intel/libie/pctype.h b/include/linux/net/intel/libie/pctype.h new file mode 100644 index 000000000000..d783417fbf36 --- /dev/null +++ b/include/linux/net/intel/libie/pctype.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2025 Intel Corporation */ + +#ifndef __LIBIE_PCTYPE_H +#define __LIBIE_PCTYPE_H + +/* Packet Classifier Type indexes, used to set the xxQF_HENA registers. Also + * communicated over the virtchnl API as part of struct virtchnl_rss_hashena. + */ +enum libie_filter_pctype { + /* Note: Values 0-28 are reserved for future use. + * Value 29, 30, 32 are not supported on XL710 and X710. + */ + LIBIE_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP = 29, + LIBIE_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP = 30, + LIBIE_FILTER_PCTYPE_NONF_IPV4_UDP = 31, + LIBIE_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK = 32, + LIBIE_FILTER_PCTYPE_NONF_IPV4_TCP = 33, + LIBIE_FILTER_PCTYPE_NONF_IPV4_SCTP = 34, + LIBIE_FILTER_PCTYPE_NONF_IPV4_OTHER = 35, + LIBIE_FILTER_PCTYPE_FRAG_IPV4 = 36, + /* Note: Values 37-38 are reserved for future use. + * Value 39, 40, 42 are not supported on XL710 and X710. + */ + LIBIE_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP = 39, + LIBIE_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP = 40, + LIBIE_FILTER_PCTYPE_NONF_IPV6_UDP = 41, + LIBIE_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK = 42, + LIBIE_FILTER_PCTYPE_NONF_IPV6_TCP = 43, + LIBIE_FILTER_PCTYPE_NONF_IPV6_SCTP = 44, + LIBIE_FILTER_PCTYPE_NONF_IPV6_OTHER = 45, + LIBIE_FILTER_PCTYPE_FRAG_IPV6 = 46, + /* Note: Value 47 is reserved for future use */ + LIBIE_FILTER_PCTYPE_FCOE_OX = 48, + LIBIE_FILTER_PCTYPE_FCOE_RX = 49, + LIBIE_FILTER_PCTYPE_FCOE_OTHER = 50, + /* Note: Values 51-62 are reserved for future use */ + LIBIE_FILTER_PCTYPE_L2_PAYLOAD = 63 +}; + +#endif /* __LIBIE_PCTYPE_H */ -- cgit v1.2.3 From 7b746d584ab97c66a0aa8ef15da1e2aa8152e3fa Mon Sep 17 00:00:00 2001 From: Andrea della Porta Date: Thu, 29 May 2025 15:50:38 +0200 Subject: dt-bindings: clock: Add RaspberryPi RP1 clock bindings Add device tree bindings for the clock generator found in RP1 multi function device, and relative entries in MAINTAINERS file. Signed-off-by: Andrea della Porta Reviewed-by: Krzysztof Kozlowski Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20250529135052.28398-1-andrea.porta@suse.com Signed-off-by: Florian Fainelli --- include/dt-bindings/clock/raspberrypi,rp1-clocks.h | 61 ++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 include/dt-bindings/clock/raspberrypi,rp1-clocks.h (limited to 'include') diff --git a/include/dt-bindings/clock/raspberrypi,rp1-clocks.h b/include/dt-bindings/clock/raspberrypi,rp1-clocks.h new file mode 100644 index 000000000000..248efb895f35 --- /dev/null +++ b/include/dt-bindings/clock/raspberrypi,rp1-clocks.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (C) 2021 Raspberry Pi Ltd. + */ + +#ifndef __DT_BINDINGS_CLOCK_RASPBERRYPI_RP1 +#define __DT_BINDINGS_CLOCK_RASPBERRYPI_RP1 + +#define RP1_PLL_SYS_CORE 0 +#define RP1_PLL_AUDIO_CORE 1 +#define RP1_PLL_VIDEO_CORE 2 + +#define RP1_PLL_SYS 3 +#define RP1_PLL_AUDIO 4 +#define RP1_PLL_VIDEO 5 + +#define RP1_PLL_SYS_PRI_PH 6 +#define RP1_PLL_SYS_SEC_PH 7 +#define RP1_PLL_AUDIO_PRI_PH 8 + +#define RP1_PLL_SYS_SEC 9 +#define RP1_PLL_AUDIO_SEC 10 +#define RP1_PLL_VIDEO_SEC 11 + +#define RP1_CLK_SYS 12 +#define RP1_CLK_SLOW_SYS 13 +#define RP1_CLK_DMA 14 +#define RP1_CLK_UART 15 +#define RP1_CLK_ETH 16 +#define RP1_CLK_PWM0 17 +#define RP1_CLK_PWM1 18 +#define RP1_CLK_AUDIO_IN 19 +#define RP1_CLK_AUDIO_OUT 20 +#define RP1_CLK_I2S 21 +#define RP1_CLK_MIPI0_CFG 22 +#define RP1_CLK_MIPI1_CFG 23 +#define RP1_CLK_PCIE_AUX 24 +#define RP1_CLK_USBH0_MICROFRAME 25 +#define RP1_CLK_USBH1_MICROFRAME 26 +#define RP1_CLK_USBH0_SUSPEND 27 +#define RP1_CLK_USBH1_SUSPEND 28 +#define RP1_CLK_ETH_TSU 29 +#define RP1_CLK_ADC 30 +#define RP1_CLK_SDIO_TIMER 31 +#define RP1_CLK_SDIO_ALT_SRC 32 +#define RP1_CLK_GP0 33 +#define RP1_CLK_GP1 34 +#define RP1_CLK_GP2 35 +#define RP1_CLK_GP3 36 +#define RP1_CLK_GP4 37 +#define RP1_CLK_GP5 38 +#define RP1_CLK_VEC 39 +#define RP1_CLK_DPI 40 +#define RP1_CLK_MIPI0_DPI 41 +#define RP1_CLK_MIPI1_DPI 42 + +/* Extra PLL output channels - RP1B0 only */ +#define RP1_PLL_VIDEO_PRI_PH 43 +#define RP1_PLL_AUDIO_TERN 44 + +#endif -- cgit v1.2.3 From 9b8367b604c739947ec308874f087fe0eb80f412 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 6 Jun 2025 09:31:36 -0700 Subject: cgroup: Add bpf prog revisions to struct cgroup_bpf One of key items in mprog API is revision for prog list. The revision number will be increased if the prog list changed, e.g., attach, detach or replace. Add 'revisions' field to struct cgroup_bpf, representing revisions for all cgroup related attachment types. The initial revision value is set to 1, the same as kernel mprog implementations. Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250606163136.2428732-1-yonghong.song@linux.dev --- include/linux/bpf-cgroup-defs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index 0985221d5478..c9e6b26abab6 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -63,6 +63,7 @@ struct cgroup_bpf { */ struct hlist_head progs[MAX_CGROUP_BPF_ATTACH_TYPE]; u8 flags[MAX_CGROUP_BPF_ATTACH_TYPE]; + u64 revisions[MAX_CGROUP_BPF_ATTACH_TYPE]; /* list of cgroup shared storages */ struct list_head storages; -- cgit v1.2.3 From 1209339844601ec1766f4ff430673fbcfe42bb51 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 6 Jun 2025 09:31:41 -0700 Subject: bpf: Implement mprog API on top of existing cgroup progs Current cgroup prog ordering is appending at attachment time. This is not ideal. In some cases, users want specific ordering at a particular cgroup level. To address this, the existing mprog API seems an ideal solution with supporting BPF_F_BEFORE and BPF_F_AFTER flags. But there are a few obstacles to directly use kernel mprog interface. Currently cgroup bpf progs already support prog attach/detach/replace and link-based attach/detach/replace. For example, in struct bpf_prog_array_item, the cgroup_storage field needs to be together with bpf prog. But the mprog API struct bpf_mprog_fp only has bpf_prog as the member, which makes it difficult to use kernel mprog interface. In another case, the current cgroup prog detach tries to use the same flag as in attach. This is different from mprog kernel interface which uses flags passed from user space. So to avoid modifying existing behavior, I made the following changes to support mprog API for cgroup progs: - The support is for prog list at cgroup level. Cross-level prog list (a.k.a. effective prog list) is not supported. - Previously, BPF_F_PREORDER is supported only for prog attach, now BPF_F_PREORDER is also supported by link-based attach. - For attach, BPF_F_BEFORE/BPF_F_AFTER/BPF_F_ID/BPF_F_LINK is supported similar to kernel mprog but with different implementation. - For detach and replace, use the existing implementation. - For attach, detach and replace, the revision for a particular prog list, associated with a particular attach type, will be updated by increasing count by 1. Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250606163141.2428937-1-yonghong.song@linux.dev --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f1160ebbf526..25e9cf92ffaf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1794,6 +1794,13 @@ union bpf_attr { }; __u64 expected_revision; } netkit; + struct { + union { + __u32 relative_fd; + __u32 relative_id; + }; + __u64 expected_revision; + } cgroup; }; } link_create; -- cgit v1.2.3 From c7beb48344d2ea0f3f1869b078309dbeb2ed4c96 Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Sat, 7 Jun 2025 00:58:14 +0800 Subject: bpf: Add cookie to tracing bpf_link_info bpf_tramp_link includes cookie info, we can add it in bpf_link_info. Signed-off-by: Tao Chen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250606165818.3394397-1-chen.dylane@linux.dev --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 25e9cf92ffaf..194ed9891b40 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6665,6 +6665,8 @@ struct bpf_link_info { __u32 attach_type; __u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */ __u32 target_btf_id; /* BTF type id inside the object */ + __u32 :32; + __u64 cookie; } tracing; struct { __u64 cgroup_id; -- cgit v1.2.3 From ae7795a8c2582b5fb7971132753810a3f158e7b2 Mon Sep 17 00:00:00 2001 From: Huan Tang Date: Fri, 23 May 2025 14:46:04 +0800 Subject: scsi: ufs: core: Add HID support Follow JESD220G, support HID(Host Initiated Defragmentation) through sysfs, the relevant sysfs nodes are as follows: 1. analysis_trigger 2. defrag_trigger 3. fragmented_size 4. defrag_size 5. progress_ratio 6. state The detailed definition of the six nodes can be found in the sysfs documentation. HID's execution policy is given to user-space. Signed-off-by: Huan Tang Signed-off-by: Wenxing Cheng Link: https://lore.kernel.org/r/20250523064604.800-1-tanghuan@vivo.com Suggested-by: Bart Van Assche Reviewed-by: Peter Wang Reviewed-by: Bean Huo Reviewed-by: Bart Van Assche Reviewed-by: Yangtao Li Signed-off-by: Martin K. Petersen --- include/ufs/ufs.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h index c0c59a8f7256..72fd385037a6 100644 --- a/include/ufs/ufs.h +++ b/include/ufs/ufs.h @@ -182,6 +182,11 @@ enum attr_idn { QUERY_ATTR_IDN_CURR_WB_BUFF_SIZE = 0x1F, QUERY_ATTR_IDN_TIMESTAMP = 0x30, QUERY_ATTR_IDN_DEV_LVL_EXCEPTION_ID = 0x34, + QUERY_ATTR_IDN_HID_DEFRAG_OPERATION = 0x35, + QUERY_ATTR_IDN_HID_AVAILABLE_SIZE = 0x36, + QUERY_ATTR_IDN_HID_SIZE = 0x37, + QUERY_ATTR_IDN_HID_PROGRESS_RATIO = 0x38, + QUERY_ATTR_IDN_HID_STATE = 0x39, QUERY_ATTR_IDN_WB_BUF_RESIZE_HINT = 0x3C, QUERY_ATTR_IDN_WB_BUF_RESIZE_EN = 0x3D, QUERY_ATTR_IDN_WB_BUF_RESIZE_STATUS = 0x3E, @@ -401,6 +406,7 @@ enum { UFS_DEV_HPB_SUPPORT = BIT(7), UFS_DEV_WRITE_BOOSTER_SUP = BIT(8), UFS_DEV_LVL_EXCEPTION_SUP = BIT(12), + UFS_DEV_HID_SUPPORT = BIT(13), }; #define UFS_DEV_HPB_SUPPORT_VERSION 0x310 @@ -466,6 +472,24 @@ enum ufs_ref_clk_freq { REF_CLK_FREQ_INVAL = -1, }; +/* bDefragOperation attribute values */ +enum ufs_hid_defrag_operation { + HID_ANALYSIS_AND_DEFRAG_DISABLE = 0, + HID_ANALYSIS_ENABLE = 1, + HID_ANALYSIS_AND_DEFRAG_ENABLE = 2, +}; + +/* bHIDState attribute values */ +enum ufs_hid_state { + HID_IDLE = 0, + ANALYSIS_IN_PROGRESS = 1, + DEFRAG_REQUIRED = 2, + DEFRAG_IN_PROGRESS = 3, + DEFRAG_COMPLETED = 4, + DEFRAG_NOT_REQUIRED = 5, + NUM_UFS_HID_STATES = 6, +}; + /* bWriteBoosterBufferResizeEn attribute */ enum wb_resize_en { WB_RESIZE_EN_IDLE = 0, @@ -625,6 +649,8 @@ struct ufs_dev_info { u32 rtc_update_period; u8 rtt_cap; /* bDeviceRTTCap */ + + bool hid_sup; }; /* -- cgit v1.2.3 From 9d2c232d575a8c8dfa66276ed7edccfac482a4df Mon Sep 17 00:00:00 2001 From: Kassey Li Date: Wed, 21 May 2025 09:17:11 +0800 Subject: scsi: trace: Show rtn in string for scsi_dispatch_cmd_error() By default the scsi_dispatch_cmd_error() return value is displayed in decimal: kworker/3:1H-183 [003] .... 51.035474: scsi_dispatch_cmd_error: host_no=0 channel=0 id=0 lun=4 data_sgl=1 prot_sgl=0 prot_op=SCSI_PROT_NORMAL cmnd=(READ_10 lba=3907214 txlen=1 protect=0 raw=28 00 00 3b 9e 8e 00 00 01 00) rtn=4181 However, these numbers are not particularly helpful wrt. debugging errors. Especially since the kernel code consistently uses the following defines in hexadecimal: SCSI_MLQUEUE_HOST_BUSY 0x1055 SCSI_MLQUEUE_DEVICE_BUSY 0x1056 SCSI_MLQUEUE_EH_RETRY 0x1057 SCSI_MLQUEUE_TARGET_BUSY 0x1058 Switch to using the string form of these values in the trace output: dd-1059 [007] ..... 31.689529: scsi_dispatch_cmd_error: host_no=0 channel=0 id=0 lun=4 data_sgl=65 prot_sgl=0 prot_op=SCSI_PROT_NORMAL driver_tag=23 scheduler_tag=117 cmnd=(READ_10 lba=0 txlen=128 protect=0 raw=28 00 00 00 00 00 00 00 80 00) rtn=SCSI_MLQUEUE_DEVICE_BUSY Signed-off-by: Kassey Li Link: https://lore.kernel.org/r/20250521011711.1983625-1-quic_yingangl@quicinc.com Signed-off-by: Martin K. Petersen --- include/trace/events/scsi.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/scsi.h b/include/trace/events/scsi.h index bf6cc98d9122..c36c72ab7f2b 100644 --- a/include/trace/events/scsi.h +++ b/include/trace/events/scsi.h @@ -200,6 +200,14 @@ TRACE_EVENT(scsi_dispatch_cmd_start, __print_hex(__get_dynamic_array(cmnd), __entry->cmd_len)) ); +#define scsi_rtn_name(result) { result, #result } +#define show_rtn_name(val) \ + __print_symbolic(val, \ + scsi_rtn_name(SCSI_MLQUEUE_HOST_BUSY), \ + scsi_rtn_name(SCSI_MLQUEUE_DEVICE_BUSY), \ + scsi_rtn_name(SCSI_MLQUEUE_EH_RETRY), \ + scsi_rtn_name(SCSI_MLQUEUE_TARGET_BUSY)) + TRACE_EVENT(scsi_dispatch_cmd_error, TP_PROTO(struct scsi_cmnd *cmd, int rtn), @@ -240,14 +248,15 @@ TRACE_EVENT(scsi_dispatch_cmd_error, TP_printk("host_no=%u channel=%u id=%u lun=%u data_sgl=%u prot_sgl=%u" \ " prot_op=%s driver_tag=%d scheduler_tag=%d cmnd=(%s %s raw=%s)" \ - " rtn=%d", + " rtn=%s", __entry->host_no, __entry->channel, __entry->id, __entry->lun, __entry->data_sglen, __entry->prot_sglen, show_prot_op_name(__entry->prot_op), __entry->driver_tag, __entry->scheduler_tag, show_opcode_name(__entry->opcode), __parse_cdb(__get_dynamic_array(cmnd), __entry->cmd_len), __print_hex(__get_dynamic_array(cmnd), __entry->cmd_len), - __entry->rtn) + show_rtn_name(__entry->rtn) + ) ); DECLARE_EVENT_CLASS(scsi_cmd_done_timeout_template, -- cgit v1.2.3 From 03c68a0f8c68936a0bb915b030693923784724cb Mon Sep 17 00:00:00 2001 From: Luis Gerhorst Date: Tue, 3 Jun 2025 23:13:18 +0200 Subject: bpf, arm64, powerpc: Add bpf_jit_bypass_spec_v1/v4() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JITs can set bpf_jit_bypass_spec_v1/v4() if they want the verifier to skip analysis/patching for the respective vulnerability. For v4, this will reduce the number of barriers the verifier inserts. For v1, it allows more programs to be accepted. The primary motivation for this is to not regress unpriv BPF's performance on ARM64 in a future commit where BPF_NOSPEC is also used against Spectre v1. This has the user-visible change that v1-induced rejections on non-vulnerable PowerPC CPUs are avoided. For now, this does not change the semantics of BPF_NOSPEC. It is still a v4-only barrier and must not be implemented if bypass_spec_v4 is always true for the arch. Changing it to a v1 AND v4-barrier is done in a future commit. As an alternative to bypass_spec_v1/v4, one could introduce NOSPEC_V1 AND NOSPEC_V4 instructions and allow backends to skip their lowering as suggested by commit f5e81d111750 ("bpf: Introduce BPF nospec instruction for mitigating Spectre v4"). Adding bpf_jit_bypass_spec_v1/v4() was found to be preferable for the following reason: * bypass_spec_v1/v4 benefits non-vulnerable CPUs: Always performing the same analysis (not taking into account whether the current CPU is vulnerable), needlessly restricts users of CPUs that are not vulnerable. The only use case for this would be portability-testing, but this can later be added easily when needed by allowing users to force bypass_spec_v1/v4 to false. * Portability is still acceptable: Directly disabling the analysis instead of skipping the lowering of BPF_NOSPEC(_V1/V4) might allow programs on non-vulnerable CPUs to be accepted while the program will be rejected on vulnerable CPUs. With the fallback to speculation barriers for Spectre v1 implemented in a future commit, this will only affect programs that do variable stack-accesses or are very complex. For PowerPC, the SEC_FTR checking in bpf_jit_bypass_spec_v4() is based on the check that was previously located in the BPF_NOSPEC case. For LoongArch, it would likely be safe to set both bpf_jit_bypass_spec_v1() and _v4() according to commit a6f6a95f2580 ("LoongArch, bpf: Fix jit to skip speculation barrier opcode"). This is omitted here as I am unable to do any testing for LoongArch. Hari's ack concerns the PowerPC part only. Signed-off-by: Luis Gerhorst Acked-by: Hari Bathini Cc: Henriette Herzog Cc: Maximilian Ott Cc: Milan Stephan Link: https://lore.kernel.org/r/20250603211318.337474-1-luis.gerhorst@fau.de Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5b25d278409b..5dd556e89cce 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2288,6 +2288,9 @@ bpf_prog_run_array_uprobe(const struct bpf_prog_array *array, return ret; } +bool bpf_jit_bypass_spec_v1(void); +bool bpf_jit_bypass_spec_v4(void); + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); extern struct mutex bpf_stats_enabled_mutex; @@ -2475,12 +2478,16 @@ static inline bool bpf_allow_uninit_stack(const struct bpf_token *token) static inline bool bpf_bypass_spec_v1(const struct bpf_token *token) { - return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); + return bpf_jit_bypass_spec_v1() || + cpu_mitigations_off() || + bpf_token_capable(token, CAP_PERFMON); } static inline bool bpf_bypass_spec_v4(const struct bpf_token *token) { - return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); + return bpf_jit_bypass_spec_v4() || + cpu_mitigations_off() || + bpf_token_capable(token, CAP_PERFMON); } int bpf_map_new_fd(struct bpf_map *map, int flags); -- cgit v1.2.3 From dff883d9e93a7f2f2fa4e38a9444b2c79d6da91a Mon Sep 17 00:00:00 2001 From: Luis Gerhorst Date: Tue, 3 Jun 2025 23:17:03 +0200 Subject: bpf, arm64, powerpc: Change nospec to include v1 barrier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This changes the semantics of BPF_NOSPEC (previously a v4-only barrier) to always emit a speculation barrier that works against both Spectre v1 AND v4. If mitigation is not needed on an architecture, the backend should set bpf_jit_bypass_spec_v4/v1(). As of now, this commit only has the user-visible implication that unpriv BPF's performance on PowerPC is reduced. This is the case because we have to emit additional v1 barrier instructions for BPF_NOSPEC now. This commit is required for a future commit to allow us to rely on BPF_NOSPEC for Spectre v1 mitigation. As of this commit, the feature that nospec acts as a v1 barrier is unused. Commit f5e81d111750 ("bpf: Introduce BPF nospec instruction for mitigating Spectre v4") noted that mitigation instructions for v1 and v4 might be different on some archs. While this would potentially offer improved performance on PowerPC, it was dismissed after the following considerations: * Only having one barrier simplifies the verifier and allows us to easily rely on v4-induced barriers for reducing the complexity of v1-induced speculative path verification. * For the architectures that implemented BPF_NOSPEC, only PowerPC has distinct instructions for v1 and v4. Even there, some insns may be shared between the barriers for v1 and v4 (e.g., 'ori 31,31,0' and 'sync'). If this is still found to impact performance in an unacceptable way, BPF_NOSPEC can be split into BPF_NOSPEC_V1 and BPF_NOSPEC_V4 later. As an optimization, we can already skip v1/v4 insns from being emitted for PowerPC with this setup if bypass_spec_v1/v4 is set. Vulnerability-status for BPF_NOSPEC-based Spectre mitigations (v4 as of this commit, v1 in the future) is therefore: * x86 (32-bit and 64-bit), ARM64, and PowerPC (64-bit): Mitigated - This patch implements BPF_NOSPEC for these architectures. The previous v4-only version was supported since commit f5e81d111750 ("bpf: Introduce BPF nospec instruction for mitigating Spectre v4") and commit b7540d625094 ("powerpc/bpf: Emit stf barrier instruction sequences for BPF_NOSPEC"). * LoongArch: Not Vulnerable - Commit a6f6a95f2580 ("LoongArch, bpf: Fix jit to skip speculation barrier opcode") is the only other past commit related to BPF_NOSPEC and indicates that the insn is not required there. * MIPS: Vulnerable (if unprivileged BPF is enabled) - Commit a6f6a95f2580 ("LoongArch, bpf: Fix jit to skip speculation barrier opcode") indicates that it is not vulnerable, but this contradicts the kernel and Debian documentation. Therefore, I assume that there exist vulnerable MIPS CPUs (but maybe not from Loongson?). In the future, BPF_NOSPEC could be implemented for MIPS based on the GCC speculation_barrier [1]. For now, we rely on unprivileged BPF being disabled by default. * Other: Unknown - To the best of my knowledge there is no definitive information available that indicates that any other arch is vulnerable. They are therefore left untouched (BPF_NOSPEC is not implemented, but bypass_spec_v1/v4 is also not set). I did the following testing to ensure the insn encoding is correct: * ARM64: * 'dsb nsh; isb' was successfully tested with the BPF CI in [2] * 'sb' locally using QEMU v7.2.15 -cpu max (emitted sb insn is executed for example with './test_progs -t verifier_array_access') * PowerPC: The following configs were tested locally with ppc64le QEMU v8.2 '-machine pseries -cpu POWER9': * STF_BARRIER_EIEIO + CONFIG_PPC_BOOK32_64 * STF_BARRIER_SYNC_ORI (forced on) + CONFIG_PPC_BOOK32_64 * STF_BARRIER_FALLBACK (forced on) + CONFIG_PPC_BOOK32_64 * CONFIG_PPC_E500 (forced on) + STF_BARRIER_EIEIO * CONFIG_PPC_E500 (forced on) + STF_BARRIER_SYNC_ORI (forced on) * CONFIG_PPC_E500 (forced on) + STF_BARRIER_FALLBACK (forced on) * CONFIG_PPC_E500 (forced on) + STF_BARRIER_NONE (forced on) Most of those cobinations should not occur in practice, but I was not able to get an PPC e6500 rootfs (for testing PPC_E500 without forcing it on). In any case, this should ensure that there are no unexpected conflicts between the insns when combined like this. Individual v1/v4 barriers were already emitted elsewhere. Hari's ack is for the PowerPC changes only. [1] https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=29b74545531f6afbee9fc38c267524326dbfbedf ("MIPS: Add speculation_barrier support") [2] https://github.com/kernel-patches/bpf/pull/8576 Signed-off-by: Luis Gerhorst Acked-by: Hari Bathini Cc: Henriette Herzog Cc: Maximilian Ott Cc: Milan Stephan Link: https://lore.kernel.org/r/20250603211703.337860-1-luis.gerhorst@fau.de Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index f5cf4d35d83e..eca229752cbe 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -82,7 +82,7 @@ struct ctl_table_header; #define BPF_CALL_ARGS 0xe0 /* unused opcode to mark speculation barrier for mitigating - * Speculative Store Bypass + * Spectre v1 and v4 */ #define BPF_NOSPEC 0xc0 -- cgit v1.2.3 From 9124a4508007f146206a279f0c5e81dde314bda1 Mon Sep 17 00:00:00 2001 From: Luis Gerhorst Date: Tue, 3 Jun 2025 23:20:24 +0200 Subject: bpf: Rename sanitize_stack_spill to nospec_result This is made to clarify that this flag will cause a nospec to be added after this insn and can therefore be relied upon to reduce speculative path analysis. Signed-off-by: Luis Gerhorst Acked-by: Kumar Kartikeya Dwivedi Cc: Henriette Herzog Cc: Maximilian Ott Cc: Milan Stephan Link: https://lore.kernel.org/r/20250603212024.338154-1-luis.gerhorst@fau.de Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 256274acb1d8..2b0954202226 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -580,7 +580,7 @@ struct bpf_insn_aux_data { u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ - bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ + bool nospec_result; /* result is unsafe under speculation, nospec must follow */ bool zext_dst; /* this insn zero extends dst reg */ bool needs_zext; /* alu op needs to clear upper bits */ bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */ -- cgit v1.2.3 From d6f1c85f22534d2d9fea9b32645da19c91ebe7d2 Mon Sep 17 00:00:00 2001 From: Luis Gerhorst Date: Tue, 3 Jun 2025 23:24:28 +0200 Subject: bpf: Fall back to nospec for Spectre v1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements the core of the series and causes the verifier to fall back to mitigating Spectre v1 using speculation barriers. The approach was presented at LPC'24 [1] and RAID'24 [2]. If we find any forbidden behavior on a speculative path, we insert a nospec (e.g., lfence speculation barrier on x86) before the instruction and stop verifying the path. While verifying a speculative path, we can furthermore stop verification of that path whenever we encounter a nospec instruction. A minimal example program would look as follows: A = true B = true if A goto e f() if B goto e unsafe() e: exit There are the following speculative and non-speculative paths (`cur->speculative` and `speculative` referring to the value of the push_stack() parameters): - A = true - B = true - if A goto e - A && !cur->speculative && !speculative - exit - !A && !cur->speculative && speculative - f() - if B goto e - B && cur->speculative && !speculative - exit - !B && cur->speculative && speculative - unsafe() If f() contains any unsafe behavior under Spectre v1 and the unsafe behavior matches `state->speculative && error_recoverable_with_nospec(err)`, do_check() will now add a nospec before f() instead of rejecting the program: A = true B = true if A goto e nospec f() if B goto e unsafe() e: exit Alternatively, the algorithm also takes advantage of nospec instructions inserted for other reasons (e.g., Spectre v4). Taking the program above as an example, speculative path exploration can stop before f() if a nospec was inserted there because of Spectre v4 sanitization. In this example, all instructions after the nospec are dead code (and with the nospec they are also dead code speculatively). For this, it relies on the fact that speculation barriers generally prevent all later instructions from executing if the speculation was not correct: * On Intel x86_64, lfence acts as full speculation barrier, not only as a load fence [3]: An LFENCE instruction or a serializing instruction will ensure that no later instructions execute, even speculatively, until all prior instructions complete locally. [...] Inserting an LFENCE instruction after a bounds check prevents later operations from executing before the bound check completes. This was experimentally confirmed in [4]. * On AMD x86_64, lfence is dispatch-serializing [5] (requires MSR C001_1029[1] to be set if the MSR is supported, this happens in init_amd()). AMD further specifies "A dispatch serializing instruction forces the processor to retire the serializing instruction and all previous instructions before the next instruction is executed" [8]. As dispatch is not specific to memory loads or branches, lfence therefore also affects all instructions there. Also, if retiring a branch means it's PC change becomes architectural (should be), this means any "wrong" speculation is aborted as required for this series. * ARM's SB speculation barrier instruction also affects "any instruction that appears later in the program order than the barrier" [6]. * PowerPC's barrier also affects all subsequent instructions [7]: [...] executing an ori R31,R31,0 instruction ensures that all instructions preceding the ori R31,R31,0 instruction have completed before the ori R31,R31,0 instruction completes, and that no subsequent instructions are initiated, even out-of-order, until after the ori R31,R31,0 instruction completes. The ori R31,R31,0 instruction may complete before storage accesses associated with instructions preceding the ori R31,R31,0 instruction have been performed Regarding the example, this implies that `if B goto e` will not execute before `if A goto e` completes. Once `if A goto e` completes, the CPU should find that the speculation was wrong and continue with `exit`. If there is any other path that leads to `if B goto e` (and therefore `unsafe()`) without going through `if A goto e`, then a nospec will still be needed there. However, this patch assumes this other path will be explored separately and therefore be discovered by the verifier even if the exploration discussed here stops at the nospec. This patch furthermore has the unfortunate consequence that Spectre v1 mitigations now only support architectures which implement BPF_NOSPEC. Before this commit, Spectre v1 mitigations prevented exploits by rejecting the programs on all architectures. Because some JITs do not implement BPF_NOSPEC, this patch therefore may regress unpriv BPF's security to a limited extent: * The regression is limited to systems vulnerable to Spectre v1, have unprivileged BPF enabled, and do NOT emit insns for BPF_NOSPEC. The latter is not the case for x86 64- and 32-bit, arm64, and powerpc 64-bit and they are therefore not affected by the regression. According to commit a6f6a95f2580 ("LoongArch, bpf: Fix jit to skip speculation barrier opcode"), LoongArch is not vulnerable to Spectre v1 and therefore also not affected by the regression. * To the best of my knowledge this regression may therefore only affect MIPS. This is deemed acceptable because unpriv BPF is still disabled there by default. As stated in a previous commit, BPF_NOSPEC could be implemented for MIPS based on GCC's speculation_barrier implementation. * It is unclear which other architectures (besides x86 64- and 32-bit, ARM64, PowerPC 64-bit, LoongArch, and MIPS) supported by the kernel are vulnerable to Spectre v1. Also, it is not clear if barriers are available on these architectures. Implementing BPF_NOSPEC on these architectures therefore is non-trivial. Searching GCC and the kernel for speculation barrier implementations for these architectures yielded no result. * If any of those regressed systems is also vulnerable to Spectre v4, the system was already vulnerable to Spectre v4 attacks based on unpriv BPF before this patch and the impact is therefore further limited. As an alternative to regressing security, one could still reject programs if the architecture does not emit BPF_NOSPEC (e.g., by removing the empty BPF_NOSPEC-case from all JITs except for LoongArch where it appears justified). However, this will cause rejections on these archs that are likely unfounded in the vast majority of cases. In the tests, some are now successful where we previously had a false-positive (i.e., rejection). Change them to reflect where the nospec should be inserted (using __xlated_unpriv) and modify the error message if the nospec is able to mitigate a problem that previously shadowed another problem (in that case __xlated_unpriv does not work, therefore just add a comment). Define SPEC_V1 to avoid duplicating this ifdef whenever we check for nospec insns using __xlated_unpriv, define it here once. This also improves readability. PowerPC can probably also be added here. However, omit it for now because the BPF CI currently does not include a test. Limit it to EPERM, EACCES, and EINVAL (and not everything except for EFAULT and ENOMEM) as it already has the desired effect for most real-world programs. Briefly went through all the occurrences of EPERM, EINVAL, and EACCESS in verifier.c to validate that catching them like this makes sense. Thanks to Dustin for their help in checking the vendor documentation. [1] https://lpc.events/event/18/contributions/1954/ ("Mitigating Spectre-PHT using Speculation Barriers in Linux eBPF") [2] https://arxiv.org/pdf/2405.00078 ("VeriFence: Lightweight and Precise Spectre Defenses for Untrusted Linux Kernel Extensions") [3] https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/runtime-speculative-side-channel-mitigations.html ("Managed Runtime Speculative Execution Side Channel Mitigations") [4] https://dl.acm.org/doi/pdf/10.1145/3359789.3359837 ("Speculator: a tool to analyze speculative execution attacks and mitigations" - Section 4.6 "Stopping Speculative Execution") [5] https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/software-techniques-for-managing-speculation.pdf ("White Paper - SOFTWARE TECHNIQUES FOR MANAGING SPECULATION ON AMD PROCESSORS - REVISION 5.09.23") [6] https://developer.arm.com/documentation/ddi0597/2020-12/Base-Instructions/SB--Speculation-Barrier- ("SB - Speculation Barrier - Arm Armv8-A A32/T32 Instruction Set Architecture (2020-12)") [7] https://wiki.raptorcs.com/w/images/5/5f/OPF_PowerISA_v3.1C.pdf ("Power ISA™ - Version 3.1C - May 26, 2024 - Section 9.2.1 of Book III") [8] https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/40332.pdf ("AMD64 Architecture Programmer’s Manual Volumes 1–5 - Revision 4.08 - April 2024 - 7.6.4 Serializing Instructions") Signed-off-by: Luis Gerhorst Acked-by: Kumar Kartikeya Dwivedi Acked-by: Henriette Herzog Cc: Dustin Nguyen Cc: Maximilian Ott Cc: Milan Stephan Link: https://lore.kernel.org/r/20250603212428.338473-1-luis.gerhorst@fau.de Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 2b0954202226..e6c26393c029 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -580,6 +580,7 @@ struct bpf_insn_aux_data { u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ + bool nospec; /* do not execute this instruction speculatively */ bool nospec_result; /* result is unsafe under speculation, nospec must follow */ bool zext_dst; /* this insn zero extends dst reg */ bool needs_zext; /* alu op needs to clear upper bits */ -- cgit v1.2.3 From bee7e3322a2859a80a67077591128323bbc4052f Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 13 Nov 2024 01:50:18 +0900 Subject: can: bittiming: rename CAN_CTRLMODE_TDC_MASK into CAN_CTRLMODE_FD_TDC_MASK With the introduction of CAN XL, a new CAN_CTRLMODE_XL_TDC_MASK will be introduced later on. Because CAN_CTRLMODE_TDC_MASK is not part of the uapi, rename it to CAN_CTRLMODE_FD_TDC_MASK to make it more explicit that this mask is meant for CAN FD. Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20241112165118.586613-10-mailhol.vincent@wanadoo.fr Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 2 +- include/linux/can/dev.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index 9b8a9c39614b..5dfdbb63b1d5 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -14,7 +14,7 @@ #define CAN_BITRATE_UNSET 0 #define CAN_BITRATE_UNKNOWN (-1U) -#define CAN_CTRLMODE_TDC_MASK \ +#define CAN_CTRLMODE_FD_TDC_MASK \ (CAN_CTRLMODE_TDC_AUTO | CAN_CTRLMODE_TDC_MANUAL) /* diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 492d23bec7be..e492dfa8a472 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -93,7 +93,7 @@ struct can_priv { static inline bool can_tdc_is_enabled(const struct can_priv *priv) { - return !!(priv->ctrlmode & CAN_CTRLMODE_TDC_MASK); + return !!(priv->ctrlmode & CAN_CTRLMODE_FD_TDC_MASK); } /* -- cgit v1.2.3 From 23c0dc95bfa86503eed9fa99423fa0bb39a3bcb0 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 13 Nov 2024 01:50:19 +0900 Subject: can: bittiming: rename can_tdc_is_enabled() into can_fd_tdc_is_enabled() With the introduction of CAN XL, a new can_xl_tdc_is_enabled() helper function will be introduced later on. Rename can_tdc_is_enabled() into can_fd_tdc_is_enabled() to make it more explicit that this helper is meant for CAN FD. Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20241112165118.586613-11-mailhol.vincent@wanadoo.fr Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index e492dfa8a472..9a92cbe5b2cb 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -91,7 +91,7 @@ struct can_priv { struct can_berr_counter *bec); }; -static inline bool can_tdc_is_enabled(const struct can_priv *priv) +static inline bool can_fd_tdc_is_enabled(const struct can_priv *priv) { return !!(priv->ctrlmode & CAN_CTRLMODE_FD_TDC_MASK); } -- cgit v1.2.3 From a5589313383074c48a1b3751d592a6e084ae0573 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sun, 1 Jun 2025 00:21:39 +0300 Subject: gpiolib: Remove unused devm_gpio_request() Remove devm_gpio_request() due to lack of users. Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250531212331.3635269-3-andriy.shevchenko@linux.intel.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index c1ec62c11ed3..61e9f43c082a 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -110,7 +110,6 @@ static inline int gpio_to_irq(unsigned gpio) int gpio_request_one(unsigned gpio, unsigned long flags, const char *label); -int devm_gpio_request(struct device *dev, unsigned gpio, const char *label); int devm_gpio_request_one(struct device *dev, unsigned gpio, unsigned long flags, const char *label); @@ -188,13 +187,6 @@ static inline int gpio_to_irq(unsigned gpio) return -EINVAL; } -static inline int devm_gpio_request(struct device *dev, unsigned gpio, - const char *label) -{ - WARN_ON(1); - return -EINVAL; -} - static inline int devm_gpio_request_one(struct device *dev, unsigned gpio, unsigned long flags, const char *label) { -- cgit v1.2.3 From 9b4d4c952e28f97c5e653c8b9453690f7e63cc5a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 31 May 2025 22:55:43 +0300 Subject: gpio: Remove unused 'struct gpio' definition There is no user for the legacy 'struct gpio', remove it for good. Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250531195801.3632110-2-andriy.shevchenko@linux.intel.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 61e9f43c082a..8697ab817898 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -21,18 +21,6 @@ struct device; #define GPIOF_OUT_INIT_LOW ((0 << 0) | (0 << 1)) #define GPIOF_OUT_INIT_HIGH ((0 << 0) | (1 << 1)) -/** - * struct gpio - a structure describing a GPIO with configuration - * @gpio: the GPIO number - * @flags: GPIO configuration as specified by GPIOF_* - * @label: a literal description string of this GPIO - */ -struct gpio { - unsigned gpio; - unsigned long flags; - const char *label; -}; - #ifdef CONFIG_GPIOLIB #include -- cgit v1.2.3 From 6595ea2761df191c2ec500d5f54b57592b969f5c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 31 May 2025 22:55:44 +0300 Subject: gpiolib: Move GPIO_DYNAMIC_* constants to its only user There is no need to export GPIO_DYNAMIC_* constants, especially via legacy header which is subject to remove. Move the mentioned constants to its only user, i.e. gpiolib.c. Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250531195801.3632110-3-andriy.shevchenko@linux.intel.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 8697ab817898..ff99ed76fdc3 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -45,19 +45,6 @@ static inline bool gpio_is_valid(int number) * extra memory (for code and for per-GPIO table entries). */ -/* - * At the end we want all GPIOs to be dynamically allocated from 0. - * However, some legacy drivers still perform fixed allocation. - * Until they are all fixed, leave 0-512 space for them. - */ -#define GPIO_DYNAMIC_BASE 512 -/* - * Define the maximum of the possible GPIO in the global numberspace. - * While the GPIO base and numbers are positive, we limit it with signed - * maximum as a lot of code is using negative values for special cases. - */ -#define GPIO_DYNAMIC_MAX INT_MAX - /* Always use the library code for GPIO management calls, * or when sleeping may be involved. */ -- cgit v1.2.3 From 127c49624a0980ee7b8a5ba9094d6942332a48da Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 4 Jun 2025 18:06:04 +0200 Subject: can: add drop reasons in the receive path of AF_CAN Besides the existing pr_warn_once(), use skb drop reasons in case AF_CAN layer drops non-conformant CAN{,FD,XL} frames, or conformant frames received by "wrong" devices, so that it's possible to debug (and count) such events using existing tracepoints: | # perf record -e skb:kfree_skb -aR -- ./drv/canfdtest -v -g -l 1 vcan0 | # perf script | [...] | canfdtest 1123 [000] 3893.271264: skb:kfree_skb: skbaddr=0xffff975703c9f700 rx_sk=(nil) protocol=12 location=can_rcv+0x4b reason: CAN_RX_INVALID_FRAME Signed-off-by: Davide Caratti Link: https://patch.msgid.link/20250604160605.1005704-2-dcaratti@redhat.com Signed-off-by: Marc Kleine-Budde --- include/net/dropreason-core.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index bcf9d7467e1a..b9e78290269e 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -121,6 +121,9 @@ FN(ARP_PVLAN_DISABLE) \ FN(MAC_IEEE_MAC_CONTROL) \ FN(BRIDGE_INGRESS_STP_STATE) \ + FN(CAN_RX_INVALID_FRAME) \ + FN(CANFD_RX_INVALID_FRAME) \ + FN(CANXL_RX_INVALID_FRAME) \ FNe(MAX) /** @@ -573,6 +576,21 @@ enum skb_drop_reason { * ingress bridge port does not allow frames to be forwarded. */ SKB_DROP_REASON_BRIDGE_INGRESS_STP_STATE, + /** + * @SKB_DROP_REASON_CAN_RX_INVALID_FRAME: received + * non conform CAN frame (or device is unable to receive CAN frames) + */ + SKB_DROP_REASON_CAN_RX_INVALID_FRAME, + /** + * @SKB_DROP_REASON_CANFD_RX_INVALID_FRAME: received + * non conform CAN-FD frame (or device is unable to receive CAN frames) + */ + SKB_DROP_REASON_CANFD_RX_INVALID_FRAME, + /** + * @SKB_DROP_REASON_CANXL_RX_INVALID_FRAME: received + * non conform CAN-XL frame (or device is unable to receive CAN frames) + */ + SKB_DROP_REASON_CANXL_RX_INVALID_FRAME, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen -- cgit v1.2.3 From 4a59e02a5aa178f80d065ad0368d29f25620c334 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 27 May 2025 14:24:02 +0300 Subject: dt-bindings: clock: rzg2l: Drop power domain IDs Since the configuration order between the individual MSTOP and CLKON bits cannot be preserved with the power domain abstraction, drop the power domain IDs. The corresponding code has also been removed. Currently, there are no device tree users for these IDs. Acked-by: "Rob Herring (Arm)" Reviewed-by: Geert Uytterhoeven Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/20250527112403.1254122-8-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/r9a07g043-cpg.h | 53 ----------------------- include/dt-bindings/clock/r9a07g044-cpg.h | 58 ------------------------- include/dt-bindings/clock/r9a07g054-cpg.h | 58 ------------------------- include/dt-bindings/clock/r9a08g045-cpg.h | 71 ------------------------------- 4 files changed, 240 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/r9a07g043-cpg.h b/include/dt-bindings/clock/r9a07g043-cpg.h index 131993343777..e1f65f1928cf 100644 --- a/include/dt-bindings/clock/r9a07g043-cpg.h +++ b/include/dt-bindings/clock/r9a07g043-cpg.h @@ -200,57 +200,4 @@ #define R9A07G043_AX45MP_CORE0_RESETN 78 /* RZ/Five Only */ #define R9A07G043_IAX45_RESETN 79 /* RZ/Five Only */ -/* Power domain IDs. */ -#define R9A07G043_PD_ALWAYS_ON 0 -#define R9A07G043_PD_GIC 1 /* RZ/G2UL Only */ -#define R9A07G043_PD_IA55 2 /* RZ/G2UL Only */ -#define R9A07G043_PD_MHU 3 /* RZ/G2UL Only */ -#define R9A07G043_PD_CORESIGHT 4 /* RZ/G2UL Only */ -#define R9A07G043_PD_SYC 5 /* RZ/G2UL Only */ -#define R9A07G043_PD_DMAC 6 -#define R9A07G043_PD_GTM0 7 -#define R9A07G043_PD_GTM1 8 -#define R9A07G043_PD_GTM2 9 -#define R9A07G043_PD_MTU 10 -#define R9A07G043_PD_POE3 11 -#define R9A07G043_PD_WDT0 12 -#define R9A07G043_PD_SPI 13 -#define R9A07G043_PD_SDHI0 14 -#define R9A07G043_PD_SDHI1 15 -#define R9A07G043_PD_ISU 16 /* RZ/G2UL Only */ -#define R9A07G043_PD_CRU 17 /* RZ/G2UL Only */ -#define R9A07G043_PD_LCDC 18 /* RZ/G2UL Only */ -#define R9A07G043_PD_SSI0 19 -#define R9A07G043_PD_SSI1 20 -#define R9A07G043_PD_SSI2 21 -#define R9A07G043_PD_SSI3 22 -#define R9A07G043_PD_SRC 23 -#define R9A07G043_PD_USB0 24 -#define R9A07G043_PD_USB1 25 -#define R9A07G043_PD_USB_PHY 26 -#define R9A07G043_PD_ETHER0 27 -#define R9A07G043_PD_ETHER1 28 -#define R9A07G043_PD_I2C0 29 -#define R9A07G043_PD_I2C1 30 -#define R9A07G043_PD_I2C2 31 -#define R9A07G043_PD_I2C3 32 -#define R9A07G043_PD_SCIF0 33 -#define R9A07G043_PD_SCIF1 34 -#define R9A07G043_PD_SCIF2 35 -#define R9A07G043_PD_SCIF3 36 -#define R9A07G043_PD_SCIF4 37 -#define R9A07G043_PD_SCI0 38 -#define R9A07G043_PD_SCI1 39 -#define R9A07G043_PD_IRDA 40 -#define R9A07G043_PD_RSPI0 41 -#define R9A07G043_PD_RSPI1 42 -#define R9A07G043_PD_RSPI2 43 -#define R9A07G043_PD_CANFD 44 -#define R9A07G043_PD_ADC 45 -#define R9A07G043_PD_TSU 46 -#define R9A07G043_PD_PLIC 47 /* RZ/Five Only */ -#define R9A07G043_PD_IAX45 48 /* RZ/Five Only */ -#define R9A07G043_PD_NCEPLDM 49 /* RZ/Five Only */ -#define R9A07G043_PD_NCEPLMT 50 /* RZ/Five Only */ - #endif /* __DT_BINDINGS_CLOCK_R9A07G043_CPG_H__ */ diff --git a/include/dt-bindings/clock/r9a07g044-cpg.h b/include/dt-bindings/clock/r9a07g044-cpg.h index e209f96f92b7..0bb17ff1a01a 100644 --- a/include/dt-bindings/clock/r9a07g044-cpg.h +++ b/include/dt-bindings/clock/r9a07g044-cpg.h @@ -217,62 +217,4 @@ #define R9A07G044_ADC_ADRST_N 82 #define R9A07G044_TSU_PRESETN 83 -/* Power domain IDs. */ -#define R9A07G044_PD_ALWAYS_ON 0 -#define R9A07G044_PD_GIC 1 -#define R9A07G044_PD_IA55 2 -#define R9A07G044_PD_MHU 3 -#define R9A07G044_PD_CORESIGHT 4 -#define R9A07G044_PD_SYC 5 -#define R9A07G044_PD_DMAC 6 -#define R9A07G044_PD_GTM0 7 -#define R9A07G044_PD_GTM1 8 -#define R9A07G044_PD_GTM2 9 -#define R9A07G044_PD_MTU 10 -#define R9A07G044_PD_POE3 11 -#define R9A07G044_PD_GPT 12 -#define R9A07G044_PD_POEGA 13 -#define R9A07G044_PD_POEGB 14 -#define R9A07G044_PD_POEGC 15 -#define R9A07G044_PD_POEGD 16 -#define R9A07G044_PD_WDT0 17 -#define R9A07G044_PD_WDT1 18 -#define R9A07G044_PD_SPI 19 -#define R9A07G044_PD_SDHI0 20 -#define R9A07G044_PD_SDHI1 21 -#define R9A07G044_PD_3DGE 22 -#define R9A07G044_PD_ISU 23 -#define R9A07G044_PD_VCPL4 24 -#define R9A07G044_PD_CRU 25 -#define R9A07G044_PD_MIPI_DSI 26 -#define R9A07G044_PD_LCDC 27 -#define R9A07G044_PD_SSI0 28 -#define R9A07G044_PD_SSI1 29 -#define R9A07G044_PD_SSI2 30 -#define R9A07G044_PD_SSI3 31 -#define R9A07G044_PD_SRC 32 -#define R9A07G044_PD_USB0 33 -#define R9A07G044_PD_USB1 34 -#define R9A07G044_PD_USB_PHY 35 -#define R9A07G044_PD_ETHER0 36 -#define R9A07G044_PD_ETHER1 37 -#define R9A07G044_PD_I2C0 38 -#define R9A07G044_PD_I2C1 39 -#define R9A07G044_PD_I2C2 40 -#define R9A07G044_PD_I2C3 41 -#define R9A07G044_PD_SCIF0 42 -#define R9A07G044_PD_SCIF1 43 -#define R9A07G044_PD_SCIF2 44 -#define R9A07G044_PD_SCIF3 45 -#define R9A07G044_PD_SCIF4 46 -#define R9A07G044_PD_SCI0 47 -#define R9A07G044_PD_SCI1 48 -#define R9A07G044_PD_IRDA 49 -#define R9A07G044_PD_RSPI0 50 -#define R9A07G044_PD_RSPI1 51 -#define R9A07G044_PD_RSPI2 52 -#define R9A07G044_PD_CANFD 53 -#define R9A07G044_PD_ADC 54 -#define R9A07G044_PD_TSU 55 - #endif /* __DT_BINDINGS_CLOCK_R9A07G044_CPG_H__ */ diff --git a/include/dt-bindings/clock/r9a07g054-cpg.h b/include/dt-bindings/clock/r9a07g054-cpg.h index 2c99f89397c4..43f4dbda872c 100644 --- a/include/dt-bindings/clock/r9a07g054-cpg.h +++ b/include/dt-bindings/clock/r9a07g054-cpg.h @@ -226,62 +226,4 @@ #define R9A07G054_TSU_PRESETN 83 #define R9A07G054_STPAI_ARESETN 84 -/* Power domain IDs. */ -#define R9A07G054_PD_ALWAYS_ON 0 -#define R9A07G054_PD_GIC 1 -#define R9A07G054_PD_IA55 2 -#define R9A07G054_PD_MHU 3 -#define R9A07G054_PD_CORESIGHT 4 -#define R9A07G054_PD_SYC 5 -#define R9A07G054_PD_DMAC 6 -#define R9A07G054_PD_GTM0 7 -#define R9A07G054_PD_GTM1 8 -#define R9A07G054_PD_GTM2 9 -#define R9A07G054_PD_MTU 10 -#define R9A07G054_PD_POE3 11 -#define R9A07G054_PD_GPT 12 -#define R9A07G054_PD_POEGA 13 -#define R9A07G054_PD_POEGB 14 -#define R9A07G054_PD_POEGC 15 -#define R9A07G054_PD_POEGD 16 -#define R9A07G054_PD_WDT0 17 -#define R9A07G054_PD_WDT1 18 -#define R9A07G054_PD_SPI 19 -#define R9A07G054_PD_SDHI0 20 -#define R9A07G054_PD_SDHI1 21 -#define R9A07G054_PD_3DGE 22 -#define R9A07G054_PD_ISU 23 -#define R9A07G054_PD_VCPL4 24 -#define R9A07G054_PD_CRU 25 -#define R9A07G054_PD_MIPI_DSI 26 -#define R9A07G054_PD_LCDC 27 -#define R9A07G054_PD_SSI0 28 -#define R9A07G054_PD_SSI1 29 -#define R9A07G054_PD_SSI2 30 -#define R9A07G054_PD_SSI3 31 -#define R9A07G054_PD_SRC 32 -#define R9A07G054_PD_USB0 33 -#define R9A07G054_PD_USB1 34 -#define R9A07G054_PD_USB_PHY 35 -#define R9A07G054_PD_ETHER0 36 -#define R9A07G054_PD_ETHER1 37 -#define R9A07G054_PD_I2C0 38 -#define R9A07G054_PD_I2C1 39 -#define R9A07G054_PD_I2C2 40 -#define R9A07G054_PD_I2C3 41 -#define R9A07G054_PD_SCIF0 42 -#define R9A07G054_PD_SCIF1 43 -#define R9A07G054_PD_SCIF2 44 -#define R9A07G054_PD_SCIF3 45 -#define R9A07G054_PD_SCIF4 46 -#define R9A07G054_PD_SCI0 47 -#define R9A07G054_PD_SCI1 48 -#define R9A07G054_PD_IRDA 49 -#define R9A07G054_PD_RSPI0 50 -#define R9A07G054_PD_RSPI1 51 -#define R9A07G054_PD_RSPI2 52 -#define R9A07G054_PD_CANFD 53 -#define R9A07G054_PD_ADC 54 -#define R9A07G054_PD_TSU 55 - #endif /* __DT_BINDINGS_CLOCK_R9A07G054_CPG_H__ */ diff --git a/include/dt-bindings/clock/r9a08g045-cpg.h b/include/dt-bindings/clock/r9a08g045-cpg.h index 311521fe4b59..410725b778a8 100644 --- a/include/dt-bindings/clock/r9a08g045-cpg.h +++ b/include/dt-bindings/clock/r9a08g045-cpg.h @@ -239,75 +239,4 @@ #define R9A08G045_I3C_PRESETN 92 #define R9A08G045_VBAT_BRESETN 93 -/* Power domain IDs. */ -#define R9A08G045_PD_ALWAYS_ON 0 -#define R9A08G045_PD_GIC 1 -#define R9A08G045_PD_IA55 2 -#define R9A08G045_PD_MHU 3 -#define R9A08G045_PD_CORESIGHT 4 -#define R9A08G045_PD_SYC 5 -#define R9A08G045_PD_DMAC 6 -#define R9A08G045_PD_GTM0 7 -#define R9A08G045_PD_GTM1 8 -#define R9A08G045_PD_GTM2 9 -#define R9A08G045_PD_GTM3 10 -#define R9A08G045_PD_GTM4 11 -#define R9A08G045_PD_GTM5 12 -#define R9A08G045_PD_GTM6 13 -#define R9A08G045_PD_GTM7 14 -#define R9A08G045_PD_MTU 15 -#define R9A08G045_PD_POE3 16 -#define R9A08G045_PD_GPT 17 -#define R9A08G045_PD_POEGA 18 -#define R9A08G045_PD_POEGB 19 -#define R9A08G045_PD_POEGC 20 -#define R9A08G045_PD_POEGD 21 -#define R9A08G045_PD_WDT0 22 -#define R9A08G045_PD_XSPI 23 -#define R9A08G045_PD_SDHI0 24 -#define R9A08G045_PD_SDHI1 25 -#define R9A08G045_PD_SDHI2 26 -#define R9A08G045_PD_SSI0 27 -#define R9A08G045_PD_SSI1 28 -#define R9A08G045_PD_SSI2 29 -#define R9A08G045_PD_SSI3 30 -#define R9A08G045_PD_SRC 31 -#define R9A08G045_PD_USB0 32 -#define R9A08G045_PD_USB1 33 -#define R9A08G045_PD_USB_PHY 34 -#define R9A08G045_PD_ETHER0 35 -#define R9A08G045_PD_ETHER1 36 -#define R9A08G045_PD_I2C0 37 -#define R9A08G045_PD_I2C1 38 -#define R9A08G045_PD_I2C2 39 -#define R9A08G045_PD_I2C3 40 -#define R9A08G045_PD_SCIF0 41 -#define R9A08G045_PD_SCIF1 42 -#define R9A08G045_PD_SCIF2 43 -#define R9A08G045_PD_SCIF3 44 -#define R9A08G045_PD_SCIF4 45 -#define R9A08G045_PD_SCIF5 46 -#define R9A08G045_PD_SCI0 47 -#define R9A08G045_PD_SCI1 48 -#define R9A08G045_PD_IRDA 49 -#define R9A08G045_PD_RSPI0 50 -#define R9A08G045_PD_RSPI1 51 -#define R9A08G045_PD_RSPI2 52 -#define R9A08G045_PD_RSPI3 53 -#define R9A08G045_PD_RSPI4 54 -#define R9A08G045_PD_CANFD 55 -#define R9A08G045_PD_ADC 56 -#define R9A08G045_PD_TSU 57 -#define R9A08G045_PD_OCTA 58 -#define R9A08G045_PD_PDM 59 -#define R9A08G045_PD_PCI 60 -#define R9A08G045_PD_SPDIF 61 -#define R9A08G045_PD_I3C 62 -#define R9A08G045_PD_VBAT 63 - -#define R9A08G045_PD_DDR 64 -#define R9A08G045_PD_TZCDDR 65 -#define R9A08G045_PD_OTFDE_DDR 66 -#define R9A08G045_PD_RTC 67 - #endif /* __DT_BINDINGS_CLOCK_R9A08G045_CPG_H__ */ -- cgit v1.2.3 From d209f6e122950d9b6f329f3538b785dd709001e5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 2 Jun 2025 07:58:54 -0400 Subject: filelock: add new locks_wake_up_waiter() helper Currently the function that does this takes a struct file_lock, but __locks_wake_up_blocks() deals with both locks and leases. Currently this works because both file_lock and file_lease have the file_lock_core at the beginning of the struct, but it's fragile to rely on that. Add a new locks_wake_up_waiter() function and call that from __locks_wake_up_blocks(). Signed-off-by: Jeff Layton Link: https://lore.kernel.org/20250602-filelock-6-16-v1-1-7da5b2c930fd@kernel.org Signed-off-by: Christian Brauner --- include/linux/filelock.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filelock.h b/include/linux/filelock.h index c412ded9171e..c2ce8ba05d06 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -175,9 +175,14 @@ static inline bool lock_is_write(struct file_lock *fl) return fl->c.flc_type == F_WRLCK; } +static inline void locks_wake_up_waiter(struct file_lock_core *flc) +{ + wake_up(&flc->flc_wait); +} + static inline void locks_wake_up(struct file_lock *fl) { - wake_up(&fl->c.flc_wait); + locks_wake_up_waiter(&fl->c); } static inline bool locks_can_async_lock(const struct file_operations *fops) -- cgit v1.2.3 From 33877220b8641b4cde474a4229ea92c0e3637883 Mon Sep 17 00:00:00 2001 From: Tasos Sahanidis Date: Mon, 19 May 2025 11:56:55 +0300 Subject: ata: libata-acpi: Do not assume 40 wire cable if no devices are enabled On at least an ASRock 990FX Extreme 4 with a VIA VT6330, the devices have not yet been enabled by the first time ata_acpi_cbl_80wire() is called. This means that the ata_for_each_dev loop is never entered, and a 40 wire cable is assumed. The VIA controller on this board does not report the cable in the PCI config space, thus having to fall back to ACPI even though no SATA bridge is present. The _GTM values are correctly reported by the firmware through ACPI, which has already set up faster transfer modes, but due to the above the controller is forced down to a maximum of UDMA/33. Resolve this by modifying ata_acpi_cbl_80wire() to directly return the cable type. First, an unknown cable is assumed which preserves the mode set by the firmware, and then on subsequent calls when the devices have been enabled, an 80 wire cable is correctly detected. Since the function now directly returns the cable type, it is renamed to ata_acpi_cbl_pata_type(). Signed-off-by: Tasos Sahanidis Link: https://lore.kernel.org/r/20250519085945.1399466-1-tasos@tasossah.com Signed-off-by: Niklas Cassel --- include/linux/libata.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 31be45fd47a6..1e5aec839041 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1352,7 +1352,7 @@ int ata_acpi_stm(struct ata_port *ap, const struct ata_acpi_gtm *stm); int ata_acpi_gtm(struct ata_port *ap, struct ata_acpi_gtm *stm); unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, const struct ata_acpi_gtm *gtm); -int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm); +int ata_acpi_cbl_pata_type(struct ata_port *ap); #else static inline const struct ata_acpi_gtm *ata_acpi_init_gtm(struct ata_port *ap) { @@ -1377,10 +1377,9 @@ static inline unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, return 0; } -static inline int ata_acpi_cbl_80wire(struct ata_port *ap, - const struct ata_acpi_gtm *gtm) +static inline int ata_acpi_cbl_pata_type(struct ata_port *ap) { - return 0; + return ATA_CBL_PATA40; } #endif -- cgit v1.2.3 From 2d72dd14d77f31a7caa619fe0b889304844e612e Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 10 Jun 2025 16:07:56 +0200 Subject: bpf: adjust path to trace_output sample eBPF program The sample file was renamed from trace_output_kern.c to trace_output.bpf.c in commit d4fffba4d04b ("samples/bpf: Change _kern suffix to .bpf with syscall tracing program"). Adjust the path in the documentation comment for bpf_perf_event_output. Signed-off-by: Tobias Klauser Link: https://lore.kernel.org/r/20250610140756.16332-1-tklauser@distanz.ch Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 194ed9891b40..39e7818cca80 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2409,7 +2409,7 @@ union bpf_attr { * into it. An example is available in file * *samples/bpf/trace_output_user.c* in the Linux kernel source * tree (the eBPF program counterpart is in - * *samples/bpf/trace_output_kern.c*). + * *samples/bpf/trace_output.bpf.c*). * * **bpf_perf_event_output**\ () achieves better performance * than **bpf_trace_printk**\ () for sharing data with user -- cgit v1.2.3 From c09ef59e17c6921c577d54bc8da4331b955d01a7 Mon Sep 17 00:00:00 2001 From: Dipayaan Roy Date: Mon, 9 Jun 2025 03:01:03 -0700 Subject: net: mana: Expose additional hardware counters for drop and TC via ethtool. Add support for reporting additional hardware counters for drop and TC using the ethtool -S interface. These counters include: - Aggregate Rx/Tx drop counters - Per-TC Rx/Tx packet counters - Per-TC Rx/Tx byte counters - Per-TC Rx/Tx pause frame counters The counters are exposed using ethtool_ops->get_ethtool_stats and ethtool_ops->get_strings. This feature/counters are not available to all versions of hardware. Signed-off-by: Dipayaan Roy Reviewed-by: Subbaraya Sundeep Reviewed-by: Haiyang Zhang Link: https://patch.msgid.link/20250609100103.GA7102@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net Signed-off-by: Jakub Kicinski --- include/net/mana/mana.h | 131 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) (limited to 'include') diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 9abb66461211..4176edf1be71 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -404,6 +404,65 @@ struct mana_ethtool_stats { u64 rx_cqe_unknown_type; }; +struct mana_ethtool_phy_stats { + /* Drop Counters */ + u64 rx_pkt_drop_phy; + u64 tx_pkt_drop_phy; + + /* Per TC traffic Counters */ + u64 rx_pkt_tc0_phy; + u64 tx_pkt_tc0_phy; + u64 rx_pkt_tc1_phy; + u64 tx_pkt_tc1_phy; + u64 rx_pkt_tc2_phy; + u64 tx_pkt_tc2_phy; + u64 rx_pkt_tc3_phy; + u64 tx_pkt_tc3_phy; + u64 rx_pkt_tc4_phy; + u64 tx_pkt_tc4_phy; + u64 rx_pkt_tc5_phy; + u64 tx_pkt_tc5_phy; + u64 rx_pkt_tc6_phy; + u64 tx_pkt_tc6_phy; + u64 rx_pkt_tc7_phy; + u64 tx_pkt_tc7_phy; + + u64 rx_byte_tc0_phy; + u64 tx_byte_tc0_phy; + u64 rx_byte_tc1_phy; + u64 tx_byte_tc1_phy; + u64 rx_byte_tc2_phy; + u64 tx_byte_tc2_phy; + u64 rx_byte_tc3_phy; + u64 tx_byte_tc3_phy; + u64 rx_byte_tc4_phy; + u64 tx_byte_tc4_phy; + u64 rx_byte_tc5_phy; + u64 tx_byte_tc5_phy; + u64 rx_byte_tc6_phy; + u64 tx_byte_tc6_phy; + u64 rx_byte_tc7_phy; + u64 tx_byte_tc7_phy; + + /* Per TC pause Counters */ + u64 rx_pause_tc0_phy; + u64 tx_pause_tc0_phy; + u64 rx_pause_tc1_phy; + u64 tx_pause_tc1_phy; + u64 rx_pause_tc2_phy; + u64 tx_pause_tc2_phy; + u64 rx_pause_tc3_phy; + u64 tx_pause_tc3_phy; + u64 rx_pause_tc4_phy; + u64 tx_pause_tc4_phy; + u64 rx_pause_tc5_phy; + u64 tx_pause_tc5_phy; + u64 rx_pause_tc6_phy; + u64 tx_pause_tc6_phy; + u64 rx_pause_tc7_phy; + u64 tx_pause_tc7_phy; +}; + struct mana_context { struct gdma_dev *gdma_dev; @@ -474,6 +533,8 @@ struct mana_port_context { struct mana_ethtool_stats eth_stats; + struct mana_ethtool_phy_stats phy_stats; + /* Debugfs */ struct dentry *mana_port_debugfs; }; @@ -501,6 +562,7 @@ struct bpf_prog *mana_xdp_get(struct mana_port_context *apc); void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog); int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf); void mana_query_gf_stats(struct mana_port_context *apc); +void mana_query_phy_stats(struct mana_port_context *apc); int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues); void mana_pre_dealloc_rxbufs(struct mana_port_context *apc); @@ -527,6 +589,7 @@ enum mana_command_code { MANA_FENCE_RQ = 0x20006, MANA_CONFIG_VPORT_RX = 0x20007, MANA_QUERY_VPORT_CONFIG = 0x20008, + MANA_QUERY_PHY_STAT = 0x2000c, /* Privileged commands for the PF mode */ MANA_REGISTER_FILTER = 0x28000, @@ -689,6 +752,74 @@ struct mana_query_gf_stat_resp { u64 tx_err_gdma; }; /* HW DATA */ +/* Query phy stats */ +struct mana_query_phy_stat_req { + struct gdma_req_hdr hdr; + u64 req_stats; +}; /* HW DATA */ + +struct mana_query_phy_stat_resp { + struct gdma_resp_hdr hdr; + u64 reported_stats; + + /* Aggregate Drop Counters */ + u64 rx_pkt_drop_phy; + u64 tx_pkt_drop_phy; + + /* Per TC(Traffic class) traffic Counters */ + u64 rx_pkt_tc0_phy; + u64 tx_pkt_tc0_phy; + u64 rx_pkt_tc1_phy; + u64 tx_pkt_tc1_phy; + u64 rx_pkt_tc2_phy; + u64 tx_pkt_tc2_phy; + u64 rx_pkt_tc3_phy; + u64 tx_pkt_tc3_phy; + u64 rx_pkt_tc4_phy; + u64 tx_pkt_tc4_phy; + u64 rx_pkt_tc5_phy; + u64 tx_pkt_tc5_phy; + u64 rx_pkt_tc6_phy; + u64 tx_pkt_tc6_phy; + u64 rx_pkt_tc7_phy; + u64 tx_pkt_tc7_phy; + + u64 rx_byte_tc0_phy; + u64 tx_byte_tc0_phy; + u64 rx_byte_tc1_phy; + u64 tx_byte_tc1_phy; + u64 rx_byte_tc2_phy; + u64 tx_byte_tc2_phy; + u64 rx_byte_tc3_phy; + u64 tx_byte_tc3_phy; + u64 rx_byte_tc4_phy; + u64 tx_byte_tc4_phy; + u64 rx_byte_tc5_phy; + u64 tx_byte_tc5_phy; + u64 rx_byte_tc6_phy; + u64 tx_byte_tc6_phy; + u64 rx_byte_tc7_phy; + u64 tx_byte_tc7_phy; + + /* Per TC(Traffic Class) pause Counters */ + u64 rx_pause_tc0_phy; + u64 tx_pause_tc0_phy; + u64 rx_pause_tc1_phy; + u64 tx_pause_tc1_phy; + u64 rx_pause_tc2_phy; + u64 tx_pause_tc2_phy; + u64 rx_pause_tc3_phy; + u64 tx_pause_tc3_phy; + u64 rx_pause_tc4_phy; + u64 tx_pause_tc4_phy; + u64 rx_pause_tc5_phy; + u64 tx_pause_tc5_phy; + u64 rx_pause_tc6_phy; + u64 tx_pause_tc6_phy; + u64 rx_pause_tc7_phy; + u64 tx_pause_tc7_phy; +}; /* HW DATA */ + /* Configure vPort Rx Steering */ struct mana_cfg_rx_steer_req_v2 { struct gdma_req_hdr hdr; -- cgit v1.2.3 From 31557b3487b349464daf42bc4366153743c1e727 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Jun 2025 07:39:33 -0700 Subject: uapi: in6: restore visibility of most IPv6 socket options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A decade ago commit 6d08acd2d32e ("in6: fix conflict with glibc") hid the definitions of IPV6 options, because GCC was complaining about duplicates. The commit did not list the warnings seen, but trying to recreate them now I think they are (building iproute2): In file included from ./include/uapi/rdma/rdma_user_cm.h:39, from rdma.h:16, from res.h:9, from res-ctx.c:7: ../include/uapi/linux/in6.h:171:9: warning: ‘IPV6_ADD_MEMBERSHIP’ redefined 171 | #define IPV6_ADD_MEMBERSHIP 20 | ^~~~~~~~~~~~~~~~~~~ In file included from /usr/include/netinet/in.h:37, from rdma.h:13: /usr/include/bits/in.h:233:10: note: this is the location of the previous definition 233 | # define IPV6_ADD_MEMBERSHIP IPV6_JOIN_GROUP | ^~~~~~~~~~~~~~~~~~~ ../include/uapi/linux/in6.h:172:9: warning: ‘IPV6_DROP_MEMBERSHIP’ redefined 172 | #define IPV6_DROP_MEMBERSHIP 21 | ^~~~~~~~~~~~~~~~~~~~ /usr/include/bits/in.h:234:10: note: this is the location of the previous definition 234 | # define IPV6_DROP_MEMBERSHIP IPV6_LEAVE_GROUP | ^~~~~~~~~~~~~~~~~~~~ Compilers don't complain about redefinition if the defines are identical, but here we have the kernel using the literal value, and glibc using an indirection (defining to a name of another define, with the same numerical value). Problem is, the commit in question hid all the IPV6 socket options, and glibc has a pretty sparse list. For instance it lacks Flow Label related options. Willem called this out in commit 3fb321fde22d ("selftests/net: ipv6 flowlabel"): /* uapi/glibc weirdness may leave this undefined */ #ifndef IPV6_FLOWINFO #define IPV6_FLOWINFO 11 #endif More interestingly some applications (socat) use a #ifdef IPV6_FLOWINFO to gate compilation of thier rudimentary flow label support. (For added confusion socat misspells it as IPV4_FLOWINFO in some places.) Hide only the two defines we know glibc has a problem with. If we discover more warnings we can hide more but we should avoid covering the entire block of defines for "IPV6 socket options". Link: https://patch.msgid.link/20250609143933.1654417-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/in6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index ff8d21f9e95b..5a47339ef7d7 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -152,7 +152,6 @@ struct in6_flowlabel_req { /* * IPV6 socket options */ -#if __UAPI_DEF_IPV6_OPTIONS #define IPV6_ADDRFORM 1 #define IPV6_2292PKTINFO 2 #define IPV6_2292HOPOPTS 3 @@ -169,8 +168,10 @@ struct in6_flowlabel_req { #define IPV6_MULTICAST_IF 17 #define IPV6_MULTICAST_HOPS 18 #define IPV6_MULTICAST_LOOP 19 +#if __UAPI_DEF_IPV6_OPTIONS #define IPV6_ADD_MEMBERSHIP 20 #define IPV6_DROP_MEMBERSHIP 21 +#endif #define IPV6_ROUTER_ALERT 22 #define IPV6_MTU_DISCOVER 23 #define IPV6_MTU 24 @@ -203,7 +204,6 @@ struct in6_flowlabel_req { #define IPV6_IPSEC_POLICY 34 #define IPV6_XFRM_POLICY 35 #define IPV6_HDRINCL 36 -#endif /* * Multicast: -- cgit v1.2.3 From 561939ed44932da639ba703ffcd4d4d5ff2c7569 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 9 Jun 2025 11:32:35 -0400 Subject: net: remove unused sock_enable_timestamps This function was introduced in commit 783da70e8396 ("net: add sock_enable_timestamps"), with one caller in rxrpc. That only caller was removed in commit 7903d4438b3f ("rxrpc: Don't use received skbuff timestamps"). Signed-off-by: Willem de Bruijn Reviewed-by: Jason Xing Link: https://patch.msgid.link/20250609153254.3504909-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 92e7c1aae3cc..85e17da5c9db 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2982,7 +2982,6 @@ void sock_set_timestamp(struct sock *sk, int optname, bool valbool); int sock_set_timestamping(struct sock *sk, int optname, struct so_timestamping timestamping); -void sock_enable_timestamps(struct sock *sk); #if defined(CONFIG_CGROUP_BPF) void bpf_skops_tx_timestamping(struct sock *sk, struct sk_buff *skb, int op); #else -- cgit v1.2.3 From 2bc64b89c4c4073ee8f9543373c64da9b6bbe5e0 Mon Sep 17 00:00:00 2001 From: Gur Stavi Date: Mon, 9 Jun 2025 18:07:52 +0300 Subject: queue_api: add subqueue variant netif_subqueue_sent Add a new function, netif_subqueue_sent, which is a wrapper for netdev_tx_sent_queue. Drivers that use the subqueue variant macros, netif_subqueue_xxx, identify queue by index and are not required to obtain struct netdev_queue explicitly. Such drivers still need to call netdev_tx_sent_queue which is a counterpart of netif_subqueue_completed_wake. Allowing drivers to use a subqueue variant for this purpose improves their code consistency by always referring to queue by its index. Signed-off-by: Gur Stavi Link: https://patch.msgid.link/909a5c92db49cad39f0954d6cb86775e6480ef4c.1749038081.git.gur.stavi@huawei.com Signed-off-by: Jakub Kicinski --- include/net/netdev_queues.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index ba2eaf39089b..6e835972abd1 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -294,6 +294,15 @@ netdev_txq_completed_mb(struct netdev_queue *dev_queue, netif_txq_try_stop(_txq, get_desc, start_thrs); \ }) +static inline void netif_subqueue_sent(const struct net_device *dev, + unsigned int idx, unsigned int bytes) +{ + struct netdev_queue *txq; + + txq = netdev_get_tx_queue(dev, idx); + netdev_tx_sent_queue(txq, bytes); +} + #define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \ ({ \ struct netdev_queue *_txq; \ -- cgit v1.2.3 From 5943c611c47c9444e834555c867dec744158b7ad Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Feb 2025 16:04:47 -0500 Subject: procfs: kill ->proc_dops It has two possible values - one for "forced lookup" entries, another for the normal ones. We'd be better off with that as an explicit flag anyway and in addition to that it opens some fun possibilities with ->d_op and ->d_flags handling. [moved PROC_ENTRY_FORCE_LOOKUP to include/linux/proc_fs.h, switched it to an unused bit - there was a conflict] Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/proc_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index ea62201c74c4..de1d24f19f76 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -27,6 +27,8 @@ enum { PROC_ENTRY_proc_read_iter = 1U << 1, PROC_ENTRY_proc_compat_ioctl = 1U << 2, + + PROC_ENTRY_FORCE_LOOKUP = 1U << 7, }; struct proc_ops { -- cgit v1.2.3 From 790fa81b8c43cda9fe25c1b564d0afe3ddeeb370 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 24 Feb 2025 12:46:49 -0500 Subject: new helper: d_splice_alias_ops() Uses of d_set_d_op() on live dentry can be very dangerous; it is going to be withdrawn and replaced with saner things. The best way for a filesystem is to have the default dentry_operations set at mount time and be done with that - __d_alloc() will use that. Currently there are two cases when d_set_d_op() is used on a live dentry - one is procfs, which has several genuinely different dentry_operations instances (different ->d_revalidate(), etc.) and another is simple_lookup(), where we would be better off without overriding ->d_op. For procfs we have d_set_d_op() calls followed by d_splice_alias(); provide a new helper (d_splice_alias_ops(inode, dentry, d_ops)) that would combine those two, and do the d_set_d_op() part while under ->d_lock. That eliminates one of the places where ->d_flags had been modified without holding ->d_lock; current behaviour is not racy, but the reasons for that are far too brittle. Better move to uniform locking rules and simpler proof of correctness... The next commit will convert procfs to use of that helper; it is not exported and won't be until somebody comes up with convincing modular user for it. Again, the best approach is to have default ->d_op and let __d_alloc() do the right thing; filesystem _may_ need non-uniform ->d_op (procfs does), but there'd better be good reasons for that. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/dcache.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index e29823c701ac..1993e6704552 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -245,6 +245,9 @@ extern struct dentry * d_alloc_anon(struct super_block *); extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, wait_queue_head_t *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); +/* weird procfs mess; *NOT* exported */ +extern struct dentry * d_splice_alias_ops(struct inode *, struct dentry *, + const struct dentry_operations *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern bool d_same_name(const struct dentry *dentry, const struct dentry *parent, const struct qstr *name); -- cgit v1.2.3 From 05fb0e666495cda068c068a681ecbbf8e57324d0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 23 Feb 2025 19:39:47 -0500 Subject: new helper: set_default_d_op() ... to be used instead of manually assigning to ->s_d_op. All in-tree filesystem converted (and field itself is renamed, so any out-of-tree ones in need of conversion will be caught by compiler). Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/dcache.h | 2 ++ include/linux/fs.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 1993e6704552..be7ae058fa90 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -607,4 +607,6 @@ static inline struct dentry *d_next_sibling(const struct dentry *dentry) return hlist_entry_safe(dentry->d_sib.next, struct dentry, d_sib); } +void set_default_d_op(struct super_block *, const struct dentry_operations *); + #endif /* __LINUX_DCACHE_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 96c7925a6551..7cd8eaab4d4e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1413,7 +1413,7 @@ struct super_block { */ const char *s_subtype; - const struct dentry_operations *s_d_op; /* default d_op for dentries */ + const struct dentry_operations *__s_d_op; /* default d_op for dentries */ struct shrinker *s_shrink; /* per-sb shrinker handle */ -- cgit v1.2.3 From 4ccd065a69df163cd9fe0dd8e0f609f1eeb4723d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 4 Jun 2025 17:54:41 +0800 Subject: crypto: ahash - Add support for drivers with no fallback Some drivers cannot have a fallback, e.g., because the key is held in hardware. Allow these to be used with ahash by adding the bit CRYPTO_ALG_NO_FALLBACK. Signed-off-by: Herbert Xu Tested-by: Harald Freudenberger --- include/linux/crypto.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index b50f1954d1bb..a2137e19be7d 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -136,6 +136,9 @@ /* Set if the algorithm supports virtual addresses. */ #define CRYPTO_ALG_REQ_VIRT 0x00040000 +/* Set if the algorithm cannot have a fallback (e.g., phmac). */ +#define CRYPTO_ALG_NO_FALLBACK 0x00080000 + /* The high bits 0xff000000 are reserved for type-specific flags. */ /* -- cgit v1.2.3 From 19ac3579af14e17c56c5b8a10979c6ca4aee6e38 Mon Sep 17 00:00:00 2001 From: Satya Priya Kakitapalli Date: Mon, 12 May 2025 10:34:36 +0530 Subject: dt-bindings: clock: qcom: Add missing bindings on gcc-sc8180x The multi-media AHB clocks are needed to create HW dependency in the multimedia CC dt blocks and avoid any issues. They were not defined in the initial bindings. Add all the missing clock bindings for gcc-sc8180x. Signed-off-by: Satya Priya Kakitapalli Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250512-sc8180x-camcc-support-v4-1-8fb1d3265f52@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,gcc-sc8180x.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gcc-sc8180x.h b/include/dt-bindings/clock/qcom,gcc-sc8180x.h index e364006aa6ea..b9d8438a15ff 100644 --- a/include/dt-bindings/clock/qcom,gcc-sc8180x.h +++ b/include/dt-bindings/clock/qcom,gcc-sc8180x.h @@ -249,6 +249,16 @@ #define GCC_UFS_MEM_CLKREF_EN 239 #define GCC_UFS_CARD_CLKREF_EN 240 #define GPLL9 241 +#define GCC_CAMERA_AHB_CLK 242 +#define GCC_CAMERA_XO_CLK 243 +#define GCC_CPUSS_DVM_BUS_CLK 244 +#define GCC_CPUSS_GNOC_CLK 245 +#define GCC_DISP_AHB_CLK 246 +#define GCC_DISP_XO_CLK 247 +#define GCC_GPU_CFG_AHB_CLK 248 +#define GCC_NPU_CFG_AHB_CLK 249 +#define GCC_VIDEO_AHB_CLK 250 +#define GCC_VIDEO_XO_CLK 251 #define GCC_EMAC_BCR 0 #define GCC_GPU_BCR 1 -- cgit v1.2.3 From b5975ce4615fc075b4a135b867988f654370a268 Mon Sep 17 00:00:00 2001 From: Satya Priya Kakitapalli Date: Mon, 12 May 2025 10:34:37 +0530 Subject: dt-bindings: clock: Add Qualcomm SC8180X Camera clock controller Add device tree bindings for the camera clock controller on Qualcomm SC8180X platform. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Satya Priya Kakitapalli Link: https://lore.kernel.org/r/20250512-sc8180x-camcc-support-v4-2-8fb1d3265f52@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sc8180x-camcc.h | 181 +++++++++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sc8180x-camcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sc8180x-camcc.h b/include/dt-bindings/clock/qcom,sc8180x-camcc.h new file mode 100644 index 000000000000..3e57b80f65e8 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sc8180x-camcc.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2025, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_CAM_CC_SC8180X_H +#define _DT_BINDINGS_CLK_QCOM_CAM_CC_SC8180X_H + +/* CAM_CC clocks */ +#define CAM_CC_BPS_AHB_CLK 0 +#define CAM_CC_BPS_AREG_CLK 1 +#define CAM_CC_BPS_AXI_CLK 2 +#define CAM_CC_BPS_CLK 3 +#define CAM_CC_BPS_CLK_SRC 4 +#define CAM_CC_CAMNOC_AXI_CLK 5 +#define CAM_CC_CAMNOC_AXI_CLK_SRC 6 +#define CAM_CC_CAMNOC_DCD_XO_CLK 7 +#define CAM_CC_CCI_0_CLK 8 +#define CAM_CC_CCI_0_CLK_SRC 9 +#define CAM_CC_CCI_1_CLK 10 +#define CAM_CC_CCI_1_CLK_SRC 11 +#define CAM_CC_CCI_2_CLK 12 +#define CAM_CC_CCI_2_CLK_SRC 13 +#define CAM_CC_CCI_3_CLK 14 +#define CAM_CC_CCI_3_CLK_SRC 15 +#define CAM_CC_CORE_AHB_CLK 16 +#define CAM_CC_CPAS_AHB_CLK 17 +#define CAM_CC_CPHY_RX_CLK_SRC 18 +#define CAM_CC_CSI0PHYTIMER_CLK 19 +#define CAM_CC_CSI0PHYTIMER_CLK_SRC 20 +#define CAM_CC_CSI1PHYTIMER_CLK 21 +#define CAM_CC_CSI1PHYTIMER_CLK_SRC 22 +#define CAM_CC_CSI2PHYTIMER_CLK 23 +#define CAM_CC_CSI2PHYTIMER_CLK_SRC 24 +#define CAM_CC_CSI3PHYTIMER_CLK 25 +#define CAM_CC_CSI3PHYTIMER_CLK_SRC 26 +#define CAM_CC_CSIPHY0_CLK 27 +#define CAM_CC_CSIPHY1_CLK 28 +#define CAM_CC_CSIPHY2_CLK 29 +#define CAM_CC_CSIPHY3_CLK 30 +#define CAM_CC_FAST_AHB_CLK_SRC 31 +#define CAM_CC_FD_CORE_CLK 32 +#define CAM_CC_FD_CORE_CLK_SRC 33 +#define CAM_CC_FD_CORE_UAR_CLK 34 +#define CAM_CC_ICP_AHB_CLK 35 +#define CAM_CC_ICP_CLK 36 +#define CAM_CC_ICP_CLK_SRC 37 +#define CAM_CC_IFE_0_AXI_CLK 38 +#define CAM_CC_IFE_0_CLK 39 +#define CAM_CC_IFE_0_CLK_SRC 40 +#define CAM_CC_IFE_0_CPHY_RX_CLK 41 +#define CAM_CC_IFE_0_CSID_CLK 42 +#define CAM_CC_IFE_0_CSID_CLK_SRC 43 +#define CAM_CC_IFE_0_DSP_CLK 44 +#define CAM_CC_IFE_1_AXI_CLK 45 +#define CAM_CC_IFE_1_CLK 46 +#define CAM_CC_IFE_1_CLK_SRC 47 +#define CAM_CC_IFE_1_CPHY_RX_CLK 48 +#define CAM_CC_IFE_1_CSID_CLK 49 +#define CAM_CC_IFE_1_CSID_CLK_SRC 50 +#define CAM_CC_IFE_1_DSP_CLK 51 +#define CAM_CC_IFE_2_AXI_CLK 52 +#define CAM_CC_IFE_2_CLK 53 +#define CAM_CC_IFE_2_CLK_SRC 54 +#define CAM_CC_IFE_2_CPHY_RX_CLK 55 +#define CAM_CC_IFE_2_CSID_CLK 56 +#define CAM_CC_IFE_2_CSID_CLK_SRC 57 +#define CAM_CC_IFE_2_DSP_CLK 58 +#define CAM_CC_IFE_3_AXI_CLK 59 +#define CAM_CC_IFE_3_CLK 60 +#define CAM_CC_IFE_3_CLK_SRC 61 +#define CAM_CC_IFE_3_CPHY_RX_CLK 62 +#define CAM_CC_IFE_3_CSID_CLK 63 +#define CAM_CC_IFE_3_CSID_CLK_SRC 64 +#define CAM_CC_IFE_3_DSP_CLK 65 +#define CAM_CC_IFE_LITE_0_CLK 66 +#define CAM_CC_IFE_LITE_0_CLK_SRC 67 +#define CAM_CC_IFE_LITE_0_CPHY_RX_CLK 68 +#define CAM_CC_IFE_LITE_0_CSID_CLK 69 +#define CAM_CC_IFE_LITE_0_CSID_CLK_SRC 70 +#define CAM_CC_IFE_LITE_1_CLK 71 +#define CAM_CC_IFE_LITE_1_CLK_SRC 72 +#define CAM_CC_IFE_LITE_1_CPHY_RX_CLK 73 +#define CAM_CC_IFE_LITE_1_CSID_CLK 74 +#define CAM_CC_IFE_LITE_1_CSID_CLK_SRC 75 +#define CAM_CC_IFE_LITE_2_CLK 76 +#define CAM_CC_IFE_LITE_2_CLK_SRC 77 +#define CAM_CC_IFE_LITE_2_CPHY_RX_CLK 78 +#define CAM_CC_IFE_LITE_2_CSID_CLK 79 +#define CAM_CC_IFE_LITE_2_CSID_CLK_SRC 80 +#define CAM_CC_IFE_LITE_3_CLK 81 +#define CAM_CC_IFE_LITE_3_CLK_SRC 82 +#define CAM_CC_IFE_LITE_3_CPHY_RX_CLK 83 +#define CAM_CC_IFE_LITE_3_CSID_CLK 84 +#define CAM_CC_IFE_LITE_3_CSID_CLK_SRC 85 +#define CAM_CC_IPE_0_AHB_CLK 86 +#define CAM_CC_IPE_0_AREG_CLK 87 +#define CAM_CC_IPE_0_AXI_CLK 88 +#define CAM_CC_IPE_0_CLK 89 +#define CAM_CC_IPE_0_CLK_SRC 90 +#define CAM_CC_IPE_1_AHB_CLK 91 +#define CAM_CC_IPE_1_AREG_CLK 92 +#define CAM_CC_IPE_1_AXI_CLK 93 +#define CAM_CC_IPE_1_CLK 94 +#define CAM_CC_JPEG_CLK 95 +#define CAM_CC_JPEG_CLK_SRC 96 +#define CAM_CC_LRME_CLK 97 +#define CAM_CC_LRME_CLK_SRC 98 +#define CAM_CC_MCLK0_CLK 99 +#define CAM_CC_MCLK0_CLK_SRC 100 +#define CAM_CC_MCLK1_CLK 101 +#define CAM_CC_MCLK1_CLK_SRC 102 +#define CAM_CC_MCLK2_CLK 103 +#define CAM_CC_MCLK2_CLK_SRC 104 +#define CAM_CC_MCLK3_CLK 105 +#define CAM_CC_MCLK3_CLK_SRC 106 +#define CAM_CC_MCLK4_CLK 107 +#define CAM_CC_MCLK4_CLK_SRC 108 +#define CAM_CC_MCLK5_CLK 109 +#define CAM_CC_MCLK5_CLK_SRC 110 +#define CAM_CC_MCLK6_CLK 111 +#define CAM_CC_MCLK6_CLK_SRC 112 +#define CAM_CC_MCLK7_CLK 113 +#define CAM_CC_MCLK7_CLK_SRC 114 +#define CAM_CC_PLL0 115 +#define CAM_CC_PLL0_OUT_EVEN 116 +#define CAM_CC_PLL0_OUT_ODD 117 +#define CAM_CC_PLL1 118 +#define CAM_CC_PLL2 119 +#define CAM_CC_PLL2_OUT_MAIN 120 +#define CAM_CC_PLL3 121 +#define CAM_CC_PLL4 122 +#define CAM_CC_PLL5 123 +#define CAM_CC_PLL6 124 +#define CAM_CC_SLOW_AHB_CLK_SRC 125 +#define CAM_CC_XO_CLK_SRC 126 + + +/* CAM_CC power domains */ +#define BPS_GDSC 0 +#define IFE_0_GDSC 1 +#define IFE_1_GDSC 2 +#define IFE_2_GDSC 3 +#define IFE_3_GDSC 4 +#define IPE_0_GDSC 5 +#define IPE_1_GDSC 6 +#define TITAN_TOP_GDSC 7 + +/* CAM_CC resets */ +#define CAM_CC_BPS_BCR 0 +#define CAM_CC_CAMNOC_BCR 1 +#define CAM_CC_CCI_BCR 2 +#define CAM_CC_CPAS_BCR 3 +#define CAM_CC_CSI0PHY_BCR 4 +#define CAM_CC_CSI1PHY_BCR 5 +#define CAM_CC_CSI2PHY_BCR 6 +#define CAM_CC_CSI3PHY_BCR 7 +#define CAM_CC_FD_BCR 8 +#define CAM_CC_ICP_BCR 9 +#define CAM_CC_IFE_0_BCR 10 +#define CAM_CC_IFE_1_BCR 11 +#define CAM_CC_IFE_2_BCR 12 +#define CAM_CC_IFE_3_BCR 13 +#define CAM_CC_IFE_LITE_0_BCR 14 +#define CAM_CC_IFE_LITE_1_BCR 15 +#define CAM_CC_IFE_LITE_2_BCR 16 +#define CAM_CC_IFE_LITE_3_BCR 17 +#define CAM_CC_IPE_0_BCR 18 +#define CAM_CC_IPE_1_BCR 19 +#define CAM_CC_JPEG_BCR 20 +#define CAM_CC_LRME_BCR 21 +#define CAM_CC_MCLK0_BCR 22 +#define CAM_CC_MCLK1_BCR 23 +#define CAM_CC_MCLK2_BCR 24 +#define CAM_CC_MCLK3_BCR 25 +#define CAM_CC_MCLK4_BCR 26 +#define CAM_CC_MCLK5_BCR 27 +#define CAM_CC_MCLK6_BCR 28 +#define CAM_CC_MCLK7_BCR 29 + +#endif -- cgit v1.2.3 From 0b0cae7119a0ec9449d7261b5e672a5fed765068 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Tue, 3 Jun 2025 14:14:43 +0300 Subject: x86/its: move its_pages array to struct mod_arch_specific The of pages with ITS thunks allocated for modules are tracked by an array in 'struct module'. Since this is very architecture specific data structure, move it to 'struct mod_arch_specific'. No functional changes. Fixes: 872df34d7c51 ("x86/its: Use dynamic thunks for indirect branches") Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Mike Rapoport (Microsoft) Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20250603111446.2609381-4-rppt@kernel.org --- include/linux/module.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 92e1420fccdf..5faa1fb1f4b4 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -586,11 +586,6 @@ struct module { atomic_t refcnt; #endif -#ifdef CONFIG_MITIGATION_ITS - int its_num_pages; - void **its_page_array; -#endif - #ifdef CONFIG_CONSTRUCTORS /* Constructor functions. */ ctor_fn_t *ctors; -- cgit v1.2.3 From 7cd9a11dd0c3d1dd225795ed1b5b53132888e7b5 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Tue, 3 Jun 2025 14:14:45 +0300 Subject: Revert "mm/execmem: Unify early execmem_cache behaviour" The commit d6d1e3e6580c ("mm/execmem: Unify early execmem_cache behaviour") changed early behaviour of execemem ROX cache to allow its usage in early x86 code that allocates text pages when CONFIG_MITGATION_ITS is enabled. The permission management of the pages allocated from execmem for ITS mitigation is now completely contained in arch/x86/kernel/alternatives.c and therefore there is no need to special case early allocations in execmem. This reverts commit d6d1e3e6580ca35071ad474381f053cbf1fb6414. Signed-off-by: Mike Rapoport (Microsoft) Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20250603111446.2609381-6-rppt@kernel.org --- include/linux/execmem.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/execmem.h b/include/linux/execmem.h index ca42d5e46ccc..3be35680a54f 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -54,7 +54,7 @@ enum execmem_range_flags { EXECMEM_ROX_CACHE = (1 << 1), }; -#if defined(CONFIG_ARCH_HAS_EXECMEM_ROX) && defined(CONFIG_EXECMEM) +#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX /** * execmem_fill_trapping_insns - set memory to contain instructions that * will trap @@ -94,15 +94,9 @@ int execmem_make_temp_rw(void *ptr, size_t size); * Return: 0 on success or negative error code on failure. */ int execmem_restore_rox(void *ptr, size_t size); - -/* - * Called from mark_readonly(), where the system transitions to ROX. - */ -void execmem_cache_make_ro(void); #else static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } -static inline void execmem_cache_make_ro(void) { } #endif /** -- cgit v1.2.3 From 6a9e2fb1bab53b54d02714a2ee3c6612d19629ce Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 6 Jun 2025 11:45:07 +0200 Subject: nsfs: move root inode number to uapi Userspace relies on the root inode numbers to identify the initial namespaces. That's already a hard dependency. So we cannot change that anymore. Move the initial inode numbers to a public header. Link: https://github.com/systemd/systemd/commit/d293fade24b34ccc2f5716b0ff5513e9533cf0c4 Link: https://lore.kernel.org/20250606-work-nsfs-v1-1-b8749c9a8844@kernel.org Signed-off-by: Christian Brauner --- include/linux/proc_ns.h | 13 +++++++------ include/uapi/linux/nsfs.h | 9 +++++++++ 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 5ea470eb4d76..e77a37b23ca7 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -6,6 +6,7 @@ #define _LINUX_PROC_NS_H #include +#include struct pid_namespace; struct nsset; @@ -40,12 +41,12 @@ extern const struct proc_ns_operations timens_for_children_operations; */ enum { PROC_ROOT_INO = 1, - PROC_IPC_INIT_INO = 0xEFFFFFFFU, - PROC_UTS_INIT_INO = 0xEFFFFFFEU, - PROC_USER_INIT_INO = 0xEFFFFFFDU, - PROC_PID_INIT_INO = 0xEFFFFFFCU, - PROC_CGROUP_INIT_INO = 0xEFFFFFFBU, - PROC_TIME_INIT_INO = 0xEFFFFFFAU, + PROC_IPC_INIT_INO = IPC_NS_INIT_INO, + PROC_UTS_INIT_INO = UTS_NS_INIT_INO, + PROC_USER_INIT_INO = USER_NS_INIT_INO, + PROC_PID_INIT_INO = PID_NS_INIT_INO, + PROC_CGROUP_INIT_INO = CGROUP_NS_INIT_INO, + PROC_TIME_INIT_INO = TIME_NS_INIT_INO, }; #ifdef CONFIG_PROC_FS diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index 34127653fd00..6683e7ca3996 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -42,4 +42,13 @@ struct mnt_ns_info { /* Get previous namespace. */ #define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info) +enum init_ns_ino { + IPC_NS_INIT_INO = 0xEFFFFFFFU, + UTS_NS_INIT_INO = 0xEFFFFFFEU, + USER_NS_INIT_INO = 0xEFFFFFFDU, + PID_NS_INIT_INO = 0xEFFFFFFCU, + CGROUP_NS_INIT_INO = 0xEFFFFFFBU, + TIME_NS_INIT_INO = 0xEFFFFFFAU, +}; + #endif /* __LINUX_NSFS_H */ -- cgit v1.2.3 From 9b0240b3ccc325c7a96cf362877180bc9e10d546 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 6 Jun 2025 11:45:08 +0200 Subject: netns: use stable inode number for initial mount ns Apart from the network and mount namespace all other namespaces expose a stable inode number and userspace has been relying on that for a very long time now. It's very much heavily used API. Align the network namespace and use a stable inode number from the reserved procfs inode number space so this is consistent across all namespaces. Link: https://lore.kernel.org/20250606-work-nsfs-v1-2-b8749c9a8844@kernel.org Reviewed-by: Jakub Kicinski Signed-off-by: Christian Brauner --- include/linux/proc_ns.h | 1 + include/uapi/linux/nsfs.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index e77a37b23ca7..3ff0bd381704 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -47,6 +47,7 @@ enum { PROC_PID_INIT_INO = PID_NS_INIT_INO, PROC_CGROUP_INIT_INO = CGROUP_NS_INIT_INO, PROC_TIME_INIT_INO = TIME_NS_INIT_INO, + PROC_NET_INIT_INO = NET_NS_INIT_INO, }; #ifdef CONFIG_PROC_FS diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index 6683e7ca3996..393778489d85 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -49,6 +49,7 @@ enum init_ns_ino { PID_NS_INIT_INO = 0xEFFFFFFCU, CGROUP_NS_INIT_INO = 0xEFFFFFFBU, TIME_NS_INIT_INO = 0xEFFFFFFAU, + NET_NS_INIT_INO = 0xEFFFFFF9U, }; #endif /* __LINUX_NSFS_H */ -- cgit v1.2.3 From 7f4f229195b73606ded77e56943f463b78adf635 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 6 Jun 2025 11:45:09 +0200 Subject: mntns: use stable inode number for initial mount ns Apart from the network and mount namespace all other namespaces expose a stable inode number and userspace has been relying on that for a very long time now. It's very much heavily used API. Align the mount namespace and use a stable inode number from the reserved procfs inode number space so this is consistent across all namespaces. Link: https://lore.kernel.org/20250606-work-nsfs-v1-3-b8749c9a8844@kernel.org Signed-off-by: Christian Brauner --- include/linux/proc_ns.h | 1 + include/uapi/linux/nsfs.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 3ff0bd381704..6258455e49a4 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -48,6 +48,7 @@ enum { PROC_CGROUP_INIT_INO = CGROUP_NS_INIT_INO, PROC_TIME_INIT_INO = TIME_NS_INIT_INO, PROC_NET_INIT_INO = NET_NS_INIT_INO, + PROC_MNT_INIT_INO = MNT_NS_INIT_INO, }; #ifdef CONFIG_PROC_FS diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index 393778489d85..97d8d80d139f 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -50,6 +50,7 @@ enum init_ns_ino { CGROUP_NS_INIT_INO = 0xEFFFFFFBU, TIME_NS_INIT_INO = 0xEFFFFFFAU, NET_NS_INIT_INO = 0xEFFFFFF9U, + MNT_NS_INIT_INO = 0xEFFFFFF8U, }; #endif /* __LINUX_NSFS_H */ -- cgit v1.2.3 From 6bdd3a01fe4627ad7a562ba38eb759eba715b671 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Jun 2025 16:00:20 +0200 Subject: fs: add missing values to TRACE_IOCB_STRINGS Make sure all values are covered. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250610140020.2227932-1-hch@lst.de Signed-off-by: Christian Brauner --- include/linux/fs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 96c7925a6551..d27c402f1162 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -399,7 +399,9 @@ struct readahead_control; { IOCB_WAITQ, "WAITQ" }, \ { IOCB_NOIO, "NOIO" }, \ { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \ - { IOCB_DIO_CALLER_COMP, "CALLER_COMP" } + { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }, \ + { IOCB_AIO_RW, "AIO_RW" }, \ + { IOCB_HAS_METADATA, "AIO_HAS_METADATA" } struct kiocb { struct file *ki_filp; -- cgit v1.2.3 From 2fa8bf42c50582c7302918474aae8c52b59e7910 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 23 Feb 2025 19:53:00 -0500 Subject: set_default_d_op(): calculate the matching value for ->d_flags ... and store it in ->s_d_flags, to be used by __d_alloc() Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7cd8eaab4d4e..65548e70e596 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1400,6 +1400,7 @@ struct super_block { char s_sysfs_name[UUID_STRING_LEN + 1]; unsigned int s_max_links; + unsigned int s_d_flags; /* default d_flags for dentries */ /* * The next field is for VFS *only*. No filesystems have any business -- cgit v1.2.3 From 691fb82ca6ccdcdb9e60e754b55659271d5280e7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 23 Feb 2025 20:18:15 -0500 Subject: make d_set_d_op() static Convert the last user (d_alloc_pseudo()) and be done with that. Any out-of-tree filesystem using it should switch to d_splice_alias_ops() or, better yet, check whether it really needs to have ->d_op vary among its dentries. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/dcache.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index be7ae058fa90..cc3e1c1a3454 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -237,7 +237,6 @@ extern void d_instantiate_new(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); -extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op); /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); -- cgit v1.2.3 From 0b136e7d18fa8bb1251ab06f4f30e883da780245 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 23 Feb 2025 20:59:01 -0500 Subject: kill simple_dentry_operations No users left and anything that wants it would be better off just setting DCACHE_DONTCACHE in their ->s_d_flags. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 65548e70e596..d58bbb8262e8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3606,7 +3606,6 @@ extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); -extern const struct dentry_operations simple_dentry_operations; extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); -- cgit v1.2.3 From 29d673b1508fcaa14be32e92679874f10a099bc8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 13 May 2024 23:36:53 -0600 Subject: make securityfs_remove() remove the entire subtree ... and fix the mount leak when anything's mounted there. securityfs_recursive_remove becomes an alias for securityfs_remove - we'll probably need to remove it in a cycle or two. Signed-off-by: Al Viro --- include/linux/security.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index dba349629229..386463b5e848 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2211,7 +2211,6 @@ struct dentry *securityfs_create_symlink(const char *name, const char *target, const struct inode_operations *iops); extern void securityfs_remove(struct dentry *dentry); -extern void securityfs_recursive_remove(struct dentry *dentry); #else /* CONFIG_SECURITYFS */ @@ -2243,6 +2242,8 @@ static inline void securityfs_remove(struct dentry *dentry) #endif +#define securityfs_recursive_remove securityfs_remove + #ifdef CONFIG_BPF_SYSCALL union bpf_attr; struct bpf_map; -- cgit v1.2.3 From e1f4b1f167581a4932dd4f017c80a6e46d28761a Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Tue, 10 Jun 2025 10:11:08 -0700 Subject: eth: Update rmon hist range The fbnic driver reports up-to 11 ranges resulting in the drop of the last range. This patch increment the value of ETHTOOL_RMON_HIST_MAX to address this limitation. Signed-off-by: Mohsin Bashir Reviewed-by: Jacob Keller Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250610171109.1481229-2-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 5e0dd333ad1f..90da1aee6e56 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -536,7 +536,7 @@ struct ethtool_rmon_hist_range { u16 high; }; -#define ETHTOOL_RMON_HIST_MAX 10 +#define ETHTOOL_RMON_HIST_MAX 11 /** * struct ethtool_rmon_stats - selected RMON (RFC 2819) statistics -- cgit v1.2.3 From c0f21029f123d1b15f8eddc8e3976bf0c8781c43 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 8 Jun 2025 10:42:53 +0300 Subject: xfrm: always initialize offload path Offload path is used for GRO with SW IPsec, and not just for HW offload. So initialize it anyway. Fixes: 585b64f5a620 ("xfrm: delay initialization of offload path till its actually requested") Reported-by: Sabrina Dubroca Closes: https://lore.kernel.org/all/aEGW_5HfPqU1rFjl@krikkit Signed-off-by: Leon Romanovsky Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a21e276dbe44..e45a275fca26 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -474,7 +474,7 @@ struct xfrm_type_offload { int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family); void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family); -void xfrm_set_type_offload(struct xfrm_state *x); +void xfrm_set_type_offload(struct xfrm_state *x, bool try_load); static inline void xfrm_unset_type_offload(struct xfrm_state *x) { if (!x->type_offload) -- cgit v1.2.3 From 12b5b138d111db0588492002fdd8089af61b80e5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 3 Jun 2025 15:31:55 +0200 Subject: coredump: allow for flexible coredump handling Extend the coredump socket to allow the coredump server to tell the kernel how to process individual coredumps. When the crashing task connects to the coredump socket the kernel will send a struct coredump_req to the coredump server. The kernel will set the size member of struct coredump_req allowing the coredump server how much data can be read. The coredump server uses MSG_PEEK to peek the size of struct coredump_req. If the kernel uses a newer struct coredump_req the coredump server just reads the size it knows and discard any remaining bytes in the buffer. If the kernel uses an older struct coredump_req the coredump server just reads the size the kernel knows. The returned struct coredump_req will inform the coredump server what features the kernel supports. The coredump_req->mask member is set to the currently know features. The coredump server may only use features whose bits were raised by the kernel in coredump_req->mask. In response to a coredump_req from the kernel the coredump server sends a struct coredump_ack to the kernel. The kernel informs the coredump server what version of struct coredump_ack it supports by setting struct coredump_req->size_ack to the size it knows about. The coredump server may only send as many bytes as coredump_req->size_ack indicates (a smaller size is fine of course). The coredump server must set coredump_ack->size accordingly. The coredump server sets the features it wants to use in struct coredump_ack->mask. Only bits returned in struct coredump_req->mask may be used. In case an invalid struct coredump_ack is sent to the kernel a non-zero u32 integer is sent indicating the reason for the failure. If it was successful a zero u32 integer is sent. In the initial version the following features are supported in coredump_{req,ack}->mask: * COREDUMP_KERNEL The kernel will write the coredump data to the socket. * COREDUMP_USERSPACE The kernel will not write coredump data but will indicate to the parent that a coredump has been generated. This is used when userspace generates its own coredumps. * COREDUMP_REJECT The kernel will skip generating a coredump for this task. * COREDUMP_WAIT The kernel will prevent the task from exiting until the coredump server has shutdown the socket connection. The flexible coredump socket can be enabled by using the "@@" prefix instead of the single "@" prefix for the regular coredump socket: @@/run/systemd/coredump.socket will enable flexible coredump handling. Current kernels already enforce that "@" must be followed by "/" and will reject anything else. So extending this is backward and forward compatible. Link: https://lore.kernel.org/20250603-work-coredump-socket-protocol-v2-1-05a5f0c18ecc@kernel.org Acked-by: Lennart Poettering Reviewed-by: Alexander Mikhalitsyn Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/uapi/linux/coredump.h | 104 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 include/uapi/linux/coredump.h (limited to 'include') diff --git a/include/uapi/linux/coredump.h b/include/uapi/linux/coredump.h new file mode 100644 index 000000000000..dc3789b78af0 --- /dev/null +++ b/include/uapi/linux/coredump.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_COREDUMP_H +#define _UAPI_LINUX_COREDUMP_H + +#include + +/** + * coredump_{req,ack} flags + * @COREDUMP_KERNEL: kernel writes coredump + * @COREDUMP_USERSPACE: userspace writes coredump + * @COREDUMP_REJECT: don't generate coredump + * @COREDUMP_WAIT: wait for coredump server + */ +enum { + COREDUMP_KERNEL = (1ULL << 0), + COREDUMP_USERSPACE = (1ULL << 1), + COREDUMP_REJECT = (1ULL << 2), + COREDUMP_WAIT = (1ULL << 3), +}; + +/** + * struct coredump_req - message kernel sends to userspace + * @size: size of struct coredump_req + * @size_ack: known size of struct coredump_ack on this kernel + * @mask: supported features + * + * When a coredump happens the kernel will connect to the coredump + * socket and send a coredump request to the coredump server. The @size + * member is set to the size of struct coredump_req and provides a hint + * to userspace how much data can be read. Userspace may use MSG_PEEK to + * peek the size of struct coredump_req and then choose to consume it in + * one go. Userspace may also simply read a COREDUMP_ACK_SIZE_VER0 + * request. If the size the kernel sends is larger userspace simply + * discards any remaining data. + * + * The coredump_req->mask member is set to the currently know features. + * Userspace may only set coredump_ack->mask to the bits raised by the + * kernel in coredump_req->mask. + * + * The coredump_req->size_ack member is set by the kernel to the size of + * struct coredump_ack the kernel knows. Userspace may only send up to + * coredump_req->size_ack bytes to the kernel and must set + * coredump_ack->size accordingly. + */ +struct coredump_req { + __u32 size; + __u32 size_ack; + __u64 mask; +}; + +enum { + COREDUMP_REQ_SIZE_VER0 = 16U, /* size of first published struct */ +}; + +/** + * struct coredump_ack - message userspace sends to kernel + * @size: size of the struct + * @spare: unused + * @mask: features kernel is supposed to use + * + * The @size member must be set to the size of struct coredump_ack. It + * may never exceed what the kernel returned in coredump_req->size_ack + * but it may of course be smaller (>= COREDUMP_ACK_SIZE_VER0 and <= + * coredump_req->size_ack). + * + * The @mask member must be set to the features the coredump server + * wants the kernel to use. Only bits the kernel returned in + * coredump_req->mask may be set. + */ +struct coredump_ack { + __u32 size; + __u32 spare; + __u64 mask; +}; + +enum { + COREDUMP_ACK_SIZE_VER0 = 16U, /* size of first published struct */ +}; + +/** + * enum coredump_mark - Markers for the coredump socket + * + * The kernel will place a single byte on the coredump socket. The + * markers notify userspace whether the coredump ack succeeded or + * failed. + * + * @COREDUMP_MARK_MINSIZE: the provided coredump_ack size was too small + * @COREDUMP_MARK_MAXSIZE: the provided coredump_ack size was too big + * @COREDUMP_MARK_UNSUPPORTED: the provided coredump_ack mask was invalid + * @COREDUMP_MARK_CONFLICTING: the provided coredump_ack mask has conflicting options + * @COREDUMP_MARK_REQACK: the coredump request and ack was successful + * @__COREDUMP_MARK_MAX: the maximum coredump mark value + */ +enum coredump_mark { + COREDUMP_MARK_REQACK = 0U, + COREDUMP_MARK_MINSIZE = 1U, + COREDUMP_MARK_MAXSIZE = 2U, + COREDUMP_MARK_UNSUPPORTED = 3U, + COREDUMP_MARK_CONFLICTING = 4U, + __COREDUMP_MARK_MAX = (1U << 31), +}; + +#endif /* _UAPI_LINUX_COREDUMP_H */ -- cgit v1.2.3 From 8a5a5cecb79058b608e5562d8998123a3adb313c Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Thu, 12 Jun 2025 12:42:52 +0800 Subject: ASoC: tas2781: Move the "include linux/debugfs.h" into tas2781.h Move the include linux/debugfs.h into tas2781.h for code clean. Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20250612044252.1025-1-shenghao-ding@ti.com Signed-off-by: Mark Brown --- include/sound/tas2781.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index 40cd3bd079b5..3875e92f1ec5 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -17,6 +17,10 @@ #ifndef __TAS2781_H__ #define __TAS2781_H__ +#ifdef CONFIG_SND_SOC_TAS2781_ACOUST_I2C +#include +#endif + #include "tas2781-dsp.h" /* version number */ -- cgit v1.2.3 From da5cb65d25f747236a003b82525eb6de5d49a2e6 Mon Sep 17 00:00:00 2001 From: Raghav Sharma Date: Thu, 29 May 2025 16:56:38 +0530 Subject: dt-bindings: clock: exynosautov920: add hsi2 clock definitions Add device tree clock binding definitions for CMU_HSI2 Signed-off-by: Raghav Sharma Reviewed-by: Alim Akhtar Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250529112640.1646740-3-raghav.s@samsung.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/samsung,exynosautov920.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/samsung,exynosautov920.h b/include/dt-bindings/clock/samsung,exynosautov920.h index 5e6896e9627f..93e6233d1358 100644 --- a/include/dt-bindings/clock/samsung,exynosautov920.h +++ b/include/dt-bindings/clock/samsung,exynosautov920.h @@ -286,4 +286,13 @@ #define CLK_MOUT_HSI1_USBDRD_USER 3 #define CLK_MOUT_HSI1_USBDRD 4 +/* CMU_HSI2 */ +#define FOUT_PLL_ETH 1 +#define CLK_MOUT_HSI2_NOC_UFS_USER 2 +#define CLK_MOUT_HSI2_UFS_EMBD_USER 3 +#define CLK_MOUT_HSI2_ETHERNET 4 +#define CLK_MOUT_HSI2_ETHERNET_USER 5 +#define CLK_DOUT_HSI2_ETHERNET 6 +#define CLK_DOUT_HSI2_ETHERNET_PTP 7 + #endif /* _DT_BINDINGS_CLOCK_EXYNOSAUTOV920_H */ -- cgit v1.2.3 From baaebe0928bf321a1cd980d569e308dec66be94c Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 11 Jun 2025 13:08:26 -0700 Subject: Revert "bpf: use common instruction history across all states" This reverts commit 96a30e469ca1d2b8cc7811b40911f8614b558241. Next patches in the series modify propagate_precision() to allow arbitrary starting state. Precision propagation requires access to jump history, and arbitrary states represent history not belonging to `env->cur_state`. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250611200836.4135542-1-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e6c26393c029..3e77befdbc4b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -344,7 +344,7 @@ struct bpf_func_state { #define MAX_CALL_FRAMES 8 -/* instruction history flags, used in bpf_insn_hist_entry.flags field */ +/* instruction history flags, used in bpf_jmp_history_entry.flags field */ enum { /* instruction references stack slot through PTR_TO_STACK register; * we also store stack's frame number in lower 3 bits (MAX_CALL_FRAMES is 8) @@ -366,7 +366,7 @@ enum { static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES); static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8); -struct bpf_insn_hist_entry { +struct bpf_jmp_history_entry { u32 idx; /* insn idx can't be bigger than 1 million */ u32 prev_idx : 20; @@ -459,14 +459,13 @@ struct bpf_verifier_state { * See get_loop_entry() for more information. */ struct bpf_verifier_state *loop_entry; - /* Sub-range of env->insn_hist[] corresponding to this state's - * instruction history. - * Backtracking is using it to go from last to first. - * For most states instruction history is short, 0-3 instructions. + /* jmp history recorded from first to last. + * backtracking is using it to go from last to first. + * For most states jmp_history_cnt is [0-3]. * For loops can go up to ~40. */ - u32 insn_hist_start; - u32 insn_hist_end; + struct bpf_jmp_history_entry *jmp_history; + u32 jmp_history_cnt; u32 dfs_depth; u32 callback_unroll_depth; u32 may_goto_depth; @@ -776,9 +775,7 @@ struct bpf_verifier_env { int cur_postorder; } cfg; struct backtrack_state bt; - struct bpf_insn_hist_entry *insn_hist; - struct bpf_insn_hist_entry *cur_hist_ent; - u32 insn_hist_cap; + struct bpf_jmp_history_entry *cur_hist_ent; u32 pass_cnt; /* number of times do_check() was called */ u32 subprog_cnt; /* number of instructions analyzed by the verifier */ -- cgit v1.2.3 From 96c6aa4c63af0bb0675c41b3e61a2fc7f6fed998 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 11 Jun 2025 13:08:27 -0700 Subject: bpf: compute SCCs in program control flow graph Compute strongly connected components in the program CFG. Assign an SCC number to each instruction, recorded in env->insn_aux[*].scc. Use Tarjan's algorithm for SCC computation adapted to run non-recursively. For debug purposes print out computed SCCs as a part of full program dump in compute_live_registers() at log level 2, e.g.: func#0 @0 Live regs before insn: 0: .......... (b4) w6 = 10 2 1: ......6... (18) r1 = 0xffff88810bbb5565 2 3: .1....6... (b4) w2 = 2 2 4: .12...6... (85) call bpf_trace_printk#6 2 5: ......6... (04) w6 += -1 2 6: ......6... (56) if w6 != 0x0 goto pc-6 7: .......... (b4) w6 = 5 1 8: ......6... (18) r1 = 0xffff88810bbb5567 1 10: .1....6... (b4) w2 = 2 1 11: .12...6... (85) call bpf_trace_printk#6 1 12: ......6... (04) w6 += -1 1 13: ......6... (56) if w6 != 0x0 goto pc-6 14: .......... (b4) w0 = 0 15: 0......... (95) exit ^^^ SCC number for the instruction Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250611200836.4135542-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3e77befdbc4b..95f5211610f4 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -609,6 +609,11 @@ struct bpf_insn_aux_data { * accepts callback function as a parameter. */ bool calls_callback; + /* + * CFG strongly connected component this instruction belongs to, + * zero if it is a singleton SCC. + */ + u32 scc; /* registers alive before this instruction. */ u16 live_regs_before; }; -- cgit v1.2.3 From c9e31900b54cadf5398dfb838c0a63effa1defec Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 11 Jun 2025 13:08:33 -0700 Subject: bpf: propagate read/precision marks over state graph backedges Current loop_entry-based exact states comparison logic does not handle the following case: .-> A --. Assume the states are visited in the order A, B, C. | | | Assume that state B reaches a state equivalent to state A. | v v At this point, state C is not processed yet, so state A '-- B C has not received any read or precision marks from C. As a result, these marks won't be propagated to B. If B has incomplete marks, it is unsafe to use it in states_equal() checks. This commit replaces the existing logic with the following: - Strongly connected components (SCCs) are computed over the program's control flow graph (intraprocedurally). - When a verifier state enters an SCC, that state is recorded as the SCC entry point. - When a verifier state is found equivalent to another (e.g., B to A in the example), it is recorded as a states graph backedge. Backedges are accumulated per SCC. - When an SCC entry state reaches `branches == 0`, read and precision marks are propagated through the backedges (e.g., from A to B, from C to A, and then again from A to B). To support nested subprogram calls, the entry state and backedge list are associated not with the SCC itself but with an object called `bpf_scc_callchain`. A callchain is a tuple `(callsite*, scc_id)`, where `callsite` is the index of a call instruction for each frame except the last. See the comments added in `is_state_visited()` and `compute_scc_callchain()` for more details. Fixes: 2a0992829ea3 ("bpf: correct loop detection for iterators convergence") Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250611200836.4135542-8-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 95f5211610f4..b0273f759589 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -459,6 +459,10 @@ struct bpf_verifier_state { * See get_loop_entry() for more information. */ struct bpf_verifier_state *loop_entry; + /* if this state is a backedge state then equal_state + * records cached state to which this state is equal. + */ + struct bpf_verifier_state *equal_state; /* jmp history recorded from first to last. * backtracking is using it to go from last to first. * For most states jmp_history_cnt is [0-3]. @@ -723,6 +727,37 @@ struct bpf_idset { u32 ids[BPF_ID_MAP_SIZE]; }; +/* see verifier.c:compute_scc_callchain() */ +struct bpf_scc_callchain { + /* call sites from bpf_verifier_state->frame[*]->callsite leading to this SCC */ + u32 callsites[MAX_CALL_FRAMES - 1]; + /* last frame in a chain is identified by SCC id */ + u32 scc; +}; + +/* verifier state waiting for propagate_backedges() */ +struct bpf_scc_backedge { + struct bpf_scc_backedge *next; + struct bpf_verifier_state state; +}; + +struct bpf_scc_visit { + struct bpf_scc_callchain callchain; + /* first state in current verification path that entered SCC + * identified by the callchain + */ + struct bpf_verifier_state *entry_state; + struct bpf_scc_backedge *backedges; /* list of backedges */ +}; + +/* An array of bpf_scc_visit structs sharing tht same bpf_scc_callchain->scc + * but having different bpf_scc_callchain->callsites. + */ +struct bpf_scc_info { + u32 num_visits; + struct bpf_scc_visit visits[]; +}; + /* single container for all structs * one verifier_env per bpf_check() call */ @@ -819,6 +854,9 @@ struct bpf_verifier_env { char tmp_str_buf[TMP_STR_BUF_LEN]; struct bpf_insn insn_buf[INSN_BUF_SIZE]; struct bpf_insn epilogue_buf[INSN_BUF_SIZE]; + /* array of pointers to bpf_scc_info indexed by SCC id */ + struct bpf_scc_info **scc_info; + u32 scc_cnt; }; static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog) -- cgit v1.2.3 From 0e0da5f901f582b97bfeefbf1f36a27e9d427ff4 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 11 Jun 2025 13:08:34 -0700 Subject: bpf: remove {update,get}_loop_entry functions The previous patch switched read and precision tracking for iterator-based loops from state-graph-based loop tracking to control-flow-graph-based loop tracking. This patch removes the now-unused `update_loop_entry()` and `get_loop_entry()` functions, which were part of the state-graph-based logic. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250611200836.4135542-9-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b0273f759589..1ae588679e20 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -449,16 +449,6 @@ struct bpf_verifier_state { /* first and last insn idx of this verifier state */ u32 first_insn_idx; u32 last_insn_idx; - /* If this state is a part of states loop this field points to some - * parent of this state such that: - * - it is also a member of the same states loop; - * - DFS states traversal starting from initial state visits loop_entry - * state before this state. - * Used to compute topmost loop entry for state loops. - * State loops might appear because of open coded iterators logic. - * See get_loop_entry() for more information. - */ - struct bpf_verifier_state *loop_entry; /* if this state is a backedge state then equal_state * records cached state to which this state is equal. */ @@ -473,11 +463,6 @@ struct bpf_verifier_state { u32 dfs_depth; u32 callback_unroll_depth; u32 may_goto_depth; - /* If this state was ever pointed-to by other state's loop_entry field - * this flag would be set to true. Used to avoid freeing such states - * while they are still in use. - */ - u32 used_as_loop_entry; }; #define bpf_get_spilled_reg(slot, frame, mask) \ -- cgit v1.2.3 From 0f54ff54700315caa8ed3bea36fa0ff3ebc53f56 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 11 Jun 2025 13:08:35 -0700 Subject: bpf: include backedges in peak_states stat Count states accumulated in bpf_scc_visit->backedges in env->peak_states. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250611200836.4135542-10-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 1ae588679e20..7e459e839f8b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -733,6 +733,7 @@ struct bpf_scc_visit { */ struct bpf_verifier_state *entry_state; struct bpf_scc_backedge *backedges; /* list of backedges */ + u32 num_backedges; }; /* An array of bpf_scc_visit structs sharing tht same bpf_scc_callchain->scc @@ -822,6 +823,7 @@ struct bpf_verifier_env { u32 longest_mark_read_walk; u32 free_list_size; u32 explored_states_size; + u32 num_backedges; bpfptr_t fd_array; /* bit mask to keep track of whether a register has been accessed -- cgit v1.2.3 From fac4b41741b5cd0826cf0fa5b14e177f70a6b509 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Jun 2025 07:59:43 -0700 Subject: net: ethtool: require drivers to opt into the per-RSS ctx RXFH RX Flow Hashing supports using different configuration for different RSS contexts. Only two drivers seem to support it. Make sure we uniformly error out for drivers which don't. Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250611145949.2674086-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 90da1aee6e56..1a6737721d7f 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -855,6 +855,8 @@ struct kernel_ethtool_ts_info { * @cap_rss_ctx_supported: indicates if the driver supports RSS * contexts via legacy API, drivers implementing @create_rxfh_context * do not have to set this bit. + * @rxfh_per_ctx_fields: device supports selecting different header fields + * for Rx hash calculation and RSS for each additional context. * @rxfh_per_ctx_key: device supports setting different RSS key for each * additional context. Netlink API should report hfunc, key, and input_xfrm * for every context, not just context 0. @@ -1084,6 +1086,7 @@ struct ethtool_ops { u32 supported_input_xfrm:8; u32 cap_link_lanes_supported:1; u32 cap_rss_ctx_supported:1; + u32 rxfh_per_ctx_fields:1; u32 rxfh_per_ctx_key:1; u32 cap_rss_rxnfc_adds:1; u32 rxfh_indir_space; -- cgit v1.2.3 From 9bb00786fc61e865e121aa20dd12aa4d1311a990 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Jun 2025 07:59:44 -0700 Subject: net: ethtool: add dedicated callbacks for getting and setting rxfh fields We mux multiple calls to the drivers via the .get_nfc and .set_nfc callbacks. This is slightly inconvenient to the drivers as they have to de-mux them back. It will also be awkward for netlink code to construct struct ethtool_rxnfc when it wants to get info about RX Flow Hash, from the RSS module. Add dedicated driver callbacks. Create struct ethtool_rxfh_fields which contains only data relevant to RXFH. Maintain the names of the fields to avoid having to heavily modify the drivers. For now support both callbacks, once all drivers are converted ethtool_*et_rxfh_fields() will stop using the rxnfc callbacks. Link: https://patch.msgid.link/20250611145949.2674086-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 1a6737721d7f..59877fd2a1d3 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -825,6 +825,19 @@ struct ethtool_rxfh_param { u8 input_xfrm; }; +/** + * struct ethtool_rxfh_fields - Rx Flow Hashing (RXFH) header field config + * @data: which header fields are used for hashing, bitmask of RXH_* defines + * @flow_type: L2-L4 network traffic flow type + * @rss_context: RSS context, will only be used if rxfh_per_ctx_fields is + * set in struct ethtool_ops + */ +struct ethtool_rxfh_fields { + u32 data; + u32 flow_type; + u32 rss_context; +}; + /** * struct kernel_ethtool_ts_info - kernel copy of struct ethtool_ts_info * @cmd: command number = %ETHTOOL_GET_TS_INFO @@ -970,6 +983,8 @@ struct kernel_ethtool_ts_info { * will remain unchanged. * Returns a negative error code or zero. An error code must be returned * if at least one unsupported change was requested. + * @get_rxfh_fields: Get header fields used for flow hashing. + * @set_rxfh_fields: Set header fields used for flow hashing. * @create_rxfh_context: Create a new RSS context with the specified RX flow * hash indirection table, hash key, and hash function. * The &struct ethtool_rxfh_context for this context is passed in @ctx; @@ -1156,6 +1171,11 @@ struct ethtool_ops { int (*get_rxfh)(struct net_device *, struct ethtool_rxfh_param *); int (*set_rxfh)(struct net_device *, struct ethtool_rxfh_param *, struct netlink_ext_ack *extack); + int (*get_rxfh_fields)(struct net_device *, + struct ethtool_rxfh_fields *); + int (*set_rxfh_fields)(struct net_device *, + const struct ethtool_rxfh_fields *, + struct netlink_ext_ack *extack); int (*create_rxfh_context)(struct net_device *, struct ethtool_rxfh_context *ctx, const struct ethtool_rxfh_param *rxfh, -- cgit v1.2.3 From b1b36680107ede3a4ec7fa41d052971606d6b325 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 10 Jun 2025 08:03:43 +0200 Subject: net: phy: assign default match function for non-PHY MDIO devices Make mdio_device_bus_match() the default match function for non-PHY MDIO devices. Benefit is that we don't have to export this function any longer. As long as mdiodev->modalias isn't set, there's no change in behavior. mdiobus_create_device() is the only place where mdiodev->modalias gets set, but this function sets mdio_device_bus_match() as match function anyway. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/6c94e3d3-bfb0-4ddc-a518-6fddbc64e1d0@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/mdio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index e43ff9f980a4..c640ba44dd6e 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -95,7 +95,6 @@ void mdio_device_remove(struct mdio_device *mdiodev); void mdio_device_reset(struct mdio_device *mdiodev, int value); int mdio_driver_register(struct mdio_driver *drv); void mdio_driver_unregister(struct mdio_driver *drv); -int mdio_device_bus_match(struct device *dev, const struct device_driver *drv); static inline void mdio_device_get(struct mdio_device *mdiodev) { -- cgit v1.2.3 From 00ee2537255e25a14360288dbd94ff62c0db497d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 10 Jun 2025 23:34:53 +0200 Subject: net: phy: move definition of genphy_c45_driver to phy_device.c genphy_c45_read_status() is exported, so we can move definition of genphy_c45_driver to phy_device.c and make it static. This helps to clean up phy.h a little. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/ead3ab17-22d0-4cd3-901c-3d493ab851e6@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index e194dad1623d..c021b351ab0d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1941,9 +1941,6 @@ int genphy_c45_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data); int genphy_c45_an_config_eee_aneg(struct phy_device *phydev); -/* Generic C45 PHY driver */ -extern struct phy_driver genphy_c45_driver; - /* The gen10g_* functions are the old Clause 45 stub */ int gen10g_config_aneg(struct phy_device *phydev); -- cgit v1.2.3 From c4688ff47fd719e2371b984d59759f9fa09dd6a2 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 11 Jun 2025 14:56:19 +0100 Subject: net: phy: simplify phy_get_internal_delay() Simplify the arguments passed to phy_get_internal_delay() - the "dev" argument is always &phydev->mdio.dev, and as the phydev is passed in, there's no need to also pass in the struct device, especially when this function is the only reason for the caller to have a local "dev" variable. Remove the redundant "dev" argument, and update the callers. Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Reviewed-by: Jacob Keller Link: https://patch.msgid.link/E1uPLwB-003VzR-4C@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index c021b351ab0d..c4d8f7c82627 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1994,8 +1994,8 @@ bool phy_validate_pause(struct phy_device *phydev, struct ethtool_pauseparam *pp); void phy_get_pause(struct phy_device *phydev, bool *tx_pause, bool *rx_pause); -s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, - const int *delay_values, int size, bool is_rx); +s32 phy_get_internal_delay(struct phy_device *phydev, const int *delay_values, + int size, bool is_rx); int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev, enum ethtool_link_mode_bit_indices linkmode, -- cgit v1.2.3 From a9a5f41b04dd137a353d4d1d6fc7d6e80aaad193 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 11 Jun 2025 15:56:15 -0400 Subject: xdp: Remove unused events xdp_redirect_map and xdp_redirect_map_err MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each TRACE_EVENT() defined can take up around 5K of text and meta data regardless if they are used or not. New code is being developed that will warn when a tracepoint is defined but not used. The trace events xdp_redirect_map and xdp_redirect_map_err are defined but not used, but there's also a comment that states these are kept around for backward compatibility. Which is interesting because since they are not used, any old BPF program that expects them to exist will get incorrect data (no data) when they use them. It's worse than not working, it's silently failing. Remove them as they will soon cause warnings, or if they really need to stick around, then code needs to be added to use them. Signed-off-by: Steven Rostedt (Google) Reviewed-by: Toke Høiland-Jørgensen Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/20250611155615.0c2cf61c@batman.local.home Signed-off-by: Alexei Starovoitov --- include/trace/events/xdp.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index d3ef86c97ae3..0fe0893c2567 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -168,25 +168,6 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err, #define _trace_xdp_redirect_map_err(dev, xdp, to, map_type, map_id, index, err) \ trace_xdp_redirect_err(dev, xdp, to, err, map_type, map_id, index) -/* not used anymore, but kept around so as not to break old programs */ -DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map, - TP_PROTO(const struct net_device *dev, - const struct bpf_prog *xdp, - const void *tgt, int err, - enum bpf_map_type map_type, - u32 map_id, u32 index), - TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index) -); - -DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err, - TP_PROTO(const struct net_device *dev, - const struct bpf_prog *xdp, - const void *tgt, int err, - enum bpf_map_type map_type, - u32 map_id, u32 index), - TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index) -); - TRACE_EVENT(xdp_cpumap_kthread, TP_PROTO(int map_id, unsigned int processed, unsigned int drops, -- cgit v1.2.3 From 16f3c7ad887c1f8fd698ab568b5851cadb65b5a8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 12 Jun 2025 18:20:23 -0400 Subject: xdp: tracing: Hide some xdp events under CONFIG_BPF_SYSCALL The events xdp_cpumap_kthread, xdp_cpumap_enqueue and xdp_devmap_xmit are only called when CONFIG_BPF_SYSCALL is defined. As each event can take up to 5K regardless if they are used or not, it's best not to define them when they are not used. Add #ifdef around these events when they are not used. Acked-by: Jesper Dangaard Brouer Signed-off-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20250612182023.78397b76@batman.local.home Signed-off-by: Alexei Starovoitov --- include/trace/events/xdp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 0fe0893c2567..18c0ac514fcb 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -168,6 +168,7 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err, #define _trace_xdp_redirect_map_err(dev, xdp, to, map_type, map_id, index, err) \ trace_xdp_redirect_err(dev, xdp, to, err, map_type, map_id, index) +#ifdef CONFIG_BPF_SYSCALL TRACE_EVENT(xdp_cpumap_kthread, TP_PROTO(int map_id, unsigned int processed, unsigned int drops, @@ -281,6 +282,7 @@ TRACE_EVENT(xdp_devmap_xmit, __entry->sent, __entry->drops, __entry->err) ); +#endif /* CONFIG_BPF_SYSCALL */ /* Expect users already include , but not xdp_priv.h */ #include -- cgit v1.2.3 From 5202c25dd17c54cd4c21f266d9a51b644d7cd682 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 May 2025 10:09:00 +0200 Subject: sched/smp: Always define sched_domains_mutex_lock()/unlock(), def_root_domain and sched_domains_mutex Simplify the scheduler by making CONFIG_SMP=y primitives and data structures unconditional. Unconditionally build kernel/sched/topology.c and the main sched-domains locking primitives. Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Cc: Dietmar Eggemann Cc: Juri Lelli Cc: Linus Torvalds Cc: Mel Gorman Cc: Sebastian Andrzej Siewior Cc: Shrikanth Hegde Cc: Steven Rostedt Cc: Valentin Schneider Cc: Vincent Guittot Link: https://lore.kernel.org/r/20250528080924.2273858-20-mingo@kernel.org --- include/linux/sched.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f78a64beb52..aa54d75034ea 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -395,15 +395,10 @@ enum uclamp_id { UCLAMP_CNT }; -#ifdef CONFIG_SMP extern struct root_domain def_root_domain; extern struct mutex sched_domains_mutex; extern void sched_domains_mutex_lock(void); extern void sched_domains_mutex_unlock(void); -#else -static inline void sched_domains_mutex_lock(void) { } -static inline void sched_domains_mutex_unlock(void) { } -#endif struct sched_param { int sched_priority; -- cgit v1.2.3 From cac5cefbade90ff0bb0b393d301fa3b5234cf056 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 May 2025 10:09:01 +0200 Subject: sched/smp: Make SMP unconditional Simplify the scheduler by making CONFIG_SMP=y primitives and data structures unconditional. Introduce transitory wrappers for functionality not yet converted to SMP. Note that this patch is pretty large, because there's no clear separation between various aspects of the SMP scheduler, it's basically a huge block of #ifdef CONFIG_SMP. A fair amount of it has to be switched on for it to boot and work on UP systems. Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Cc: Dietmar Eggemann Cc: Juri Lelli Cc: Linus Torvalds Cc: Mel Gorman Cc: Sebastian Andrzej Siewior Cc: Shrikanth Hegde Cc: Steven Rostedt Cc: Valentin Schneider Cc: Vincent Guittot Link: https://lore.kernel.org/r/20250528080924.2273858-21-mingo@kernel.org --- include/linux/preempt.h | 9 --------- include/linux/sched.h | 42 ------------------------------------------ include/linux/sched/deadline.h | 4 ---- include/linux/sched/idle.h | 4 ---- include/linux/sched/nohz.h | 4 ++-- include/linux/sched/topology.h | 32 -------------------------------- 6 files changed, 2 insertions(+), 93 deletions(-) (limited to 'include') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index b0af8d4ef6e6..1fad1c8a4c76 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -369,8 +369,6 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, #endif -#ifdef CONFIG_SMP - /* * Migrate-Disable and why it is undesired. * @@ -429,13 +427,6 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, extern void migrate_disable(void); extern void migrate_enable(void); -#else - -static inline void migrate_disable(void) { } -static inline void migrate_enable(void) { } - -#endif /* CONFIG_SMP */ - /** * preempt_disable_nested - Disable preemption inside a normally preempt disabled section * diff --git a/include/linux/sched.h b/include/linux/sched.h index aa54d75034ea..376befdec4b0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -599,7 +599,6 @@ struct sched_entity { unsigned long runnable_weight; #endif -#ifdef CONFIG_SMP /* * Per entity load average tracking. * @@ -607,7 +606,6 @@ struct sched_entity { * collide with read-mostly values above. */ struct sched_avg avg; -#endif }; struct sched_rt_entity { @@ -837,7 +835,6 @@ struct task_struct { struct alloc_tag *alloc_tag; #endif -#ifdef CONFIG_SMP int on_cpu; struct __call_single_node wake_entry; unsigned int wakee_flips; @@ -853,7 +850,6 @@ struct task_struct { */ int recent_used_cpu; int wake_cpu; -#endif int on_rq; int prio; @@ -912,9 +908,7 @@ struct task_struct { cpumask_t *user_cpus_ptr; cpumask_t cpus_mask; void *migration_pending; -#ifdef CONFIG_SMP unsigned short migration_disabled; -#endif unsigned short migration_flags; #ifdef CONFIG_PREEMPT_RCU @@ -946,10 +940,8 @@ struct task_struct { struct sched_info sched_info; struct list_head tasks; -#ifdef CONFIG_SMP struct plist_node pushable_tasks; struct rb_node pushable_dl_tasks; -#endif struct mm_struct *mm; struct mm_struct *active_mm; @@ -1843,7 +1835,6 @@ extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpu extern int task_can_attach(struct task_struct *p); extern int dl_bw_alloc(int cpu, u64 dl_bw); extern void dl_bw_free(int cpu, u64 dl_bw); -#ifdef CONFIG_SMP /* do_set_cpus_allowed() - consider using set_cpus_allowed_ptr() instead */ extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); @@ -1861,33 +1852,6 @@ extern void release_user_cpus_ptr(struct task_struct *p); extern int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask); extern void force_compatible_cpus_allowed_ptr(struct task_struct *p); extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p); -#else -static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -{ -} -static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) -{ - /* Opencoded cpumask_test_cpu(0, new_mask) to avoid dependency on cpumask.h */ - if ((*cpumask_bits(new_mask) & 1) == 0) - return -EINVAL; - return 0; -} -static inline int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node) -{ - if (src->user_cpus_ptr) - return -EINVAL; - return 0; -} -static inline void release_user_cpus_ptr(struct task_struct *p) -{ - WARN_ON(p->user_cpus_ptr); -} - -static inline int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask) -{ - return 0; -} -#endif extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); @@ -1976,11 +1940,7 @@ extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); -#ifdef CONFIG_SMP extern void kick_process(struct task_struct *tsk); -#else -static inline void kick_process(struct task_struct *tsk) { } -#endif extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); #define set_task_comm(tsk, from) ({ \ @@ -2225,7 +2185,6 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask); #define TASK_SIZE_OF(tsk) TASK_SIZE #endif -#ifdef CONFIG_SMP static inline bool owner_on_cpu(struct task_struct *owner) { /* @@ -2237,7 +2196,6 @@ static inline bool owner_on_cpu(struct task_struct *owner) /* Returns effective CPU energy utilization, as seen by the scheduler */ unsigned long sched_cpu_util(int cpu); -#endif /* CONFIG_SMP */ #ifdef CONFIG_SCHED_CORE extern void sched_core_free(struct task_struct *tsk); diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index f9aabbc9d22e..c40115d4e34d 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -29,15 +29,11 @@ static inline bool dl_time_before(u64 a, u64 b) return (s64)(a - b) < 0; } -#ifdef CONFIG_SMP - struct root_domain; extern void dl_add_task_root_domain(struct task_struct *p); extern void dl_clear_root_domain(struct root_domain *rd); extern void dl_clear_root_domain_cpu(int cpu); -#endif /* CONFIG_SMP */ - extern u64 dl_cookie; extern bool dl_bw_visited(int cpu, u64 cookie); diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index 439f6029d3b9..8465ff1f20d1 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -11,11 +11,7 @@ enum cpu_idle_type { CPU_MAX_IDLE_TYPES }; -#ifdef CONFIG_SMP extern void wake_up_if_idle(int cpu); -#else -static inline void wake_up_if_idle(int cpu) { } -#endif /* * Idle thread specific functions to determine the need_resched diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h index 6d67e9a5af6b..0db7f67935fe 100644 --- a/include/linux/sched/nohz.h +++ b/include/linux/sched/nohz.h @@ -6,7 +6,7 @@ * This is the interface between the scheduler and nohz/dynticks: */ -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +#ifdef CONFIG_NO_HZ_COMMON extern void nohz_balance_enter_idle(int cpu); extern int get_nohz_timer_target(void); #else @@ -23,7 +23,7 @@ static inline void calc_load_nohz_remote(struct rq *rq) { } static inline void calc_load_nohz_stop(void) { } #endif /* CONFIG_NO_HZ_COMMON */ -#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) +#ifdef CONFIG_NO_HZ_COMMON extern void wake_up_nohz_cpu(int cpu); #else static inline void wake_up_nohz_cpu(int cpu) { } diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 198bb5cc1774..e54e7fa76ba6 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -9,7 +9,6 @@ /* * sched-domains (multiprocessor balancing) declarations: */ -#ifdef CONFIG_SMP /* Generate SD flag indexes */ #define SD_FLAG(name, mflags) __##name, @@ -200,37 +199,6 @@ extern void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio); # define SD_INIT_NAME(type) .name = #type -#else /* CONFIG_SMP */ - -struct sched_domain_attr; - -static inline void -partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - struct sched_domain_attr *dattr_new) -{ -} - -static inline bool cpus_equal_capacity(int this_cpu, int that_cpu) -{ - return true; -} - -static inline bool cpus_share_cache(int this_cpu, int that_cpu) -{ - return true; -} - -static inline bool cpus_share_resources(int this_cpu, int that_cpu) -{ - return true; -} - -static inline void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio) -{ -} - -#endif /* !CONFIG_SMP */ - #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) extern void rebuild_sched_domains_energy(void); #else -- cgit v1.2.3 From 06ddd17521bf11a3e7f59dafdf5c148f29467d2c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 May 2025 10:09:02 +0200 Subject: sched/smp: Always define is_percpu_thread() and scheduler_ipi() Simplify the scheduler by making the CONFIG_SMP=y primitives of is_percpu_thread() and scheduler_ipi() unconditional. Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Cc: Dietmar Eggemann Cc: Juri Lelli Cc: Linus Torvalds Cc: Mel Gorman Cc: Sebastian Andrzej Siewior Cc: Shrikanth Hegde Cc: Steven Rostedt Cc: Valentin Schneider Cc: Vincent Guittot Link: https://lore.kernel.org/r/20250528080924.2273858-22-mingo@kernel.org --- include/linux/sched.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 376befdec4b0..eec6b225e9d1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1765,12 +1765,8 @@ extern struct pid *cad_pid; static __always_inline bool is_percpu_thread(void) { -#ifdef CONFIG_SMP return (current->flags & PF_NO_SETAFFINITY) && (current->nr_cpus_allowed == 1); -#else - return true; -#endif } /* Per-process atomic flags. */ @@ -1967,7 +1963,6 @@ extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec buf; \ }) -#ifdef CONFIG_SMP static __always_inline void scheduler_ipi(void) { /* @@ -1977,9 +1972,6 @@ static __always_inline void scheduler_ipi(void) */ preempt_fold_need_resched(); } -#else -static inline void scheduler_ipi(void) { } -#endif extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state); -- cgit v1.2.3 From 1f25730e5a780b33f78e3ea23e64d3f75e0b2042 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 May 2025 10:09:07 +0200 Subject: sched/smp: Use the SMP version of sched_exec() Simplify the scheduler making CONFIG_SMP=y sched_exec() code unconditional. Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Cc: Dietmar Eggemann Cc: Juri Lelli Cc: Linus Torvalds Cc: Mel Gorman Cc: Sebastian Andrzej Siewior Cc: Shrikanth Hegde Cc: Steven Rostedt Cc: Valentin Schneider Cc: Vincent Guittot Link: https://lore.kernel.org/r/20250528080924.2273858-27-mingo@kernel.org --- include/linux/sched/task.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index ca1db4b92c32..c517dbc242f7 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -109,11 +109,7 @@ int kernel_wait(pid_t pid, int *stat); extern void free_task(struct task_struct *tsk); /* sched_exec is called by processes performing an exec */ -#ifdef CONFIG_SMP extern void sched_exec(void); -#else -#define sched_exec() {} -#endif static inline struct task_struct *get_task_struct(struct task_struct *t) { -- cgit v1.2.3 From dfce24f0032439113848939816ef78b6e83f4086 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Thu, 12 Jun 2025 17:00:22 +0100 Subject: ALSA: hda: cs35l41: Add support for center channel in CS35L41 HDA Currently only left and right channels are supported for each amp. Support is needed for a center channel, using both left and right channel audio. Signed-off-by: Stefan Binding Link: https://patch.msgid.link/20250612160029.848104-2-sbinding@opensource.cirrus.com Signed-off-by: Takashi Iwai --- include/sound/cs35l41.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h index 43c6a9ef8d9f..7542cabfa726 100644 --- a/include/sound/cs35l41.h +++ b/include/sound/cs35l41.h @@ -609,6 +609,18 @@ #define CS35L41_DSP_NG_DELAY_MASK 0x0F00 #define CS35L41_DSP_NG_DELAY_SHIFT 8 +#define CS35L41_ASP_RX1_EN_MASK 0x00010000 +#define CS35L41_ASP_RX1_EN_SHIFT 16 +#define CS35L41_ASP_RX2_EN_MASK 0x00020000 +#define CS35L41_ASP_RX2_EN_SHIFT 17 +#define CS35L41_ASP_TX1_EN_MASK 0x00000001 +#define CS35L41_ASP_TX1_EN_SHIFT 0 +#define CS35L41_ASP_TX2_EN_MASK 0x00000002 +#define CS35L41_ASP_TX2_EN_SHIFT 1 +#define CS35L41_ASP_TX3_EN_MASK 0x00000004 +#define CS35L41_ASP_TX3_EN_SHIFT 2 +#define CS35L41_ASP_TX4_EN_MASK 0x00000008 +#define CS35L41_ASP_TX4_EN_SHIFT 3 #define CS35L41_ASP_FMT_MASK 0x0700 #define CS35L41_ASP_FMT_SHIFT 8 #define CS35L41_ASP_DOUT_HIZ_MASK 0x03 -- cgit v1.2.3 From ac90aad0e9bf7c37e706fdc08ce763a553890bdf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 12 Jun 2025 10:47:09 -0700 Subject: crypto: testmgr - reinstate kconfig control over full self-tests Commit 698de822780f ("crypto: testmgr - make it easier to enable the full set of tests") removed support for building kernels that run only the "fast" set of crypto self-tests by default. This assumed that nearly everyone actually wanted the full set of tests, *if* they had already chosen to enable the tests at all. Unfortunately, it turns out that both Debian and Fedora intentionally have the crypto self-tests enabled in their production kernels. And for production kernels we do need to keep the testing time down, which implies just running the "fast" tests, not the full set of tests. For Fedora, a reason for enabling the tests in production is that they are being (mis)used to meet the FIPS 140-3 pre-operational testing requirement. However, the other reason for enabling the tests in production, which applies to both distros, is that they provide some value in protecting users from buggy drivers. Unfortunately, the crypto/ subsystem has many buggy and untested drivers for off-CPU hardware accelerators on rare platforms. These broken drivers get shipped to users, and there have been multiple examples of the tests preventing these buggy drivers from being used. So effectively, the tests are being relied on in production kernels. I think this is kind of crazy (untested drivers should just not be enabled at all), but that seems to be how things work currently. Thus, reintroduce a kconfig option that controls the level of testing. Call it CRYPTO_SELFTESTS_FULL instead of the original name CRYPTO_MANAGER_EXTRA_TESTS, which was slightly misleading. Moreover, given the "production kernel" use case, make CRYPTO_SELFTESTS depend on EXPERT instead of DEBUG_KERNEL. I also haven't reinstated all the #ifdefs in crypto/testmgr.c. Instead, just rely on the compiler to optimize out unused code. Fixes: 40b9969796bf ("crypto: testmgr - replace CRYPTO_MANAGER_DISABLE_TESTS with CRYPTO_SELFTESTS") Fixes: 698de822780f ("crypto: testmgr - make it easier to enable the full set of tests") Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/crypto/internal/simd.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/simd.h b/include/crypto/internal/simd.h index 7e7f1ac3b7fd..9e338e7aafbd 100644 --- a/include/crypto/internal/simd.h +++ b/include/crypto/internal/simd.h @@ -44,9 +44,11 @@ void simd_unregister_aeads(struct aead_alg *algs, int count, * * This delegates to may_use_simd(), except that this also returns false if SIMD * in crypto code has been temporarily disabled on this CPU by the crypto - * self-tests, in order to test the no-SIMD fallback code. + * self-tests, in order to test the no-SIMD fallback code. This override is + * currently limited to configurations where the "full" self-tests are enabled, + * because it might be a bit too invasive to be part of the "fast" self-tests. */ -#ifdef CONFIG_CRYPTO_SELFTESTS +#ifdef CONFIG_CRYPTO_SELFTESTS_FULL DECLARE_PER_CPU(bool, crypto_simd_disabled_for_test); #define crypto_simd_usable() \ (may_use_simd() && !this_cpu_read(crypto_simd_disabled_for_test)) -- cgit v1.2.3 From 71203f68c7749609d7fc8ae6ad054bdedeb24f91 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 24 May 2025 20:32:20 +0800 Subject: padata: Fix pd UAF once and for all There is a race condition/UAF in padata_reorder that goes back to the initial commit. A reference count is taken at the start of the process in padata_do_parallel, and released at the end in padata_serial_worker. This reference count is (and only is) required for padata_replace to function correctly. If padata_replace is never called then there is no issue. In the function padata_reorder which serves as the core of padata, as soon as padata is added to queue->serial.list, and the associated spin lock released, that padata may be processed and the reference count on pd would go away. Fix this by getting the next padata before the squeue->serial lock is released. In order to make this possible, simplify padata_reorder by only calling it once the next padata arrives. Fixes: 16295bec6398 ("padata: Generic parallelization/serialization interface") Signed-off-by: Herbert Xu --- include/linux/padata.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/padata.h b/include/linux/padata.h index 0146daf34430..b486c7359de2 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -91,7 +91,6 @@ struct padata_cpumask { * @cpu: Next CPU to be processed. * @cpumask: The cpumasks in use for parallel and serial workers. * @reorder_work: work struct for reordering. - * @lock: Reorder lock. */ struct parallel_data { struct padata_shell *ps; @@ -102,8 +101,6 @@ struct parallel_data { unsigned int processed; int cpu; struct padata_cpumask cpumask; - struct work_struct reorder_work; - spinlock_t ____cacheline_aligned lock; }; /** -- cgit v1.2.3 From 09735f0624b494c0959f3327af009283567af320 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 10 Jun 2025 13:27:13 +0530 Subject: smp: Fix typo in comment for raw_smp_processor_id() The comment in `smp.h` incorrectly refers to `raw_processor_id()` instead of the correct function name `raw_smp_processor_id()`. Suggested-by: Boqun Feng Signed-off-by: Viresh Kumar Signed-off-by: Thomas Gleixner Reviewed-by: Boqun Feng Link: https://lore.kernel.org/all/d096779819962c305b85cd12bda41b593e0981aa.1749536622.git.viresh.kumar@linaro.org --- include/linux/smp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index f1aa0952e8c3..bea8d2826e09 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -234,7 +234,7 @@ static inline int get_boot_cpu_id(void) #endif /* !SMP */ /** - * raw_processor_id() - get the current (unstable) CPU id + * raw_smp_processor_id() - get the current (unstable) CPU id * * For then you know what you are doing and need an unstable * CPU id. -- cgit v1.2.3 From a2fc422ed75748eef2985454e97847fb22f873c2 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 21 May 2025 17:04:29 +0200 Subject: syscall_user_dispatch: Add PR_SYS_DISPATCH_INCLUSIVE_ON There are two possible scenarios for syscall filtering: - having a trusted/allowed range of PCs, and intercepting everything else - or the opposite: a single untrusted/intercepted range and allowing everything else (this is relevant for any kind of sandboxing scenario, or monitoring behavior of a single library) The current API only allows the former use case due to allowed range wrap-around check. Add PR_SYS_DISPATCH_INCLUSIVE_ON that enables the second use case. Add PR_SYS_DISPATCH_EXCLUSIVE_ON alias for PR_SYS_DISPATCH_ON to make it clear how it's different from the new PR_SYS_DISPATCH_INCLUSIVE_ON. Signed-off-by: Dmitry Vyukov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/97947cc8e205ff49675826d7b0327ef2e2c66eea.1747839857.git.dvyukov@google.com --- include/uapi/linux/prctl.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 43dec6eed559..9785c1d49f05 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -255,7 +255,12 @@ struct prctl_mm_map { /* Dispatch syscalls to a userspace handler */ #define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 -# define PR_SYS_DISPATCH_ON 1 +/* Enable dispatch except for the specified range */ +# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1 +/* Enable dispatch for the specified range */ +# define PR_SYS_DISPATCH_INCLUSIVE_ON 2 +/* Legacy name for backwards compatibility */ +# define PR_SYS_DISPATCH_ON PR_SYS_DISPATCH_EXCLUSIVE_ON /* The control values for the user space selector when dispatch is enabled */ # define SYSCALL_DISPATCH_FILTER_ALLOW 0 # define SYSCALL_DISPATCH_FILTER_BLOCK 1 -- cgit v1.2.3 From b776999bf25ddca9880bc3c9c30b8f84a748504b Mon Sep 17 00:00:00 2001 From: RubenKelevra Date: Thu, 12 Jun 2025 16:50:12 +0200 Subject: net: pfcp: fix typo in message_priority field name The field is spelled "message_priprity" in the big-endian bit-field definition. Nothing in-tree currently references the member, so the typo does not break kernel builds, but it is clearly incorrect. Signed-off-by: RubenKelevra Link: https://patch.msgid.link/20250612145012.185321-1-rubenkelevra@gmail.com Signed-off-by: Jakub Kicinski --- include/net/pfcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/pfcp.h b/include/net/pfcp.h index af14f970b80e..639553797d3e 100644 --- a/include/net/pfcp.h +++ b/include/net/pfcp.h @@ -45,7 +45,7 @@ struct pfcphdr_session { reserved:4; #elif defined(__BIG_ENDIAN_BITFIELD) u8 reserved:4, - message_priprity:4; + message_priority:4; #else #error "Please fix " #endif -- cgit v1.2.3 From 91695b8592638c85dc78a15d59250c62b9c68891 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 12 Jun 2025 16:21:04 +0100 Subject: net: phy: improve rgmii_clock() documentation Improve the rgmii_clock() documentation to indicate that it can also be used for MII, GMII and RMII modes as well as RGMII as the required clock rates are identical, but note that it won't error out for 1G speeds for MII and RMII. Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1uPjjk-0049pI-MD@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index c4d8f7c82627..8e2e4fcd050e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -269,8 +269,10 @@ static inline const char *phy_modes(phy_interface_t interface) * rgmii_clock - map link speed to the clock rate * @speed: link speed value * - * Description: maps RGMII supported link speeds - * into the clock rates. + * Description: maps RGMII supported link speeds into the clock rates. + * This can also be used for MII, GMII, and RMII interface modes as the + * clock rates are indentical, but the caller must be aware that errors + * for unsupported clock rates will not be signalled. * * Returns: clock rate or negative errno */ -- cgit v1.2.3 From c035e736038045b411cb368e63f07bc2f5dbc0e1 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Thu, 12 Jun 2025 17:28:33 +0200 Subject: dpll: add phase-offset-monitor feature to netlink spec Add enum dpll_feature_state for control over features. Add dpll device level attribute: DPLL_A_PHASE_OFFSET_MONITOR - to allow control over a phase offset monitor feature. Attribute is present and shall return current state of a feature (enum dpll_feature_state), if the device driver provides such capability, otherwie attribute shall not be present. Reviewed-by: Aleksandr Loktionov Reviewed-by: Milena Olech Reviewed-by: Jiri Pirko Signed-off-by: Arkadiusz Kubalewski Acked-by: Vadim Fedorenko Link: https://patch.msgid.link/20250612152835.1703397-2-arkadiusz.kubalewski@intel.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/dpll.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h index bf97d4b6d51f..349e1b3ca1ae 100644 --- a/include/uapi/linux/dpll.h +++ b/include/uapi/linux/dpll.h @@ -192,6 +192,17 @@ enum dpll_pin_capabilities { #define DPLL_PHASE_OFFSET_DIVIDER 1000 +/** + * enum dpll_feature_state - Allow control (enable/disable) and status checking + * over features. + * @DPLL_FEATURE_STATE_DISABLE: feature shall be disabled + * @DPLL_FEATURE_STATE_ENABLE: feature shall be enabled + */ +enum dpll_feature_state { + DPLL_FEATURE_STATE_DISABLE, + DPLL_FEATURE_STATE_ENABLE, +}; + enum dpll_a { DPLL_A_ID = 1, DPLL_A_MODULE_NAME, @@ -204,6 +215,7 @@ enum dpll_a { DPLL_A_TYPE, DPLL_A_LOCK_STATUS_ERROR, DPLL_A_CLOCK_QUALITY_LEVEL, + DPLL_A_PHASE_OFFSET_MONITOR, __DPLL_A_MAX, DPLL_A_MAX = (__DPLL_A_MAX - 1) -- cgit v1.2.3 From 2952daf44a84670a6aa9e13edbc105bdab83ccba Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Thu, 12 Jun 2025 17:28:34 +0200 Subject: dpll: add phase_offset_monitor_get/set callback ops Add new callback operations for a dpll device: - phase_offset_monitor_get(..) - to obtain current state of phase offset monitor feature from dpll device, - phase_offset_monitor_set(..) - to allow feature configuration. Obtain the feature state value using the get callback and provide it to the user if the device driver implements callbacks. Execute the set callback upon user requests. Reviewed-by: Milena Olech Reviewed-by: Jiri Pirko Signed-off-by: Arkadiusz Kubalewski Acked-by: Vadim Fedorenko Link: https://patch.msgid.link/20250612152835.1703397-3-arkadiusz.kubalewski@intel.com Signed-off-by: Jakub Kicinski --- include/linux/dpll.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/dpll.h b/include/linux/dpll.h index 5e4f9ab1cf75..6ad6c2968a28 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -30,6 +30,14 @@ struct dpll_device_ops { void *dpll_priv, unsigned long *qls, struct netlink_ext_ack *extack); + int (*phase_offset_monitor_set)(const struct dpll_device *dpll, + void *dpll_priv, + enum dpll_feature_state state, + struct netlink_ext_ack *extack); + int (*phase_offset_monitor_get)(const struct dpll_device *dpll, + void *dpll_priv, + enum dpll_feature_state *state, + struct netlink_ext_ack *extack); }; struct dpll_pin_ops { -- cgit v1.2.3 From 385a766bed48c5bcf620061f24e864dafeca671a Mon Sep 17 00:00:00 2001 From: Igor Belwon Date: Thu, 15 May 2025 16:43:02 +0200 Subject: phy: exynos5-usbdrd: Add support for the Exynos990 usbdrd phy The Exynos990 usbdrd PHY is a combo PHY which supports USB SS, HS and DisplayPort outputs. This commit adds support only for UTMI+ (USB HS). Reviewed-by: Krzysztof Kozlowski Signed-off-by: Igor Belwon Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250515-usb-resends-may-15-v3-2-ad33a85b6cee@mentallysanemainliners.org Signed-off-by: Vinod Koul --- include/linux/soc/samsung/exynos-regs-pmu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index 1a2c0e0838f9..7754697e5810 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -662,6 +662,9 @@ #define EXYNOS5433_PAD_RETENTION_UFS_OPTION (0x3268) #define EXYNOS5433_PAD_RETENTION_FSYSGENIO_OPTION (0x32A8) +/* For Exynos990 */ +#define EXYNOS990_PHY_CTRL_USB20 (0x72C) + /* For Tensor GS101 */ /* PMU ALIVE */ #define GS101_SYSIP_DAT0 (0x810) -- cgit v1.2.3 From 0c17270f9b920e4e1777488f1911bbfdaf2af3be Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Thu, 12 Jun 2025 14:27:07 +0000 Subject: net: sysfs: Implement is_visible for phys_(port_id, port_name, switch_id) phys_port_id_show, phys_port_name_show and phys_switch_id_show would return -EOPNOTSUPP if the netdev didn't implement the corresponding method. There is no point in creating these files if they are unsupported. Put these attributes in netdev_phys_group and implement the is_visible method. make phys_(port_id, port_name, switch_id) invisible if the netdev dosen't implement the corresponding method. Signed-off-by: Yajun Deng Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250612142707.4644-1-yajun.deng@linux.dev Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index adb14db25798..9cbc4e54b7e4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2388,7 +2388,7 @@ struct net_device { struct dm_hw_stat_delta __rcu *dm_private; #endif struct device dev; - const struct attribute_group *sysfs_groups[4]; + const struct attribute_group *sysfs_groups[5]; const struct attribute_group *sysfs_rx_queue_group; const struct rtnl_link_ops *rtnl_link_ops; -- cgit v1.2.3 From 72bf1441231ab421a380771e37a5c595493db178 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 15 Jun 2025 22:32:51 +0900 Subject: firewire: core: allocate workqueue for AR/AT request/response contexts Some tasklets (softIRQs) are still used as bottom-halves to handle events for 1394 OHCI AR/AT contexts. However, using softIRQs for IRQ bottom halves is generally discouraged today. This commit adds a per-fw_card workqueue to accommodate the behaviour specified by the 1394 OHCI specification. According to the 1394 OHCI specification, system memory pages are reserved for each asynchronous DMA context. This allows concurrent operation across contexts. In the 1394 OHCI PCI driver implementation, the hardware generates IRQs either upon receiving asynchronous packets from other nodes (incoming) or after completing transmission to them (outgoing). These independent events can occur in the same transmission cycle, therefore the max_active parameter for the workqueue is set to the total number of AR/AT contexts (=4). The WQ_UNBOUND flag is used to allow the work to be scheduled on any available core, since there is little CPU cache affinity benefit for the data. Each DMA context uses a circular descriptor list in system memory, allowing deferred data processing in software as long as buffer overrun are avoided. Since the overall operation is sleepable except for small atomic regions, WQ_BH is not used. As the descriptors contain timestamps, WQ_HIGHPRI is specified to support semi-real-time processing. The asynchronous context is also used by the SCSI over IEEE 1394 protocol implementation (sbp2), which can be part of memory reclaim paths. Therefore, WQ_MEM_RECLAIM is required. To allow uses to adjust CPU affinity according to workload, WQ_SYSFS is specified so that workqueue attributes are exposed to user space. Link: https://lore.kernel.org/r/20250615133253.433057-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index b632eec3ab52..c55b8e30e700 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -136,6 +136,7 @@ struct fw_card { __be32 maint_utility_register; struct workqueue_struct *isoc_wq; + struct workqueue_struct *async_wq; }; static inline struct fw_card *fw_card_get(struct fw_card *card) -- cgit v1.2.3 From aef6bcc0f278eba408751f8b3e0beae992e9faec Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 15 Jun 2025 22:32:53 +0900 Subject: firewire: ohci: use workqueue to handle events of AT request/response contexts This commit adds a work item to handle events of 1394 OHCI AT request/response contexts, and queues the item to the specific workqueue. The call of struct fw_packet.callbaqck() is done in the workqueue when receiving acknowledgement to the asynchronous packet transferred to remote node. Link: https://lore.kernel.org/r/20250615133253.433057-4-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index c55b8e30e700..cceb70415ed2 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -308,8 +308,7 @@ struct fw_packet { * For successful transmission, the status code is the ack received * from the destination. Otherwise it is one of the juju-specific * rcodes: RCODE_SEND_ERROR, _CANCELLED, _BUSY, _GENERATION, _NO_ACK. - * The callback can be called from tasklet context and thus - * must never block. + * The callback can be called from workqueue and thus must never block. */ fw_packet_callback_t callback; int ack; @@ -382,6 +381,10 @@ void __fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode * * A variation of __fw_send_request() to generate callback for response subaction without time * stamp. + * + * The callback is invoked in the workqueue context in most cases. However, if an error is detected + * before queueing or the destination address refers to the local node, it is invoked in the + * current context instead. */ static inline void fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode, int destination_id, int generation, int speed, @@ -411,6 +414,10 @@ static inline void fw_send_request(struct fw_card *card, struct fw_transaction * * @callback_data: data to be passed to the transaction completion callback * * A variation of __fw_send_request() to generate callback for response subaction with time stamp. + * + * The callback is invoked in the workqueue context in most cases. However, if an error is detected + * before queueing or the destination address refers to the local node, it is invoked in the current + * context instead. */ static inline void fw_send_request_with_tstamp(struct fw_card *card, struct fw_transaction *t, int tcode, int destination_id, int generation, int speed, unsigned long long offset, -- cgit v1.2.3 From 543f5e314282c4c2e5114f88ddecc9aeaf0985e2 Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Thu, 12 Jun 2025 20:39:30 +0530 Subject: phy: exynos-mipi-video: introduce support for exynos7870 Add support for Exynos7870 in the existing MIPI CSIS/DSIM driver. The SoC has one DSIM phy and three CSIS phys. Signed-off-by: Kaustabh Chakraborty Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250612-exynos7870-mipi-phy-v1-2-3fff0b62d9d3@disroot.org Signed-off-by: Vinod Koul --- include/linux/soc/samsung/exynos-regs-pmu.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index 7754697e5810..fa28a8784d65 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -665,6 +665,11 @@ /* For Exynos990 */ #define EXYNOS990_PHY_CTRL_USB20 (0x72C) +/* For Exynos7870 */ +#define EXYNOS7870_MIPI_PHY_CONTROL0 (0x070c) +#define EXYNOS7870_MIPI_PHY_CONTROL1 (0x0714) +#define EXYNOS7870_MIPI_PHY_CONTROL2 (0x0734) + /* For Tensor GS101 */ /* PMU ALIVE */ #define GS101_SYSIP_DAT0 (0x810) -- cgit v1.2.3 From cf0233491b3a15933234a26efd9ecbc1c0764674 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 5 Jun 2025 14:25:49 +0300 Subject: phy: use per-PHY lockdep keys If the PHY driver uses another PHY internally (e.g. in case of eUSB2, repeaters are represented as PHYs), then it would trigger the following lockdep splat because all PHYs use a single static lockdep key and thus lockdep can not identify whether there is a dependency or not and reports a false positive. Make PHY subsystem use dynamic lockdep keys, assigning each driver a separate key. This way lockdep can correctly identify dependency graph between mutexes. ============================================ WARNING: possible recursive locking detected 6.15.0-rc7-next-20250522-12896-g3932f283970c #3455 Not tainted -------------------------------------------- kworker/u51:0/78 is trying to acquire lock: ffff0008116554f0 (&phy->mutex){+.+.}-{4:4}, at: phy_init+0x4c/0x12c but task is already holding lock: ffff000813c10cf0 (&phy->mutex){+.+.}-{4:4}, at: phy_init+0x4c/0x12c other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&phy->mutex); lock(&phy->mutex); *** DEADLOCK *** May be due to missing lock nesting notation 4 locks held by kworker/u51:0/78: #0: ffff000800010948 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_one_work+0x18c/0x5ec #1: ffff80008036bdb0 (deferred_probe_work){+.+.}-{0:0}, at: process_one_work+0x1b4/0x5ec #2: ffff0008094ac8f8 (&dev->mutex){....}-{4:4}, at: __device_attach+0x38/0x188 #3: ffff000813c10cf0 (&phy->mutex){+.+.}-{4:4}, at: phy_init+0x4c/0x12c stack backtrace: CPU: 0 UID: 0 PID: 78 Comm: kworker/u51:0 Not tainted 6.15.0-rc7-next-20250522-12896-g3932f283970c #3455 PREEMPT Hardware name: Qualcomm CRD, BIOS 6.0.240904.BOOT.MXF.2.4-00528.1-HAMOA-1 09/ 4/2024 Workqueue: events_unbound deferred_probe_work_func Call trace: show_stack+0x18/0x24 (C) dump_stack_lvl+0x90/0xd0 dump_stack+0x18/0x24 print_deadlock_bug+0x258/0x348 __lock_acquire+0x10fc/0x1f84 lock_acquire+0x1c8/0x338 __mutex_lock+0xb8/0x59c mutex_lock_nested+0x24/0x30 phy_init+0x4c/0x12c snps_eusb2_hsphy_init+0x54/0x1a0 phy_init+0xe0/0x12c dwc3_core_init+0x450/0x10b4 dwc3_core_probe+0xce4/0x15fc dwc3_probe+0x64/0xb0 platform_probe+0x68/0xc4 really_probe+0xbc/0x298 __driver_probe_device+0x78/0x12c driver_probe_device+0x3c/0x160 __device_attach_driver+0xb8/0x138 bus_for_each_drv+0x84/0xe0 __device_attach+0x9c/0x188 device_initial_probe+0x14/0x20 bus_probe_device+0xac/0xb0 deferred_probe_work_func+0x8c/0xc8 process_one_work+0x208/0x5ec worker_thread+0x1c0/0x368 kthread+0x14c/0x20c ret_from_fork+0x10/0x20 Fixes: 3584f6392f09 ("phy: qcom: phy-qcom-snps-eusb2: Add support for eUSB2 repeater") Fixes: e2463559ff1d ("phy: amlogic: Add Amlogic AXG PCIE PHY Driver") Reviewed-by: Neil Armstrong Reviewed-by: Abel Vesa Reported-by: Johan Hovold Link: https://lore.kernel.org/lkml/ZnpoAVGJMG4Zu-Jw@hovoldconsulting.com/ Reviewed-by: Johan Hovold Tested-by: Johan Hovold Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250605-phy-subinit-v3-1-1e1e849e10cd@oss.qualcomm.com Signed-off-by: Vinod Koul --- include/linux/phy/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 437769e061b7..13add0c2c407 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -154,6 +154,7 @@ struct phy_attrs { * @id: id of the phy device * @ops: function pointers for performing phy operations * @mutex: mutex to protect phy_ops + * @lockdep_key: lockdep information for this mutex * @init_count: used to protect when the PHY is used by multiple consumers * @power_count: used to protect when the PHY is used by multiple consumers * @attrs: used to specify PHY specific attributes @@ -165,6 +166,7 @@ struct phy { int id; const struct phy_ops *ops; struct mutex mutex; + struct lock_class_key lockdep_key; int init_count; int power_count; struct phy_attrs attrs; -- cgit v1.2.3 From e7af416aebb36e6681b9c6950d0f6352aee7c084 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 16 Jun 2025 11:30:52 +0100 Subject: firmware: cs_dsp: Remove unused struct list_head from cs_dsp_coeff_ctl Remove two unused pointers from struct cs_dsp_coeff_ctl by taking the struct list_head out of struct cs_dsp_alg_region. On a x86_64 build this saves 16 bytes per control. Each cs_dsp_coeff_ctl instance needs to keep information about the algorithm region it refers to. This is done by embedding an instance of struct cs_dsp_alg_region. But cs_dsp_alg_region was also used to store entries in a list of algorithm regions, and so had a struct list_head object for that purpose. This list_head object is not used with the embedded object in struct cs_dsp_alg_region so was just wasted bytes. A new struct cs_dsp_alg_region_list_item has been defined for creating the list of algorithm regions. It contains a struct cs_dsp_alg_region and a struct list_head. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250616103052.66537-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/firmware/cirrus/cs_dsp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h index 7cae703b3137..a66eb7624730 100644 --- a/include/linux/firmware/cirrus/cs_dsp.h +++ b/include/linux/firmware/cirrus/cs_dsp.h @@ -64,14 +64,12 @@ struct cs_dsp_region { /** * struct cs_dsp_alg_region - Describes a logical algorithm region in DSP address space - * @list: List node for internal use * @alg: Algorithm id * @ver: Expected algorithm version * @type: Memory region type * @base: Address of region */ struct cs_dsp_alg_region { - struct list_head list; unsigned int alg; unsigned int ver; int type; -- cgit v1.2.3 From bc9241367aac08de44633fd957b2452a6da8e6d4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 13 Jun 2025 09:28:10 +1000 Subject: VFS: change old_dir and new_dir in struct renamedata to dentrys all users of 'struct renamedata' have the dentry for the old and new directories, and often have no use for the inode except to store it in the renamedata. This patch changes struct renamedata to hold the dentry, rather than the inode, for the old and new directories, and changes callers to match. The names are also changed from a _dir suffix to _parent. This is consistent with other usage in namei.c and elsewhere. This results in the removal of several local variables and several dereferences of ->d_inode at the cost of adding ->d_inode dereferences to vfs_rename(). Acked-by: Miklos Szeredi Reviewed-by: Chuck Lever Reviewed-by: Namjae Jeon Signed-off-by: NeilBrown Link: https://lore.kernel.org/174977089072.608730.4244531834577097454@noble.neil.brown.name Signed-off-by: Christian Brauner --- include/linux/fs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 96c7925a6551..1d9586a78041 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2004,20 +2004,20 @@ int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *, /** * struct renamedata - contains all information required for renaming * @old_mnt_idmap: idmap of the old mount the inode was found from - * @old_dir: parent of source + * @old_parent: parent of source * @old_dentry: source * @new_mnt_idmap: idmap of the new mount the inode was found from - * @new_dir: parent of destination + * @new_parent: parent of destination * @new_dentry: destination * @delegated_inode: returns an inode needing a delegation break * @flags: rename flags */ struct renamedata { struct mnt_idmap *old_mnt_idmap; - struct inode *old_dir; + struct dentry *old_parent; struct dentry *old_dentry; struct mnt_idmap *new_mnt_idmap; - struct inode *new_dir; + struct dentry *new_parent; struct dentry *new_dentry; struct inode **delegated_inode; unsigned int flags; -- cgit v1.2.3 From 0da3e3822cfabf062945e449f91ea3ca529eeaa4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:19 +0200 Subject: fs: move name_contains_dotdot() to header Move the helper from the firmware specific code to a header so we can reuse it for coredump sockets. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-5-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- include/linux/fs.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 96c7925a6551..18fdbd184eea 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3264,6 +3264,22 @@ static inline bool is_dot_dotdot(const char *name, size_t len) (len == 1 || (len == 2 && name[1] == '.')); } +/** + * name_contains_dotdot - check if a file name contains ".." path components + * + * Search for ".." surrounded by either '/' or start/end of string. + */ +static inline bool name_contains_dotdot(const char *name) +{ + size_t name_len; + + name_len = strlen(name); + return strcmp(name, "..") == 0 || + strncmp(name, "../", 3) == 0 || + strstr(name, "/../") != NULL || + (name_len >= 3 && strcmp(name + name_len - 3, "/..") == 0); +} + #include /* needed for stackable file system support */ -- cgit v1.2.3 From 70e3ee31282d293c794fb5bbec8efe495c32044b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:23 +0200 Subject: coredump: rename do_coredump() to vfs_coredump() Align the naming with the rest of our helpers exposed outside of core vfs. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-9-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- include/linux/coredump.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 76e41805b92d..96e8a66da133 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -43,7 +43,7 @@ extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); extern int dump_align(struct coredump_params *cprm, int align); int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len); -extern void do_coredump(const kernel_siginfo_t *siginfo); +extern void vfs_coredump(const kernel_siginfo_t *siginfo); /* * Logging for the coredump code, ratelimited. @@ -63,7 +63,7 @@ extern void do_coredump(const kernel_siginfo_t *siginfo); #define coredump_report_failure(fmt, ...) __COREDUMP_PRINTK(KERN_WARNING, fmt, ##__VA_ARGS__) #else -static inline void do_coredump(const kernel_siginfo_t *siginfo) {} +static inline void vfs_coredump(const kernel_siginfo_t *siginfo) {} #define coredump_report(...) #define coredump_report_failure(...) -- cgit v1.2.3 From fda6add9243867486f8cd456d7b05395d2132e0a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 16 Jun 2025 13:59:20 -0400 Subject: workqueue: Basic memory allocation profiling support Hook alloc_workqueue and alloc_workqueue_attrs() so that they're accounted to the callsite. Since we're doing allocations on behalf of another subsystem, this helps when using memory allocation profiling to check for leaks. Cc: Tejun Heo Cc: Lai Jiangshan Signed-off-by: Kent Overstreet Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 6e30f275da77..e907c9bb840c 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -6,6 +6,7 @@ #ifndef _LINUX_WORKQUEUE_H #define _LINUX_WORKQUEUE_H +#include #include #include #include @@ -505,7 +506,8 @@ void workqueue_softirq_dead(unsigned int cpu); * Pointer to the allocated workqueue on success, %NULL on failure. */ __printf(1, 4) struct workqueue_struct * -alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); +alloc_workqueue_noprof(const char *fmt, unsigned int flags, int max_active, ...); +#define alloc_workqueue(...) alloc_hooks(alloc_workqueue_noprof(__VA_ARGS__)) #ifdef CONFIG_LOCKDEP /** @@ -544,8 +546,8 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue_lockdep_map(fmt, flags, lockdep_map, args...) \ - alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), \ - 1, lockdep_map, ##args) + alloc_hooks(alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags),\ + 1, lockdep_map, ##args)) #endif /** @@ -577,7 +579,9 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, extern void destroy_workqueue(struct workqueue_struct *wq); -struct workqueue_attrs *alloc_workqueue_attrs(void); +struct workqueue_attrs *alloc_workqueue_attrs_noprof(void); +#define alloc_workqueue_attrs(...) alloc_hooks(alloc_workqueue_attrs_noprof(__VA_ARGS__)) + void free_workqueue_attrs(struct workqueue_attrs *attrs); int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs); -- cgit v1.2.3 From b1ba03c49a711c30e24735733dfd68f2422fa150 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Jun 2025 18:34:21 +0900 Subject: scsi: core: Remember if a device is an ATA device scsi_add_lun() tests the device vendor string of SCSI devices to detect if a SCSI device is in fact an ATA device, in order to correctly handle SATL power management. The function scsi_cdl_enable() also requires knowing if a SCSI device is an ATA device to control the state of the device CDL feature but this function does that by testing for the presence of the VPD page 89h (ATA INFORMATION page). sd_read_write_same() also has a similar test. Simplify these different methods by adding the is_ata field to struct scsi_device to remember that a SCSI device is in fact an ATA one based on the device vendor name test. This field can also allow low level SCSI host adapter drivers to take special actions for ATA devices (e.g. to better handle ATA NCQ errors). With this, simplify scsi_cdl_enable() and sd_read_write_same(). Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20250611093421.2901633-1-dlemoal@kernel.org Reviewed-by: Igor Pylypiv Signed-off-by: Martin K. Petersen --- include/scsi/scsi_device.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 68dd49947d04..6d6500148c4b 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -184,6 +184,11 @@ struct scsi_device { */ unsigned force_runtime_start_on_system_start:1; + /* + * Set if the device is an ATA device. + */ + unsigned is_ata:1; + unsigned removable:1; unsigned changed:1; /* Data invalid due to media change */ unsigned busy:1; /* Used to prevent races */ -- cgit v1.2.3 From 6ad5ff6e7282d1252364cc08af88260ef0ec4cda Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 12 Jun 2025 18:02:19 +0200 Subject: libeth: convert to netmem Back when the libeth Rx core was initially written, devmem was a draft and netmem_ref didn't exist in the mainline. Now that it's here, make libeth MP-agnostic before introducing any new code or any new library users. When it's known that the created PP/FQ is for header buffers, use faster "unsafe" underscored netmem <--> virt accessors as netmem_is_net_iov() is always false in that case, but consumes some cycles (bit test + true branch). Reviewed-by: Mina Almasry Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/libeth/rx.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h index ab05024be518..7d5dc58984b1 100644 --- a/include/net/libeth/rx.h +++ b/include/net/libeth/rx.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/* Copyright (C) 2024 Intel Corporation */ +/* Copyright (C) 2024-2025 Intel Corporation */ #ifndef __LIBETH_RX_H #define __LIBETH_RX_H @@ -31,7 +31,7 @@ /** * struct libeth_fqe - structure representing an Rx buffer (fill queue element) - * @page: page holding the buffer + * @netmem: network memory reference holding the buffer * @offset: offset from the page start (to the headroom) * @truesize: total space occupied by the buffer (w/ headroom and tailroom) * @@ -40,7 +40,7 @@ * former, @offset is always 0 and @truesize is always ```PAGE_SIZE```. */ struct libeth_fqe { - struct page *page; + netmem_ref netmem; u32 offset; u32 truesize; } __aligned_largest; @@ -102,15 +102,16 @@ static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i) struct libeth_fqe *buf = &fq->fqes[i]; buf->truesize = fq->truesize; - buf->page = page_pool_dev_alloc(fq->pp, &buf->offset, &buf->truesize); - if (unlikely(!buf->page)) + buf->netmem = page_pool_dev_alloc_netmem(fq->pp, &buf->offset, + &buf->truesize); + if (unlikely(!buf->netmem)) return DMA_MAPPING_ERROR; - return page_pool_get_dma_addr(buf->page) + buf->offset + + return page_pool_get_dma_addr_netmem(buf->netmem) + buf->offset + fq->pp->p.offset; } -void libeth_rx_recycle_slow(struct page *page); +void libeth_rx_recycle_slow(netmem_ref netmem); /** * libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA @@ -126,18 +127,19 @@ void libeth_rx_recycle_slow(struct page *page); static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe, u32 len) { - struct page *page = fqe->page; + netmem_ref netmem = fqe->netmem; /* Very rare, but possible case. The most common reason: * the last fragment contained FCS only, which was then * stripped by the HW. */ if (unlikely(!len)) { - libeth_rx_recycle_slow(page); + libeth_rx_recycle_slow(netmem); return false; } - page_pool_dma_sync_for_cpu(page->pp, page, fqe->offset, len); + page_pool_dma_sync_netmem_for_cpu(netmem_get_pp(netmem), netmem, + fqe->offset, len); return true; } -- cgit v1.2.3 From 35c64b6500ef7308155bf0dc556c646e4d7b0fd3 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 12 Jun 2025 18:02:20 +0200 Subject: libeth: support native XDP and register memory model Expand libeth's Page Pool functionality by adding native XDP support. This means picking the appropriate headroom and DMA direction. Also, register all the created &page_pools as XDP memory models. A driver then can call xdp_rxq_info_attach_page_pool() when registering its RxQ info. Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/libeth/rx.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h index 7d5dc58984b1..5d991404845e 100644 --- a/include/net/libeth/rx.h +++ b/include/net/libeth/rx.h @@ -13,8 +13,10 @@ /* Space reserved in front of each frame */ #define LIBETH_SKB_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN) +#define LIBETH_XDP_HEADROOM (ALIGN(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \ + NET_IP_ALIGN) /* Maximum headroom for worst-case calculations */ -#define LIBETH_MAX_HEADROOM LIBETH_SKB_HEADROOM +#define LIBETH_MAX_HEADROOM LIBETH_XDP_HEADROOM /* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */ #define LIBETH_RX_LL_LEN (ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN) /* Maximum supported L2-L4 header length */ @@ -66,6 +68,7 @@ enum libeth_fqe_type { * @count: number of descriptors/buffers the queue has * @type: type of the buffers this queue has * @hsplit: flag whether header split is enabled + * @xdp: flag indicating whether XDP is enabled * @buf_len: HW-writeable length per each buffer * @nid: ID of the closest NUMA node with memory */ @@ -81,6 +84,7 @@ struct libeth_fq { /* Cold fields */ enum libeth_fqe_type type:2; bool hsplit:1; + bool xdp:1; u32 buf_len; int nid; -- cgit v1.2.3 From 8591c3afe8882a00d9070daf78c384b003b596f3 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 12 Jun 2025 18:02:21 +0200 Subject: libeth: xdp: add XDP_TX buffers sending Start adding XDP-specific code to libeth, namely handling XDP_TX buffers (only sending). The idea is that we accumulate up to 16 buffers on the stack, then, if either the limit is reached or the polling is finished, flush them at once with only one XDPSQ cleaning (if needed). The main sending function will be aware of the sending budget and already have all the info to send the buffers, so it can't fail. Drivers need to provide 2 inline callbacks to the main sending function: for cleaning an XDPSQ and for filling descriptors; the library code takes care of the rest. Note that unlike the generic code, multi-buffer support is not wrapped here with unlikely() to not hurt header split setups. &libeth_xdp_buff is a simple extension over &xdp_buff which has a direct pointer to the corresponding Rx descriptor (and, luckily, precisely 1 CL size and 16-byte alignment on x86_64). Suggested-by: Maciej Fijalkowski # xmit logic Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/libeth/tx.h | 11 +- include/net/libeth/xdp.h | 541 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 550 insertions(+), 2 deletions(-) create mode 100644 include/net/libeth/xdp.h (limited to 'include') diff --git a/include/net/libeth/tx.h b/include/net/libeth/tx.h index 35614f9523f6..3e68d11914f7 100644 --- a/include/net/libeth/tx.h +++ b/include/net/libeth/tx.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/* Copyright (C) 2024 Intel Corporation */ +/* Copyright (C) 2024-2025 Intel Corporation */ #ifndef __LIBETH_TX_H #define __LIBETH_TX_H @@ -12,11 +12,13 @@ /** * enum libeth_sqe_type - type of &libeth_sqe to act on Tx completion - * @LIBETH_SQE_EMPTY: unused/empty, no action required + * @LIBETH_SQE_EMPTY: unused/empty OR XDP_TX frag, no action required * @LIBETH_SQE_CTX: context descriptor with empty SQE, no action required * @LIBETH_SQE_SLAB: kmalloc-allocated buffer, unmap and kfree() * @LIBETH_SQE_FRAG: mapped skb frag, only unmap DMA * @LIBETH_SQE_SKB: &sk_buff, unmap and napi_consume_skb(), update stats + * @__LIBETH_SQE_XDP_START: separator between skb and XDP types + * @LIBETH_SQE_XDP_TX: &skb_shared_info, libeth_xdp_return_buff_bulk(), stats */ enum libeth_sqe_type { LIBETH_SQE_EMPTY = 0U, @@ -24,6 +26,9 @@ enum libeth_sqe_type { LIBETH_SQE_SLAB, LIBETH_SQE_FRAG, LIBETH_SQE_SKB, + + __LIBETH_SQE_XDP_START, + LIBETH_SQE_XDP_TX = __LIBETH_SQE_XDP_START, }; /** @@ -32,6 +37,7 @@ enum libeth_sqe_type { * @rs_idx: index of the last buffer from the batch this one was sent in * @raw: slab buffer to free via kfree() * @skb: &sk_buff to consume + * @sinfo: skb shared info of an XDP_TX frame * @dma: DMA address to unmap * @len: length of the mapped region to unmap * @nr_frags: number of frags in the frame this buffer belongs to @@ -46,6 +52,7 @@ struct libeth_sqe { union { void *raw; struct sk_buff *skb; + struct skb_shared_info *sinfo; }; DEFINE_DMA_UNMAP_ADDR(dma); diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h new file mode 100644 index 000000000000..4988453a3d70 --- /dev/null +++ b/include/net/libeth/xdp.h @@ -0,0 +1,541 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2025 Intel Corporation */ + +#ifndef __LIBETH_XDP_H +#define __LIBETH_XDP_H + +#include +#include + +#include +#include +#include + +/* + * &xdp_buff_xsk is the largest structure &libeth_xdp_buff gets casted to, + * pick maximum pointer-compatible alignment. + */ +#define __LIBETH_XDP_BUFF_ALIGN \ + (IS_ALIGNED(sizeof(struct xdp_buff_xsk), 16) ? 16 : \ + IS_ALIGNED(sizeof(struct xdp_buff_xsk), 8) ? 8 : \ + sizeof(long)) + +/** + * struct libeth_xdp_buff - libeth extension over &xdp_buff + * @base: main &xdp_buff + * @data: shortcut for @base.data + * @desc: RQ descriptor containing metadata for this buffer + * @priv: driver-private scratchspace + * + * The main reason for this is to have a pointer to the descriptor to be able + * to quickly get frame metadata from xdpmo and driver buff-to-xdp callbacks + * (as well as bigger alignment). + * Pointer/layout-compatible with &xdp_buff and &xdp_buff_xsk. + */ +struct libeth_xdp_buff { + union { + struct xdp_buff base; + void *data; + }; + + const void *desc; + unsigned long priv[] + __aligned(__LIBETH_XDP_BUFF_ALIGN); +} __aligned(__LIBETH_XDP_BUFF_ALIGN); +static_assert(offsetof(struct libeth_xdp_buff, data) == + offsetof(struct xdp_buff_xsk, xdp.data)); +static_assert(offsetof(struct libeth_xdp_buff, desc) == + offsetof(struct xdp_buff_xsk, cb)); +static_assert(IS_ALIGNED(sizeof(struct xdp_buff_xsk), + __alignof(struct libeth_xdp_buff))); + +/* Common Tx bits */ + +/** + * enum - libeth_xdp internal Tx flags + * @LIBETH_XDP_TX_BULK: one bulk size at which it will be flushed to the queue + * @LIBETH_XDP_TX_BATCH: batch size for which the queue fill loop is unrolled + * @LIBETH_XDP_TX_DROP: indicates the send function must drop frames not sent + */ +enum { + LIBETH_XDP_TX_BULK = DEV_MAP_BULK_SIZE, + LIBETH_XDP_TX_BATCH = 8, + + LIBETH_XDP_TX_DROP = BIT(0), +}; + +/** + * enum - &libeth_xdp_tx_frame and &libeth_xdp_tx_desc flags + * @LIBETH_XDP_TX_LEN: only for ``XDP_TX``, [15:0] of ::len_fl is actual length + * @LIBETH_XDP_TX_FIRST: indicates the frag is the first one of the frame + * @LIBETH_XDP_TX_LAST: whether the frag is the last one of the frame + * @LIBETH_XDP_TX_MULTI: whether the frame contains several frags + * @LIBETH_XDP_TX_FLAGS: only for ``XDP_TX``, [31:16] of ::len_fl is flags + */ +enum { + LIBETH_XDP_TX_LEN = GENMASK(15, 0), + + LIBETH_XDP_TX_FIRST = BIT(16), + LIBETH_XDP_TX_LAST = BIT(17), + LIBETH_XDP_TX_MULTI = BIT(18), + + LIBETH_XDP_TX_FLAGS = GENMASK(31, 16), +}; + +/** + * struct libeth_xdp_tx_frame - represents one XDP Tx element + * @data: frame start pointer for ``XDP_TX`` + * @len_fl: ``XDP_TX``, combined flags [31:16] and len [15:0] field for speed + * @soff: ``XDP_TX``, offset from @data to the start of &skb_shared_info + * @frag: one (non-head) frag for ``XDP_TX`` + */ +struct libeth_xdp_tx_frame { + union { + /* ``XDP_TX`` */ + struct { + void *data; + u32 len_fl; + u32 soff; + }; + + /* ``XDP_TX`` frag */ + skb_frag_t frag; + }; +} __aligned_largest; +static_assert(offsetof(struct libeth_xdp_tx_frame, frag.len) == + offsetof(struct libeth_xdp_tx_frame, len_fl)); + +/** + * struct libeth_xdp_tx_bulk - XDP Tx frame bulk for bulk sending + * @prog: corresponding active XDP program + * @dev: &net_device which the frames are transmitted on + * @xdpsq: shortcut to the corresponding driver-specific XDPSQ structure + * @count: current number of frames in @bulk + * @bulk: array of queued frames for bulk Tx + * + * All XDP Tx operations queue each frame to the bulk first and flush it + * when @count reaches the array end. Bulk is always placed on the stack + * for performance. One bulk element contains all the data necessary + * for sending a frame and then freeing it on completion. + */ +struct libeth_xdp_tx_bulk { + const struct bpf_prog *prog; + struct net_device *dev; + void *xdpsq; + + u32 count; + struct libeth_xdp_tx_frame bulk[LIBETH_XDP_TX_BULK]; +} __aligned(sizeof(struct libeth_xdp_tx_frame)); + +/** + * LIBETH_XDP_ONSTACK_BULK - declare &libeth_xdp_tx_bulk on the stack + * @bq: name of the variable to declare + * + * Helper to declare a bulk on the stack with a compiler hint that it should + * not be initialized automatically (with `CONFIG_INIT_STACK_ALL_*`) for + * performance reasons. + */ +#define LIBETH_XDP_ONSTACK_BULK(bq) \ + struct libeth_xdp_tx_bulk bq __uninitialized + +/** + * struct libeth_xdpsq - abstraction for an XDPSQ + * @sqes: array of Tx buffers from the actual queue struct + * @descs: opaque pointer to the HW descriptor array + * @ntu: pointer to the next free descriptor index + * @count: number of descriptors on that queue + * @pending: pointer to the number of sent-not-completed descs on that queue + * @xdp_tx: pointer to the above + * + * Abstraction for driver-independent implementation of Tx. Placed on the stack + * and filled by the driver before the transmission, so that the generic + * functions can access and modify driver-specific resources. + */ +struct libeth_xdpsq { + struct libeth_sqe *sqes; + void *descs; + + u32 *ntu; + u32 count; + + u32 *pending; + u32 *xdp_tx; +}; + +/** + * struct libeth_xdp_tx_desc - abstraction for an XDP Tx descriptor + * @addr: DMA address of the frame + * @len: length of the frame + * @flags: XDP Tx flags + * @opts: combined @len + @flags for speed + * + * Filled by the generic functions and then passed to driver-specific functions + * to fill a HW Tx descriptor, always placed on the [function] stack. + */ +struct libeth_xdp_tx_desc { + dma_addr_t addr; + union { + struct { + u32 len; + u32 flags; + }; + aligned_u64 opts; + }; +} __aligned_largest; + +/** + * libeth_xdp_tx_xmit_bulk - main XDP Tx function + * @bulk: array of frames to send + * @xdpsq: pointer to the driver-specific XDPSQ struct + * @n: number of frames to send + * @unroll: whether to unroll the queue filling loop for speed + * @priv: driver-specific private data + * @prep: callback for cleaning the queue and filling abstract &libeth_xdpsq + * @fill: internal callback for filling &libeth_sqe and &libeth_xdp_tx_desc + * @xmit: callback for filling a HW descriptor with the frame info + * + * Internal abstraction for placing @n XDP Tx frames on the HW XDPSQ. Used for + * all types of frames. + * @unroll greatly increases the object code size, but also greatly increases + * performance. + * The compilers inline all those onstack abstractions to direct data accesses. + * + * Return: number of frames actually placed on the queue, <= @n. The function + * can't fail, but can send less frames if there's no enough free descriptors + * available. The actual free space is returned by @prep from the driver. + */ +static __always_inline u32 +libeth_xdp_tx_xmit_bulk(const struct libeth_xdp_tx_frame *bulk, void *xdpsq, + u32 n, bool unroll, u64 priv, + u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq), + struct libeth_xdp_tx_desc + (*fill)(struct libeth_xdp_tx_frame frm, u32 i, + const struct libeth_xdpsq *sq, u64 priv), + void (*xmit)(struct libeth_xdp_tx_desc desc, u32 i, + const struct libeth_xdpsq *sq, u64 priv)) +{ + struct libeth_xdpsq sq __uninitialized; + u32 this, batched, off = 0; + u32 ntu, i = 0; + + n = min(n, prep(xdpsq, &sq)); + if (unlikely(!n)) + return 0; + + ntu = *sq.ntu; + + this = sq.count - ntu; + if (likely(this > n)) + this = n; + +again: + if (!unroll) + goto linear; + + batched = ALIGN_DOWN(this, LIBETH_XDP_TX_BATCH); + + for ( ; i < off + batched; i += LIBETH_XDP_TX_BATCH) { + u32 base = ntu + i - off; + + unrolled_count(LIBETH_XDP_TX_BATCH) + for (u32 j = 0; j < LIBETH_XDP_TX_BATCH; j++) + xmit(fill(bulk[i + j], base + j, &sq, priv), + base + j, &sq, priv); + } + + if (batched < this) { +linear: + for ( ; i < off + this; i++) + xmit(fill(bulk[i], ntu + i - off, &sq, priv), + ntu + i - off, &sq, priv); + } + + ntu += this; + if (likely(ntu < sq.count)) + goto out; + + ntu = 0; + + if (i < n) { + this = n - i; + off = i; + + goto again; + } + +out: + *sq.ntu = ntu; + *sq.pending += n; + if (sq.xdp_tx) + *sq.xdp_tx += n; + + return n; +} + +/* ``XDP_TX`` bulking */ + +void libeth_xdp_return_buff_slow(struct libeth_xdp_buff *xdp); + +/** + * libeth_xdp_tx_queue_head - internal helper for queueing one ``XDP_TX`` head + * @bq: XDP Tx bulk to queue the head frag to + * @xdp: XDP buffer with the head to queue + * + * Return: false if it's the only frag of the frame, true if it's an S/G frame. + */ +static inline bool libeth_xdp_tx_queue_head(struct libeth_xdp_tx_bulk *bq, + const struct libeth_xdp_buff *xdp) +{ + const struct xdp_buff *base = &xdp->base; + + bq->bulk[bq->count++] = (typeof(*bq->bulk)){ + .data = xdp->data, + .len_fl = (base->data_end - xdp->data) | LIBETH_XDP_TX_FIRST, + .soff = xdp_data_hard_end(base) - xdp->data, + }; + + if (!xdp_buff_has_frags(base)) + return false; + + bq->bulk[bq->count - 1].len_fl |= LIBETH_XDP_TX_MULTI; + + return true; +} + +/** + * libeth_xdp_tx_queue_frag - internal helper for queueing one ``XDP_TX`` frag + * @bq: XDP Tx bulk to queue the frag to + * @frag: frag to queue + */ +static inline void libeth_xdp_tx_queue_frag(struct libeth_xdp_tx_bulk *bq, + const skb_frag_t *frag) +{ + bq->bulk[bq->count++].frag = *frag; +} + +/** + * libeth_xdp_tx_queue_bulk - internal helper for queueing one ``XDP_TX`` frame + * @bq: XDP Tx bulk to queue the frame to + * @xdp: XDP buffer to queue + * @flush_bulk: driver callback to flush the bulk to the HW queue + * + * Return: true on success, false on flush error. + */ +static __always_inline bool +libeth_xdp_tx_queue_bulk(struct libeth_xdp_tx_bulk *bq, + struct libeth_xdp_buff *xdp, + bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq, + u32 flags)) +{ + const struct skb_shared_info *sinfo; + bool ret = true; + u32 nr_frags; + + if (unlikely(bq->count == LIBETH_XDP_TX_BULK) && + unlikely(!flush_bulk(bq, 0))) { + libeth_xdp_return_buff_slow(xdp); + return false; + } + + if (!libeth_xdp_tx_queue_head(bq, xdp)) + goto out; + + sinfo = xdp_get_shared_info_from_buff(&xdp->base); + nr_frags = sinfo->nr_frags; + + for (u32 i = 0; i < nr_frags; i++) { + if (unlikely(bq->count == LIBETH_XDP_TX_BULK) && + unlikely(!flush_bulk(bq, 0))) { + ret = false; + break; + } + + libeth_xdp_tx_queue_frag(bq, &sinfo->frags[i]); + } + +out: + bq->bulk[bq->count - 1].len_fl |= LIBETH_XDP_TX_LAST; + xdp->data = NULL; + + return ret; +} + +/** + * libeth_xdp_tx_fill_stats - fill &libeth_sqe with ``XDP_TX`` frame stats + * @sqe: SQ element to fill + * @desc: libeth_xdp Tx descriptor + * @sinfo: &skb_shared_info for this frame + * + * Internal helper for filling an SQE with the frame stats, do not use in + * drivers. Fills the number of frags and bytes for this frame. + */ +#define libeth_xdp_tx_fill_stats(sqe, desc, sinfo) \ + __libeth_xdp_tx_fill_stats(sqe, desc, sinfo, __UNIQUE_ID(sqe_), \ + __UNIQUE_ID(desc_), __UNIQUE_ID(sinfo_)) + +#define __libeth_xdp_tx_fill_stats(sqe, desc, sinfo, ue, ud, us) do { \ + const struct libeth_xdp_tx_desc *ud = (desc); \ + const struct skb_shared_info *us; \ + struct libeth_sqe *ue = (sqe); \ + \ + ue->nr_frags = 1; \ + ue->bytes = ud->len; \ + \ + if (ud->flags & LIBETH_XDP_TX_MULTI) { \ + us = (sinfo); \ + ue->nr_frags += us->nr_frags; \ + ue->bytes += us->xdp_frags_size; \ + } \ +} while (0) + +/** + * libeth_xdp_tx_fill_buf - internal helper to fill one ``XDP_TX`` &libeth_sqe + * @frm: XDP Tx frame from the bulk + * @i: index on the HW queue + * @sq: XDPSQ abstraction for the queue + * @priv: private data + * + * Return: XDP Tx descriptor with the synced DMA and other info to pass to + * the driver callback. + */ +static inline struct libeth_xdp_tx_desc +libeth_xdp_tx_fill_buf(struct libeth_xdp_tx_frame frm, u32 i, + const struct libeth_xdpsq *sq, u64 priv) +{ + struct libeth_xdp_tx_desc desc; + struct skb_shared_info *sinfo; + skb_frag_t *frag = &frm.frag; + struct libeth_sqe *sqe; + netmem_ref netmem; + + if (frm.len_fl & LIBETH_XDP_TX_FIRST) { + sinfo = frm.data + frm.soff; + skb_frag_fill_netmem_desc(frag, virt_to_netmem(frm.data), + offset_in_page(frm.data), + frm.len_fl); + } else { + sinfo = NULL; + } + + netmem = skb_frag_netmem(frag); + desc = (typeof(desc)){ + .addr = page_pool_get_dma_addr_netmem(netmem) + + skb_frag_off(frag), + .len = skb_frag_size(frag) & LIBETH_XDP_TX_LEN, + .flags = skb_frag_size(frag) & LIBETH_XDP_TX_FLAGS, + }; + + dma_sync_single_for_device(__netmem_get_pp(netmem)->p.dev, desc.addr, + desc.len, DMA_BIDIRECTIONAL); + + if (!sinfo) + return desc; + + sqe = &sq->sqes[i]; + sqe->type = LIBETH_SQE_XDP_TX; + sqe->sinfo = sinfo; + libeth_xdp_tx_fill_stats(sqe, &desc, sinfo); + + return desc; +} + +void libeth_xdp_tx_exception(struct libeth_xdp_tx_bulk *bq, u32 sent, + u32 flags); + +/** + * __libeth_xdp_tx_flush_bulk - internal helper to flush one XDP Tx bulk + * @bq: bulk to flush + * @flags: XDP TX flags + * @prep: driver-specific callback to prepare the queue for sending + * @fill: libeth_xdp callback to fill &libeth_sqe and &libeth_xdp_tx_desc + * @xmit: driver callback to fill a HW descriptor + * + * Internal abstraction to create bulk flush functions for drivers. + * + * Return: true if anything was sent, false otherwise. + */ +static __always_inline bool +__libeth_xdp_tx_flush_bulk(struct libeth_xdp_tx_bulk *bq, u32 flags, + u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq), + struct libeth_xdp_tx_desc + (*fill)(struct libeth_xdp_tx_frame frm, u32 i, + const struct libeth_xdpsq *sq, u64 priv), + void (*xmit)(struct libeth_xdp_tx_desc desc, u32 i, + const struct libeth_xdpsq *sq, + u64 priv)) +{ + u32 sent, drops; + int err = 0; + + sent = libeth_xdp_tx_xmit_bulk(bq->bulk, bq->xdpsq, + min(bq->count, LIBETH_XDP_TX_BULK), + false, 0, prep, fill, xmit); + drops = bq->count - sent; + + if (unlikely(drops)) { + libeth_xdp_tx_exception(bq, sent, flags); + err = -ENXIO; + } else { + bq->count = 0; + } + + trace_xdp_bulk_tx(bq->dev, sent, drops, err); + + return likely(sent); +} + +/** + * libeth_xdp_tx_flush_bulk - wrapper to define flush of one ``XDP_TX`` bulk + * @bq: bulk to flush + * @flags: Tx flags, see above + * @prep: driver callback to prepare the queue + * @xmit: driver callback to fill a HW descriptor + */ +#define libeth_xdp_tx_flush_bulk(bq, flags, prep, xmit) \ + __libeth_xdp_tx_flush_bulk(bq, flags, prep, libeth_xdp_tx_fill_buf, \ + xmit) + +/* Rx polling path */ + +static inline void libeth_xdp_return_va(const void *data, bool napi) +{ + netmem_ref netmem = virt_to_netmem(data); + + page_pool_put_full_netmem(__netmem_get_pp(netmem), netmem, napi); +} + +static inline void libeth_xdp_return_frags(const struct skb_shared_info *sinfo, + bool napi) +{ + for (u32 i = 0; i < sinfo->nr_frags; i++) { + netmem_ref netmem = skb_frag_netmem(&sinfo->frags[i]); + + page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, napi); + } +} + +/** + * libeth_xdp_return_buff - free/recycle &libeth_xdp_buff + * @xdp: buffer to free + * + * Hotpath helper to free &libeth_xdp_buff. Comparing to xdp_return_buff(), + * it's faster as it gets inlined and always assumes order-0 pages and safe + * direct recycling. Zeroes @xdp->data to avoid UAFs. + */ +#define libeth_xdp_return_buff(xdp) __libeth_xdp_return_buff(xdp, true) + +static inline void __libeth_xdp_return_buff(struct libeth_xdp_buff *xdp, + bool napi) +{ + if (!xdp_buff_has_frags(&xdp->base)) + goto out; + + libeth_xdp_return_frags(xdp_get_shared_info_from_buff(&xdp->base), + napi); + +out: + libeth_xdp_return_va(xdp->data, napi); + xdp->data = NULL; +} + +#endif /* __LIBETH_XDP_H */ -- cgit v1.2.3 From 084ceda7decdbeff2bafbe2d28f57aed50b3bc46 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 12 Jun 2025 18:02:22 +0200 Subject: libeth: xdp: add .ndo_xdp_xmit() helpers Add helpers for implementing .ndo_xdp_xmit(). Same as for XDP_TX, accumulate up to 16 DMA-mapped frames on the stack, then flush. If DMA mapping is failed for some reason, don't try mapping further frames, but still flush what was already prepared. DMA address of a head frame is stored in its headroom, assuming it has enough of it for an 8 (or 4) byte value. In addition to @prep and @xmit driver callbacks in XDP_TX, xmit also needs @finalize to kick the XDPSQ after filling. Signed-off-by: Alexander Lobakin Reviewed-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen --- include/net/libeth/tx.h | 6 + include/net/libeth/xdp.h | 290 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 294 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/libeth/tx.h b/include/net/libeth/tx.h index 3e68d11914f7..e2b62a8b4c57 100644 --- a/include/net/libeth/tx.h +++ b/include/net/libeth/tx.h @@ -19,6 +19,8 @@ * @LIBETH_SQE_SKB: &sk_buff, unmap and napi_consume_skb(), update stats * @__LIBETH_SQE_XDP_START: separator between skb and XDP types * @LIBETH_SQE_XDP_TX: &skb_shared_info, libeth_xdp_return_buff_bulk(), stats + * @LIBETH_SQE_XDP_XMIT: &xdp_frame, unmap and xdp_return_frame_bulk(), stats + * @LIBETH_SQE_XDP_XMIT_FRAG: &xdp_frame frag, only unmap DMA */ enum libeth_sqe_type { LIBETH_SQE_EMPTY = 0U, @@ -29,6 +31,8 @@ enum libeth_sqe_type { __LIBETH_SQE_XDP_START, LIBETH_SQE_XDP_TX = __LIBETH_SQE_XDP_START, + LIBETH_SQE_XDP_XMIT, + LIBETH_SQE_XDP_XMIT_FRAG, }; /** @@ -38,6 +42,7 @@ enum libeth_sqe_type { * @raw: slab buffer to free via kfree() * @skb: &sk_buff to consume * @sinfo: skb shared info of an XDP_TX frame + * @xdpf: XDP frame from ::ndo_xdp_xmit() * @dma: DMA address to unmap * @len: length of the mapped region to unmap * @nr_frags: number of frags in the frame this buffer belongs to @@ -53,6 +58,7 @@ struct libeth_sqe { void *raw; struct sk_buff *skb; struct skb_shared_info *sinfo; + struct xdp_frame *xdpf; }; DEFINE_DMA_UNMAP_ADDR(dma); diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index 4988453a3d70..839001d901b2 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -11,6 +11,17 @@ #include #include +/* + * Defined as bits to be able to use them as a mask on Rx. + * Also used as internal return values on Tx. + */ +enum { + LIBETH_XDP_PASS = 0U, + LIBETH_XDP_DROP = BIT(0), + LIBETH_XDP_ABORTED = BIT(1), + LIBETH_XDP_TX = BIT(2), +}; + /* * &xdp_buff_xsk is the largest structure &libeth_xdp_buff gets casted to, * pick maximum pointer-compatible alignment. @@ -56,12 +67,14 @@ static_assert(IS_ALIGNED(sizeof(struct xdp_buff_xsk), * @LIBETH_XDP_TX_BULK: one bulk size at which it will be flushed to the queue * @LIBETH_XDP_TX_BATCH: batch size for which the queue fill loop is unrolled * @LIBETH_XDP_TX_DROP: indicates the send function must drop frames not sent + * @LIBETH_XDP_TX_NDO: whether the send function is called from .ndo_xdp_xmit() */ enum { LIBETH_XDP_TX_BULK = DEV_MAP_BULK_SIZE, LIBETH_XDP_TX_BATCH = 8, LIBETH_XDP_TX_DROP = BIT(0), + LIBETH_XDP_TX_NDO = BIT(1), }; /** @@ -88,6 +101,11 @@ enum { * @len_fl: ``XDP_TX``, combined flags [31:16] and len [15:0] field for speed * @soff: ``XDP_TX``, offset from @data to the start of &skb_shared_info * @frag: one (non-head) frag for ``XDP_TX`` + * @xdpf: &xdp_frame for the head frag for .ndo_xdp_xmit() + * @dma: DMA address of the non-head frag for .ndo_xdp_xmit() + * @len: frag length for .ndo_xdp_xmit() + * @flags: Tx flags for the above + * @opts: combined @len + @flags for the above for speed */ struct libeth_xdp_tx_frame { union { @@ -100,6 +118,21 @@ struct libeth_xdp_tx_frame { /* ``XDP_TX`` frag */ skb_frag_t frag; + + /* .ndo_xdp_xmit() */ + struct { + union { + struct xdp_frame *xdpf; + dma_addr_t dma; + }; + union { + struct { + u32 len; + u32 flags; + }; + aligned_u64 opts; + }; + }; }; } __aligned_largest; static_assert(offsetof(struct libeth_xdp_tx_frame, frag.len) == @@ -107,7 +140,7 @@ static_assert(offsetof(struct libeth_xdp_tx_frame, frag.len) == /** * struct libeth_xdp_tx_bulk - XDP Tx frame bulk for bulk sending - * @prog: corresponding active XDP program + * @prog: corresponding active XDP program, %NULL for .ndo_xdp_xmit() * @dev: &net_device which the frames are transmitted on * @xdpsq: shortcut to the corresponding driver-specific XDPSQ structure * @count: current number of frames in @bulk @@ -445,7 +478,7 @@ void libeth_xdp_tx_exception(struct libeth_xdp_tx_bulk *bq, u32 sent, /** * __libeth_xdp_tx_flush_bulk - internal helper to flush one XDP Tx bulk * @bq: bulk to flush - * @flags: XDP TX flags + * @flags: XDP TX flags (.ndo_xdp_xmit() etc.) * @prep: driver-specific callback to prepare the queue for sending * @fill: libeth_xdp callback to fill &libeth_sqe and &libeth_xdp_tx_desc * @xmit: driver callback to fill a HW descriptor @@ -495,6 +528,259 @@ __libeth_xdp_tx_flush_bulk(struct libeth_xdp_tx_bulk *bq, u32 flags, __libeth_xdp_tx_flush_bulk(bq, flags, prep, libeth_xdp_tx_fill_buf, \ xmit) +/* .ndo_xdp_xmit() implementation */ + +/** + * libeth_xdp_xmit_frame_dma - internal helper to access DMA of an &xdp_frame + * @xf: pointer to the XDP frame + * + * There's no place in &libeth_xdp_tx_frame to store DMA address for an + * &xdp_frame head. The headroom is used then, the address is placed right + * after the frame struct, naturally aligned. + * + * Return: pointer to the DMA address to use. + */ +#define libeth_xdp_xmit_frame_dma(xf) \ + _Generic((xf), \ + const struct xdp_frame *: \ + (const dma_addr_t *)__libeth_xdp_xmit_frame_dma(xf), \ + struct xdp_frame *: \ + (dma_addr_t *)__libeth_xdp_xmit_frame_dma(xf) \ + ) + +static inline void *__libeth_xdp_xmit_frame_dma(const struct xdp_frame *xdpf) +{ + void *addr = (void *)(xdpf + 1); + + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && + __alignof(*xdpf) < sizeof(dma_addr_t)) + addr = PTR_ALIGN(addr, sizeof(dma_addr_t)); + + return addr; +} + +/** + * libeth_xdp_xmit_queue_head - internal helper for queueing one XDP xmit head + * @bq: XDP Tx bulk to queue the head frag to + * @xdpf: XDP frame with the head to queue + * @dev: device to perform DMA mapping + * + * Return: ``LIBETH_XDP_DROP`` on DMA mapping error, + * ``LIBETH_XDP_PASS`` if it's the only frag in the frame, + * ``LIBETH_XDP_TX`` if it's an S/G frame. + */ +static inline u32 libeth_xdp_xmit_queue_head(struct libeth_xdp_tx_bulk *bq, + struct xdp_frame *xdpf, + struct device *dev) +{ + dma_addr_t dma; + + dma = dma_map_single(dev, xdpf->data, xdpf->len, DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma)) + return LIBETH_XDP_DROP; + + *libeth_xdp_xmit_frame_dma(xdpf) = dma; + + bq->bulk[bq->count++] = (typeof(*bq->bulk)){ + .xdpf = xdpf, + .len = xdpf->len, + .flags = LIBETH_XDP_TX_FIRST, + }; + + if (!xdp_frame_has_frags(xdpf)) + return LIBETH_XDP_PASS; + + bq->bulk[bq->count - 1].flags |= LIBETH_XDP_TX_MULTI; + + return LIBETH_XDP_TX; +} + +/** + * libeth_xdp_xmit_queue_frag - internal helper for queueing one XDP xmit frag + * @bq: XDP Tx bulk to queue the frag to + * @frag: frag to queue + * @dev: device to perform DMA mapping + * + * Return: true on success, false on DMA mapping error. + */ +static inline bool libeth_xdp_xmit_queue_frag(struct libeth_xdp_tx_bulk *bq, + const skb_frag_t *frag, + struct device *dev) +{ + dma_addr_t dma; + + dma = skb_frag_dma_map(dev, frag); + if (dma_mapping_error(dev, dma)) + return false; + + bq->bulk[bq->count++] = (typeof(*bq->bulk)){ + .dma = dma, + .len = skb_frag_size(frag), + }; + + return true; +} + +/** + * libeth_xdp_xmit_queue_bulk - internal helper for queueing one XDP xmit frame + * @bq: XDP Tx bulk to queue the frame to + * @xdpf: XDP frame to queue + * @flush_bulk: driver callback to flush the bulk to the HW queue + * + * Return: ``LIBETH_XDP_TX`` on success, + * ``LIBETH_XDP_DROP`` if the frame should be dropped by the stack, + * ``LIBETH_XDP_ABORTED`` if the frame will be dropped by libeth_xdp. + */ +static __always_inline u32 +libeth_xdp_xmit_queue_bulk(struct libeth_xdp_tx_bulk *bq, + struct xdp_frame *xdpf, + bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq, + u32 flags)) +{ + u32 head, nr_frags, i, ret = LIBETH_XDP_TX; + struct device *dev = bq->dev->dev.parent; + const struct skb_shared_info *sinfo; + + if (unlikely(bq->count == LIBETH_XDP_TX_BULK) && + unlikely(!flush_bulk(bq, LIBETH_XDP_TX_NDO))) + return LIBETH_XDP_DROP; + + head = libeth_xdp_xmit_queue_head(bq, xdpf, dev); + if (head == LIBETH_XDP_PASS) + goto out; + else if (head == LIBETH_XDP_DROP) + return LIBETH_XDP_DROP; + + sinfo = xdp_get_shared_info_from_frame(xdpf); + nr_frags = sinfo->nr_frags; + + for (i = 0; i < nr_frags; i++) { + if (unlikely(bq->count == LIBETH_XDP_TX_BULK) && + unlikely(!flush_bulk(bq, LIBETH_XDP_TX_NDO))) + break; + + if (!libeth_xdp_xmit_queue_frag(bq, &sinfo->frags[i], dev)) + break; + } + + if (unlikely(i < nr_frags)) + ret = LIBETH_XDP_ABORTED; + +out: + bq->bulk[bq->count - 1].flags |= LIBETH_XDP_TX_LAST; + + return ret; +} + +/** + * libeth_xdp_xmit_fill_buf - internal helper to fill one XDP xmit &libeth_sqe + * @frm: XDP Tx frame from the bulk + * @i: index on the HW queue + * @sq: XDPSQ abstraction for the queue + * @priv: private data + * + * Return: XDP Tx descriptor with the mapped DMA and other info to pass to + * the driver callback. + */ +static inline struct libeth_xdp_tx_desc +libeth_xdp_xmit_fill_buf(struct libeth_xdp_tx_frame frm, u32 i, + const struct libeth_xdpsq *sq, u64 priv) +{ + struct libeth_xdp_tx_desc desc; + struct libeth_sqe *sqe; + struct xdp_frame *xdpf; + + if (frm.flags & LIBETH_XDP_TX_FIRST) { + xdpf = frm.xdpf; + desc.addr = *libeth_xdp_xmit_frame_dma(xdpf); + } else { + xdpf = NULL; + desc.addr = frm.dma; + } + desc.opts = frm.opts; + + sqe = &sq->sqes[i]; + dma_unmap_addr_set(sqe, dma, desc.addr); + dma_unmap_len_set(sqe, len, desc.len); + + if (!xdpf) { + sqe->type = LIBETH_SQE_XDP_XMIT_FRAG; + return desc; + } + + sqe->type = LIBETH_SQE_XDP_XMIT; + sqe->xdpf = xdpf; + libeth_xdp_tx_fill_stats(sqe, &desc, + xdp_get_shared_info_from_frame(xdpf)); + + return desc; +} + +/** + * libeth_xdp_xmit_flush_bulk - wrapper to define flush of one XDP xmit bulk + * @bq: bulk to flush + * @flags: Tx flags, see __libeth_xdp_tx_flush_bulk() + * @prep: driver callback to prepare the queue + * @xmit: driver callback to fill a HW descriptor + */ +#define libeth_xdp_xmit_flush_bulk(bq, flags, prep, xmit) \ + __libeth_xdp_tx_flush_bulk(bq, (flags) | LIBETH_XDP_TX_NDO, prep, \ + libeth_xdp_xmit_fill_buf, xmit) + +u32 libeth_xdp_xmit_return_bulk(const struct libeth_xdp_tx_frame *bq, + u32 count, const struct net_device *dev); + +/** + * __libeth_xdp_xmit_do_bulk - internal function to implement .ndo_xdp_xmit() + * @bq: XDP Tx bulk to queue frames to + * @frames: XDP frames passed by the stack + * @n: number of frames + * @flags: flags passed by the stack + * @flush_bulk: driver callback to flush an XDP xmit bulk + * @finalize: driver callback to finalize sending XDP Tx frames on the queue + * + * Perform common checks, map the frags and queue them to the bulk, then flush + * the bulk to the XDPSQ. If requested by the stack, finalize the queue. + * + * Return: number of frames send or -errno on error. + */ +static __always_inline int +__libeth_xdp_xmit_do_bulk(struct libeth_xdp_tx_bulk *bq, + struct xdp_frame **frames, u32 n, u32 flags, + bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq, + u32 flags), + void (*finalize)(void *xdpsq, bool sent, bool flush)) +{ + u32 nxmit = 0; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + for (u32 i = 0; likely(i < n); i++) { + u32 ret; + + ret = libeth_xdp_xmit_queue_bulk(bq, frames[i], flush_bulk); + if (unlikely(ret != LIBETH_XDP_TX)) { + nxmit += ret == LIBETH_XDP_ABORTED; + break; + } + + nxmit++; + } + + if (bq->count) { + flush_bulk(bq, LIBETH_XDP_TX_NDO); + if (unlikely(bq->count)) + nxmit -= libeth_xdp_xmit_return_bulk(bq->bulk, + bq->count, + bq->dev); + } + + finalize(bq->xdpsq, nxmit, flags & XDP_XMIT_FLUSH); + + return nxmit; +} + /* Rx polling path */ static inline void libeth_xdp_return_va(const void *data, bool napi) -- cgit v1.2.3 From 26ce8eb0bb7d47c5fb36f7c12f34e4a320f14cac Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 12 Jun 2025 18:02:23 +0200 Subject: libeth: xdp: add XDPSQE completion helpers Similarly to libeth_tx_complete(), add libeth_xdp_complete_tx() to handle XDP_TX and xmit buffers. Both use bulk return under the hood. Also add out of line libeth_tx_complete_any() which handles both regular and XDP frames (if libeth_xdp is loaded), for example, to call on queue destroy, where we don't need inlining but convenience. Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/libeth/tx.h | 13 ++++++++- include/net/libeth/types.h | 21 ++++++++++++++- include/net/libeth/xdp.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/libeth/tx.h b/include/net/libeth/tx.h index e2b62a8b4c57..33b9bb22f6ac 100644 --- a/include/net/libeth/tx.h +++ b/include/net/libeth/tx.h @@ -84,7 +84,10 @@ struct libeth_sqe { /** * struct libeth_cq_pp - completion queue poll params * @dev: &device to perform DMA unmapping + * @bq: XDP frame bulk to combine return operations * @ss: onstack NAPI stats to fill + * @xss: onstack XDPSQ NAPI stats to fill + * @xdp_tx: number of XDP frames processed * @napi: whether it's called from the NAPI context * * libeth uses this structure to access objects needed for performing full @@ -93,7 +96,13 @@ struct libeth_sqe { */ struct libeth_cq_pp { struct device *dev; - struct libeth_sq_napi_stats *ss; + struct xdp_frame_bulk *bq; + + union { + struct libeth_sq_napi_stats *ss; + struct libeth_xdpsq_napi_stats *xss; + }; + u32 xdp_tx; bool napi; }; @@ -139,4 +148,6 @@ static inline void libeth_tx_complete(struct libeth_sqe *sqe, sqe->type = LIBETH_SQE_EMPTY; } +void libeth_tx_complete_any(struct libeth_sqe *sqe, struct libeth_cq_pp *cp); + #endif /* __LIBETH_TX_H */ diff --git a/include/net/libeth/types.h b/include/net/libeth/types.h index 603825e45133..ad7a5c1f119f 100644 --- a/include/net/libeth/types.h +++ b/include/net/libeth/types.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/* Copyright (C) 2024 Intel Corporation */ +/* Copyright (C) 2024-2025 Intel Corporation */ #ifndef __LIBETH_TYPES_H #define __LIBETH_TYPES_H @@ -22,4 +22,23 @@ struct libeth_sq_napi_stats { }; }; +/** + * struct libeth_xdpsq_napi_stats - "hot" counters to update in XDP Tx + * completion loop + * @packets: completed frames counter + * @bytes: sum of bytes of completed frames above + * @fragments: sum of fragments of completed S/G frames + * @raw: alias to access all the fields as an array + */ +struct libeth_xdpsq_napi_stats { + union { + struct { + u32 packets; + u32 bytes; + u32 fragments; + }; + DECLARE_FLEX_ARRAY(u32, raw); + }; +}; + #endif /* __LIBETH_TYPES_H */ diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index 839001d901b2..c47ecba56020 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -824,4 +824,70 @@ out: xdp->data = NULL; } +/* Tx buffer completion */ + +void libeth_xdp_return_buff_bulk(const struct skb_shared_info *sinfo, + struct xdp_frame_bulk *bq, bool frags); + +/** + * __libeth_xdp_complete_tx - complete sent XDPSQE + * @sqe: SQ element / Tx buffer to complete + * @cp: Tx polling/completion params + * @bulk: internal callback to bulk-free ``XDP_TX`` buffers + * + * Use the non-underscored version in drivers instead. This one is shared + * internally with libeth_tx_complete_any(). + * Complete an XDPSQE of any type of XDP frame. This includes DMA unmapping + * when needed, buffer freeing, stats update, and SQE invalidation. + */ +static __always_inline void +__libeth_xdp_complete_tx(struct libeth_sqe *sqe, struct libeth_cq_pp *cp, + typeof(libeth_xdp_return_buff_bulk) bulk) +{ + enum libeth_sqe_type type = sqe->type; + + switch (type) { + case LIBETH_SQE_EMPTY: + return; + case LIBETH_SQE_XDP_XMIT: + case LIBETH_SQE_XDP_XMIT_FRAG: + dma_unmap_page(cp->dev, dma_unmap_addr(sqe, dma), + dma_unmap_len(sqe, len), DMA_TO_DEVICE); + break; + default: + break; + } + + switch (type) { + case LIBETH_SQE_XDP_TX: + bulk(sqe->sinfo, cp->bq, sqe->nr_frags != 1); + break; + case LIBETH_SQE_XDP_XMIT: + xdp_return_frame_bulk(sqe->xdpf, cp->bq); + break; + default: + break; + } + + switch (type) { + case LIBETH_SQE_XDP_TX: + case LIBETH_SQE_XDP_XMIT: + cp->xdp_tx -= sqe->nr_frags; + + cp->xss->packets++; + cp->xss->bytes += sqe->bytes; + break; + default: + break; + } + + sqe->type = LIBETH_SQE_EMPTY; +} + +static inline void libeth_xdp_complete_tx(struct libeth_sqe *sqe, + struct libeth_cq_pp *cp) +{ + __libeth_xdp_complete_tx(sqe, cp, libeth_xdp_return_buff_bulk); +} + #endif /* __LIBETH_XDP_H */ -- cgit v1.2.3 From c4ba6a9b9d460c6fd742e118022f2808ec3c4223 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 12 Jun 2025 18:02:24 +0200 Subject: libeth: xdp: add XDPSQ locking helpers Unfortunately, it's not always possible to allocate max(num_rxqs, nr_cpu_ids) even on hi-end NICs. To mitigate this, add simple locking helpers to libeth_xdp. As long as XDPSQs are not shared, the whole functionality is gated behind a static lock. Otherwise, each bulk flush locks the queue for the time of cleaning and filling the descriptors. As long as this particular queue is not used by more than 1 CPU, the impact is minimal (runtime check for boolean twice per 16+ descriptors). Suggested-by: Maciej Fijalkowski # static key Signed-off-by: Alexander Lobakin Reviewed-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen --- include/net/libeth/types.h | 21 +++++++- include/net/libeth/xdp.h | 127 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 145 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/libeth/types.h b/include/net/libeth/types.h index ad7a5c1f119f..abfccae1a346 100644 --- a/include/net/libeth/types.h +++ b/include/net/libeth/types.h @@ -4,7 +4,7 @@ #ifndef __LIBETH_TYPES_H #define __LIBETH_TYPES_H -#include +#include /** * struct libeth_sq_napi_stats - "hot" counters to update in Tx completion loop @@ -41,4 +41,23 @@ struct libeth_xdpsq_napi_stats { }; }; +/* XDP */ + +/* + * The following structures should be embedded into driver's queue structure + * and passed to the libeth_xdp helpers, never used directly. + */ + +/* XDPSQ sharing */ + +/** + * struct libeth_xdpsq_lock - locking primitive for sharing XDPSQs + * @lock: spinlock for locking the queue + * @share: whether this particular queue is shared + */ +struct libeth_xdpsq_lock { + spinlock_t lock; + bool share; +}; + #endif /* __LIBETH_TYPES_H */ diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index c47ecba56020..20977fdfd6c9 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -60,6 +60,123 @@ static_assert(offsetof(struct libeth_xdp_buff, desc) == static_assert(IS_ALIGNED(sizeof(struct xdp_buff_xsk), __alignof(struct libeth_xdp_buff))); +/* XDPSQ sharing */ + +DECLARE_STATIC_KEY_FALSE(libeth_xdpsq_share); + +/** + * libeth_xdpsq_num - calculate optimal number of XDPSQs for this device + sys + * @rxq: current number of active Rx queues + * @txq: current number of active Tx queues + * @max: maximum number of Tx queues + * + * Each RQ must have its own XDPSQ for XSk pairs, each CPU must have own XDPSQ + * for lockless sending (``XDP_TX``, .ndo_xdp_xmit()). Cap the maximum of these + * two with the number of SQs the device can have (minus used ones). + * + * Return: number of XDP Tx queues the device needs to use. + */ +static inline u32 libeth_xdpsq_num(u32 rxq, u32 txq, u32 max) +{ + return min(max(nr_cpu_ids, rxq), max - txq); +} + +/** + * libeth_xdpsq_shared - whether XDPSQs can be shared between several CPUs + * @num: number of active XDPSQs + * + * Return: true if there's no 1:1 XDPSQ/CPU association, false otherwise. + */ +static inline bool libeth_xdpsq_shared(u32 num) +{ + return num < nr_cpu_ids; +} + +/** + * libeth_xdpsq_id - get XDPSQ index corresponding to this CPU + * @num: number of active XDPSQs + * + * Helper for libeth_xdp routines, do not use in drivers directly. + * + * Return: XDPSQ index needs to be used on this CPU. + */ +static inline u32 libeth_xdpsq_id(u32 num) +{ + u32 ret = raw_smp_processor_id(); + + if (static_branch_unlikely(&libeth_xdpsq_share) && + libeth_xdpsq_shared(num)) + ret %= num; + + return ret; +} + +void __libeth_xdpsq_get(struct libeth_xdpsq_lock *lock, + const struct net_device *dev); +void __libeth_xdpsq_put(struct libeth_xdpsq_lock *lock, + const struct net_device *dev); + +/** + * libeth_xdpsq_get - initialize &libeth_xdpsq_lock + * @lock: lock to initialize + * @dev: netdev which this lock belongs to + * @share: whether XDPSQs can be shared + * + * Tracks the current XDPSQ association and enables the static lock + * if needed. + */ +static inline void libeth_xdpsq_get(struct libeth_xdpsq_lock *lock, + const struct net_device *dev, + bool share) +{ + if (unlikely(share)) + __libeth_xdpsq_get(lock, dev); +} + +/** + * libeth_xdpsq_put - deinitialize &libeth_xdpsq_lock + * @lock: lock to deinitialize + * @dev: netdev which this lock belongs to + * + * Tracks the current XDPSQ association and disables the static lock + * if needed. + */ +static inline void libeth_xdpsq_put(struct libeth_xdpsq_lock *lock, + const struct net_device *dev) +{ + if (static_branch_unlikely(&libeth_xdpsq_share) && lock->share) + __libeth_xdpsq_put(lock, dev); +} + +void __libeth_xdpsq_lock(struct libeth_xdpsq_lock *lock); +void __libeth_xdpsq_unlock(struct libeth_xdpsq_lock *lock); + +/** + * libeth_xdpsq_lock - grab &libeth_xdpsq_lock if needed + * @lock: lock to take + * + * Touches the underlying spinlock only if the static key is enabled + * and the queue itself is marked as shareable. + */ +static inline void libeth_xdpsq_lock(struct libeth_xdpsq_lock *lock) +{ + if (static_branch_unlikely(&libeth_xdpsq_share) && lock->share) + __libeth_xdpsq_lock(lock); +} + +/** + * libeth_xdpsq_unlock - free &libeth_xdpsq_lock if needed + * @lock: lock to free + * + * Touches the underlying spinlock only if the static key is enabled + * and the queue itself is marked as shareable. + */ +static inline void libeth_xdpsq_unlock(struct libeth_xdpsq_lock *lock) +{ + if (static_branch_unlikely(&libeth_xdpsq_share) && lock->share) + __libeth_xdpsq_unlock(lock); +} + /* Common Tx bits */ /** @@ -179,6 +296,7 @@ struct libeth_xdp_tx_bulk { * @count: number of descriptors on that queue * @pending: pointer to the number of sent-not-completed descs on that queue * @xdp_tx: pointer to the above + * @lock: corresponding XDPSQ lock * * Abstraction for driver-independent implementation of Tx. Placed on the stack * and filled by the driver before the transmission, so that the generic @@ -193,6 +311,7 @@ struct libeth_xdpsq { u32 *pending; u32 *xdp_tx; + struct libeth_xdpsq_lock *lock; }; /** @@ -229,7 +348,8 @@ struct libeth_xdp_tx_desc { * * Internal abstraction for placing @n XDP Tx frames on the HW XDPSQ. Used for * all types of frames. - * @unroll greatly increases the object code size, but also greatly increases + * @prep must lock the queue as this function releases it at the end. @unroll + * greatly increases the object code size, but also greatly increases * performance. * The compilers inline all those onstack abstractions to direct data accesses. * @@ -253,7 +373,7 @@ libeth_xdp_tx_xmit_bulk(const struct libeth_xdp_tx_frame *bulk, void *xdpsq, n = min(n, prep(xdpsq, &sq)); if (unlikely(!n)) - return 0; + goto unlock; ntu = *sq.ntu; @@ -302,6 +422,9 @@ out: if (sq.xdp_tx) *sq.xdp_tx += n; +unlock: + libeth_xdpsq_unlock(sq.lock); + return n; } -- cgit v1.2.3 From 819bbaefeded93df36d71d58d9963d706e6e99e1 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 12 Jun 2025 18:02:25 +0200 Subject: libeth: xdp: add XDPSQ cleanup timers When XDP Tx queues are not interrupt-driven but use lazy cleaning, i.e. only when there are less than `threshold` free descriptors left, we also need cleanup timers to avoid &xdp_buff and &xdp_frame stall for too long, especially with Page Pool (it warns every about inflight pages every 60 second). Let's say we sent 256 frames and don't need to send more, but we clean only when the number of pending items >= 384. In that case, those 256 will stall until 128 more are sent. For this, add simple helpers to run a timer which will clean the queue regardless, after 1 second of the last send. The timer is triggered when finalizing the queue. As long as there is regular active traffic, the timer doesn't fire. Signed-off-by: Alexander Lobakin Reviewed-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen --- include/net/libeth/types.h | 21 ++++++++++++++++- include/net/libeth/xdp.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/libeth/types.h b/include/net/libeth/types.h index abfccae1a346..4df703a9eb59 100644 --- a/include/net/libeth/types.h +++ b/include/net/libeth/types.h @@ -4,7 +4,7 @@ #ifndef __LIBETH_TYPES_H #define __LIBETH_TYPES_H -#include +#include /** * struct libeth_sq_napi_stats - "hot" counters to update in Tx completion loop @@ -60,4 +60,23 @@ struct libeth_xdpsq_lock { bool share; }; +/* XDPSQ clean-up timers */ + +/** + * struct libeth_xdpsq_timer - timer for cleaning up XDPSQs w/o interrupts + * @xdpsq: queue this timer belongs to + * @lock: lock for the queue + * @dwork: work performing cleanups + * + * XDPSQs not using interrupts but lazy cleaning, i.e. only when there's no + * space for sending the current queued frame/bulk, must fire up timers to + * make sure there are no stale buffers to free. + */ +struct libeth_xdpsq_timer { + void *xdpsq; + struct libeth_xdpsq_lock *lock; + + struct delayed_work dwork; +}; + #endif /* __LIBETH_TYPES_H */ diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index 20977fdfd6c9..22bd038decb6 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -177,6 +177,63 @@ static inline void libeth_xdpsq_unlock(struct libeth_xdpsq_lock *lock) __libeth_xdpsq_unlock(lock); } +/* XDPSQ clean-up timers */ + +void libeth_xdpsq_init_timer(struct libeth_xdpsq_timer *timer, void *xdpsq, + struct libeth_xdpsq_lock *lock, + void (*poll)(struct work_struct *work)); + +/** + * libeth_xdpsq_deinit_timer - deinitialize &libeth_xdpsq_timer + * @timer: timer to deinitialize + * + * Flush and disable the underlying workqueue. + */ +static inline void libeth_xdpsq_deinit_timer(struct libeth_xdpsq_timer *timer) +{ + cancel_delayed_work_sync(&timer->dwork); +} + +/** + * libeth_xdpsq_queue_timer - run &libeth_xdpsq_timer + * @timer: timer to queue + * + * Should be called after the queue was filled and the transmission was run + * to complete the pending buffers if no further sending will be done in a + * second (-> lazy cleaning won't happen). + * If the timer was already run, it will be requeued back to one second + * timeout again. + */ +static inline void libeth_xdpsq_queue_timer(struct libeth_xdpsq_timer *timer) +{ + mod_delayed_work_on(raw_smp_processor_id(), system_bh_highpri_wq, + &timer->dwork, HZ); +} + +/** + * libeth_xdpsq_run_timer - wrapper to run a queue clean-up on a timer event + * @work: workqueue belonging to the corresponding timer + * @poll: driver-specific completion queue poll function + * + * Run the polling function on the locked queue and requeue the timer if + * there's more work to do. + * Designed to be used via LIBETH_XDP_DEFINE_TIMER() below. + */ +static __always_inline void +libeth_xdpsq_run_timer(struct work_struct *work, + u32 (*poll)(void *xdpsq, u32 budget)) +{ + struct libeth_xdpsq_timer *timer = container_of(work, typeof(*timer), + dwork.work); + + libeth_xdpsq_lock(timer->lock); + + if (poll(timer->xdpsq, U32_MAX)) + libeth_xdpsq_queue_timer(timer); + + libeth_xdpsq_unlock(timer->lock); +} + /* Common Tx bits */ /** -- cgit v1.2.3 From 3ef2b0192e8ba133f597919632bd9cf196076f0b Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 12 Jun 2025 18:02:26 +0200 Subject: libeth: xdp: add helpers for preparing/processing &libeth_xdp_buff Add convenience helpers to build an &xdp_buff. This means: general initialization before the NAPI loop, adding head, adding frags etc. libeth_xdp_process_buff() is the same what everybody have in their drivers: dma_sync_for_cpu(); if (!frag) { add_head(); prefetch(); } else { add_frag(); } Note that I don't use net_prefetch(), sticking to the original prefetch(). In none of my tests prefetching 128 bytes yielded better perf than 64 bytes. That might differ if the headers are huge enough, but then additional tunneling etc. overhead takes place, you either way won't win a lot. &libeth_xdp_stash is for cases when you exit the polling loop without finishing building the buff. If that happens, you need to store the buffer in the queue structure until the next loop and then restore it. It makes no sense to place a whole full &xdp_buff there. Define a minimal structure, which would store only the fields essential to restore it. I was able to pack it into 16 bytes, which is only 8 bytes bigger than `struct sk_buff *skb` on x64. Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/libeth/types.h | 23 +++++++ include/net/libeth/xdp.h | 151 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+) (limited to 'include') diff --git a/include/net/libeth/types.h b/include/net/libeth/types.h index 4df703a9eb59..7b27c1966d45 100644 --- a/include/net/libeth/types.h +++ b/include/net/libeth/types.h @@ -79,4 +79,27 @@ struct libeth_xdpsq_timer { struct delayed_work dwork; }; +/* Rx polling path */ + +/** + * struct libeth_xdp_buff_stash - struct for stashing &xdp_buff onto a queue + * @data: pointer to the start of the frame, xdp_buff.data + * @headroom: frame headroom, xdp_buff.data - xdp_buff.data_hard_start + * @len: frame linear space length, xdp_buff.data_end - xdp_buff.data + * @frame_sz: truesize occupied by the frame, xdp_buff.frame_sz + * @flags: xdp_buff.flags + * + * &xdp_buff is 56 bytes long on x64, &libeth_xdp_buff is 64 bytes. This + * structure carries only necessary fields to save/restore a partially built + * frame on the queue structure to finish it during the next NAPI poll. + */ +struct libeth_xdp_buff_stash { + void *data; + u16 headroom; + u16 len; + + u32 frame_sz:24; + u32 flags:8; +} __aligned_largest; + #endif /* __LIBETH_TYPES_H */ diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index 22bd038decb6..780447cdabc1 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -60,6 +60,42 @@ static_assert(offsetof(struct libeth_xdp_buff, desc) == static_assert(IS_ALIGNED(sizeof(struct xdp_buff_xsk), __alignof(struct libeth_xdp_buff))); +/** + * __LIBETH_XDP_ONSTACK_BUFF - declare a &libeth_xdp_buff on the stack + * @name: name of the variable to declare + * @...: sizeof() of the driver-private data + */ +#define __LIBETH_XDP_ONSTACK_BUFF(name, ...) \ + ___LIBETH_XDP_ONSTACK_BUFF(name, ##__VA_ARGS__) +/** + * LIBETH_XDP_ONSTACK_BUFF - declare a &libeth_xdp_buff on the stack + * @name: name of the variable to declare + * @...: type or variable name of the driver-private data + */ +#define LIBETH_XDP_ONSTACK_BUFF(name, ...) \ + __LIBETH_XDP_ONSTACK_BUFF(name, __libeth_xdp_priv_sz(__VA_ARGS__)) + +#define ___LIBETH_XDP_ONSTACK_BUFF(name, ...) \ + __DEFINE_FLEX(struct libeth_xdp_buff, name, priv, \ + LIBETH_XDP_PRIV_SZ(__VA_ARGS__ + 0), \ + __uninitialized); \ + LIBETH_XDP_ASSERT_PRIV_SZ(__VA_ARGS__ + 0) + +#define __libeth_xdp_priv_sz(...) \ + CONCATENATE(__libeth_xdp_psz, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__) + +#define __libeth_xdp_psz0(...) +#define __libeth_xdp_psz1(...) sizeof(__VA_ARGS__) + +#define LIBETH_XDP_PRIV_SZ(sz) \ + (ALIGN(sz, __alignof(struct libeth_xdp_buff)) / sizeof(long)) + +/* Performs XSK_CHECK_PRIV_TYPE() */ +#define LIBETH_XDP_ASSERT_PRIV_SZ(sz) \ + static_assert(offsetofend(struct xdp_buff_xsk, cb) >= \ + struct_size_t(struct libeth_xdp_buff, priv, \ + LIBETH_XDP_PRIV_SZ(sz))) + /* XDPSQ sharing */ DECLARE_STATIC_KEY_FALSE(libeth_xdpsq_share); @@ -963,6 +999,65 @@ __libeth_xdp_xmit_do_bulk(struct libeth_xdp_tx_bulk *bq, /* Rx polling path */ +void libeth_xdp_load_stash(struct libeth_xdp_buff *dst, + const struct libeth_xdp_buff_stash *src); +void libeth_xdp_save_stash(struct libeth_xdp_buff_stash *dst, + const struct libeth_xdp_buff *src); +void __libeth_xdp_return_stash(struct libeth_xdp_buff_stash *stash); + +/** + * libeth_xdp_init_buff - initialize a &libeth_xdp_buff for Rx NAPI poll + * @dst: onstack buffer to initialize + * @src: XDP buffer stash placed on the queue + * @rxq: registered &xdp_rxq_info corresponding to this queue + * + * Should be called before the main NAPI polling loop. Loads the content of + * the previously saved stash or initializes the buffer from scratch. + */ +static inline void +libeth_xdp_init_buff(struct libeth_xdp_buff *dst, + const struct libeth_xdp_buff_stash *src, + struct xdp_rxq_info *rxq) +{ + if (likely(!src->data)) + dst->data = NULL; + else + libeth_xdp_load_stash(dst, src); + + dst->base.rxq = rxq; +} + +/** + * libeth_xdp_save_buff - save a partially built buffer on a queue + * @dst: XDP buffer stash placed on the queue + * @src: onstack buffer to save + * + * Should be called after the main NAPI polling loop. If the loop exited before + * the buffer was finished, saves its content on the queue, so that it can be + * completed during the next poll. Otherwise, clears the stash. + */ +static inline void libeth_xdp_save_buff(struct libeth_xdp_buff_stash *dst, + const struct libeth_xdp_buff *src) +{ + if (likely(!src->data)) + dst->data = NULL; + else + libeth_xdp_save_stash(dst, src); +} + +/** + * libeth_xdp_return_stash - free an XDP buffer stash from a queue + * @stash: stash to free + * + * If the queue is about to be destroyed, but it still has an incompleted + * buffer stash, this helper should be called to free it. + */ +static inline void libeth_xdp_return_stash(struct libeth_xdp_buff_stash *stash) +{ + if (stash->data) + __libeth_xdp_return_stash(stash); +} + static inline void libeth_xdp_return_va(const void *data, bool napi) { netmem_ref netmem = virt_to_netmem(data); @@ -1004,6 +1099,62 @@ out: xdp->data = NULL; } +bool libeth_xdp_buff_add_frag(struct libeth_xdp_buff *xdp, + const struct libeth_fqe *fqe, + u32 len); + +/** + * libeth_xdp_prepare_buff - fill &libeth_xdp_buff with head FQE data + * @xdp: XDP buffer to attach the head to + * @fqe: FQE containing the head buffer + * @len: buffer len passed from HW + * + * Internal, use libeth_xdp_process_buff() instead. Initializes XDP buffer + * head with the Rx buffer data: data pointer, length, headroom, and + * truesize/tailroom. Zeroes the flags. + */ +static inline void libeth_xdp_prepare_buff(struct libeth_xdp_buff *xdp, + const struct libeth_fqe *fqe, + u32 len) +{ + const struct page *page = __netmem_to_page(fqe->netmem); + + xdp_init_buff(&xdp->base, fqe->truesize, xdp->base.rxq); + xdp_prepare_buff(&xdp->base, page_address(page) + fqe->offset, + page->pp->p.offset, len, true); +} + +/** + * libeth_xdp_process_buff - attach Rx buffer to &libeth_xdp_buff + * @xdp: XDP buffer to attach the Rx buffer to + * @fqe: Rx buffer to process + * @len: received data length from the descriptor + * + * If the XDP buffer is empty, attaches the Rx buffer as head and initializes + * the required fields. Otherwise, attaches the buffer as a frag. + * Already performs DMA sync-for-CPU and frame start prefetch + * (for head buffers only). + * + * Return: true on success, false if the descriptor must be skipped (empty or + * no space for a new frag). + */ +static inline bool libeth_xdp_process_buff(struct libeth_xdp_buff *xdp, + const struct libeth_fqe *fqe, + u32 len) +{ + if (!libeth_rx_sync_for_cpu(fqe, len)) + return false; + + if (xdp->data) + return libeth_xdp_buff_add_frag(xdp, fqe, len); + + libeth_xdp_prepare_buff(xdp, fqe, len); + + prefetch(xdp->data); + + return true; +} + /* Tx buffer completion */ void libeth_xdp_return_buff_bulk(const struct skb_shared_info *sinfo, -- cgit v1.2.3 From 4c805f7ae1ce61a90121378a5ee1f47b3b870c73 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 12 Jun 2025 18:02:27 +0200 Subject: libeth: xdp: add XDP prog run and verdict result handling Running a prog and handling the verdicts, up to napi_gro_receive() is also pretty generic code not really differing between vendors (except for Tx descriptor filling and Rx descriptor parsing). Define a couple inlines to do that. The inline callbacks a driver needs to pass is mentioned above: Tx descriptor filling for XDP_TX, populating skb with the descriptor data for XDP_PASS, finalizing XDPSQs after the polling loop for XDP_TX (kicking the HW to start sending). The populate callback passes only &libeth_xdp_buff assuming buff::desc pointer is enough, plus you can always get the corresponding Rx queue structure via container_of(buff::rxq). If not, a driver can extend the buff with more fields directly on the stack without touching libeth_xdp definitions. Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/libeth/types.h | 22 ++++ include/net/libeth/xdp.h | 281 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 303 insertions(+) (limited to 'include') diff --git a/include/net/libeth/types.h b/include/net/libeth/types.h index 7b27c1966d45..cf1d78a9dc38 100644 --- a/include/net/libeth/types.h +++ b/include/net/libeth/types.h @@ -6,6 +6,28 @@ #include +/* Stats */ + +/** + * struct libeth_rq_napi_stats - "hot" counters to update in Rx polling loop + * @packets: received frames counter + * @bytes: sum of bytes of received frames above + * @fragments: sum of fragments of received S/G frames + * @hsplit: number of frames the device performed the header split for + * @raw: alias to access all the fields as an array + */ +struct libeth_rq_napi_stats { + union { + struct { + u32 packets; + u32 bytes; + u32 fragments; + u32 hsplit; + }; + DECLARE_FLEX_ARRAY(u32, raw); + }; +}; + /** * struct libeth_sq_napi_stats - "hot" counters to update in Tx completion loop * @packets: completed frames counter diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index 780447cdabc1..db99bc690eb6 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -20,6 +20,7 @@ enum { LIBETH_XDP_DROP = BIT(0), LIBETH_XDP_ABORTED = BIT(1), LIBETH_XDP_TX = BIT(2), + LIBETH_XDP_REDIRECT = BIT(3), }; /* @@ -353,6 +354,7 @@ static_assert(offsetof(struct libeth_xdp_tx_frame, frag.len) == * @prog: corresponding active XDP program, %NULL for .ndo_xdp_xmit() * @dev: &net_device which the frames are transmitted on * @xdpsq: shortcut to the corresponding driver-specific XDPSQ structure + * @act_mask: Rx only, mask of all the XDP prog verdicts for that NAPI session * @count: current number of frames in @bulk * @bulk: array of queued frames for bulk Tx * @@ -366,6 +368,7 @@ struct libeth_xdp_tx_bulk { struct net_device *dev; void *xdpsq; + u32 act_mask; u32 count; struct libeth_xdp_tx_frame bulk[LIBETH_XDP_TX_BULK]; } __aligned(sizeof(struct libeth_xdp_tx_frame)); @@ -999,6 +1002,40 @@ __libeth_xdp_xmit_do_bulk(struct libeth_xdp_tx_bulk *bq, /* Rx polling path */ +/** + * libeth_xdp_tx_init_bulk - initialize an XDP Tx bulk for Rx NAPI poll + * @bq: bulk to initialize + * @prog: RCU pointer to the XDP program (can be %NULL) + * @dev: target &net_device + * @xdpsqs: array of driver XDPSQ structs + * @num: number of active XDPSQs, the above array length + * + * Should be called on an onstack XDP Tx bulk before the NAPI polling loop. + * Initializes all the needed fields to run libeth_xdp functions. If @num == 0, + * assumes XDP is not enabled. + */ +#define libeth_xdp_tx_init_bulk(bq, prog, dev, xdpsqs, num) \ + __libeth_xdp_tx_init_bulk(bq, prog, dev, xdpsqs, num, false, \ + __UNIQUE_ID(bq_), __UNIQUE_ID(nqs_)) + +#define __libeth_xdp_tx_init_bulk(bq, pr, d, xdpsqs, num, ub, un) do { \ + typeof(bq) ub = (bq); \ + u32 un = (num); \ + \ + rcu_read_lock(); \ + \ + if (un) { \ + ub->prog = rcu_dereference(pr); \ + ub->dev = (d); \ + ub->xdpsq = (xdpsqs)[libeth_xdpsq_id(un)]; \ + } else { \ + ub->prog = NULL; \ + } \ + \ + ub->act_mask = 0; \ + ub->count = 0; \ +} while (0) + void libeth_xdp_load_stash(struct libeth_xdp_buff *dst, const struct libeth_xdp_buff_stash *src); void libeth_xdp_save_stash(struct libeth_xdp_buff_stash *dst, @@ -1155,6 +1192,250 @@ static inline bool libeth_xdp_process_buff(struct libeth_xdp_buff *xdp, return true; } +/** + * libeth_xdp_buff_stats_frags - update onstack RQ stats with XDP frags info + * @ss: onstack stats to update + * @xdp: buffer to account + * + * Internal helper used by __libeth_xdp_run_pass(), do not call directly. + * Adds buffer's frags count and total len to the onstack stats. + */ +static inline void +libeth_xdp_buff_stats_frags(struct libeth_rq_napi_stats *ss, + const struct libeth_xdp_buff *xdp) +{ + const struct skb_shared_info *sinfo; + + sinfo = xdp_get_shared_info_from_buff(&xdp->base); + ss->bytes += sinfo->xdp_frags_size; + ss->fragments += sinfo->nr_frags + 1; +} + +u32 libeth_xdp_prog_exception(const struct libeth_xdp_tx_bulk *bq, + struct libeth_xdp_buff *xdp, + enum xdp_action act, int ret); + +/** + * __libeth_xdp_run_prog - run XDP program on an XDP buffer + * @xdp: XDP buffer to run the prog on + * @bq: buffer bulk for ``XDP_TX`` queueing + * + * Internal inline abstraction to run XDP program. Handles ``XDP_DROP`` + * and ``XDP_REDIRECT`` only, the rest is processed levels up. + * Reports an XDP prog exception on errors. + * + * Return: libeth_xdp prog verdict depending on the prog's verdict. + */ +static __always_inline u32 +__libeth_xdp_run_prog(struct libeth_xdp_buff *xdp, + const struct libeth_xdp_tx_bulk *bq) +{ + enum xdp_action act; + + act = bpf_prog_run_xdp(bq->prog, &xdp->base); + if (unlikely(act < XDP_DROP || act > XDP_REDIRECT)) + goto out; + + switch (act) { + case XDP_PASS: + return LIBETH_XDP_PASS; + case XDP_DROP: + libeth_xdp_return_buff(xdp); + + return LIBETH_XDP_DROP; + case XDP_TX: + return LIBETH_XDP_TX; + case XDP_REDIRECT: + if (unlikely(xdp_do_redirect(bq->dev, &xdp->base, bq->prog))) + break; + + xdp->data = NULL; + + return LIBETH_XDP_REDIRECT; + default: + break; + } + +out: + return libeth_xdp_prog_exception(bq, xdp, act, 0); +} + +/** + * __libeth_xdp_run_flush - run XDP program and handle ``XDP_TX`` verdict + * @xdp: XDP buffer to run the prog on + * @bq: buffer bulk for ``XDP_TX`` queueing + * @run: internal callback for running XDP program + * @queue: internal callback for queuing ``XDP_TX`` frame + * @flush_bulk: driver callback for flushing a bulk + * + * Internal inline abstraction to run XDP program and additionally handle + * ``XDP_TX`` verdict. + * Do not use directly. + * + * Return: libeth_xdp prog verdict depending on the prog's verdict. + */ +static __always_inline u32 +__libeth_xdp_run_flush(struct libeth_xdp_buff *xdp, + struct libeth_xdp_tx_bulk *bq, + u32 (*run)(struct libeth_xdp_buff *xdp, + const struct libeth_xdp_tx_bulk *bq), + bool (*queue)(struct libeth_xdp_tx_bulk *bq, + struct libeth_xdp_buff *xdp, + bool (*flush_bulk) + (struct libeth_xdp_tx_bulk *bq, + u32 flags)), + bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq, + u32 flags)) +{ + u32 act; + + act = run(xdp, bq); + if (act == LIBETH_XDP_TX && unlikely(!queue(bq, xdp, flush_bulk))) + act = LIBETH_XDP_DROP; + + bq->act_mask |= act; + + return act; +} + +/** + * libeth_xdp_run_prog - run XDP program and handle all verdicts + * @xdp: XDP buffer to process + * @bq: XDP Tx bulk to queue ``XDP_TX`` buffers + * @fl: driver ``XDP_TX`` bulk flush callback + * + * Run the attached XDP program and handle all possible verdicts. + * + * Return: true if the buffer should be passed up the stack, false if the poll + * should go to the next buffer. + */ +#define libeth_xdp_run_prog(xdp, bq, fl) \ + (__libeth_xdp_run_flush(xdp, bq, __libeth_xdp_run_prog, \ + libeth_xdp_tx_queue_bulk, \ + fl) == LIBETH_XDP_PASS) + +/** + * __libeth_xdp_run_pass - helper to run XDP program and handle the result + * @xdp: XDP buffer to process + * @bq: XDP Tx bulk to queue ``XDP_TX`` frames + * @napi: NAPI to build an skb and pass it up the stack + * @rs: onstack libeth RQ stats + * @md: metadata that should be filled to the XDP buffer + * @prep: callback for filling the metadata + * @run: driver wrapper to run XDP program + * @populate: driver callback to populate an skb with the HW descriptor data + * + * Inline abstraction that does the following: + * 1) adds frame size and frag number (if needed) to the onstack stats; + * 2) fills the descriptor metadata to the onstack &libeth_xdp_buff + * 3) runs XDP program if present; + * 4) handles all possible verdicts; + * 5) on ``XDP_PASS`, builds an skb from the buffer; + * 6) populates it with the descriptor metadata; + * 7) passes it up the stack. + * + * In most cases, number 2 means just writing the pointer to the HW descriptor + * to the XDP buffer. If so, please use LIBETH_XDP_DEFINE_RUN{,_PASS}() + * wrappers to build a driver function. + */ +static __always_inline void +__libeth_xdp_run_pass(struct libeth_xdp_buff *xdp, + struct libeth_xdp_tx_bulk *bq, struct napi_struct *napi, + struct libeth_rq_napi_stats *rs, const void *md, + void (*prep)(struct libeth_xdp_buff *xdp, + const void *md), + bool (*run)(struct libeth_xdp_buff *xdp, + struct libeth_xdp_tx_bulk *bq), + bool (*populate)(struct sk_buff *skb, + const struct libeth_xdp_buff *xdp, + struct libeth_rq_napi_stats *rs)) +{ + struct sk_buff *skb; + + rs->bytes += xdp->base.data_end - xdp->data; + rs->packets++; + + if (xdp_buff_has_frags(&xdp->base)) + libeth_xdp_buff_stats_frags(rs, xdp); + + if (prep && (!__builtin_constant_p(!!md) || md)) + prep(xdp, md); + + if (!bq || !run || !bq->prog) + goto build; + + if (!run(xdp, bq)) + return; + +build: + skb = xdp_build_skb_from_buff(&xdp->base); + if (unlikely(!skb)) { + libeth_xdp_return_buff_slow(xdp); + return; + } + + xdp->data = NULL; + + if (unlikely(!populate(skb, xdp, rs))) { + napi_consume_skb(skb, true); + return; + } + + napi_gro_receive(napi, skb); +} + +static inline void libeth_xdp_prep_desc(struct libeth_xdp_buff *xdp, + const void *desc) +{ + xdp->desc = desc; +} + +/** + * libeth_xdp_run_pass - helper to run XDP program and handle the result + * @xdp: XDP buffer to process + * @bq: XDP Tx bulk to queue ``XDP_TX`` frames + * @napi: NAPI to build an skb and pass it up the stack + * @ss: onstack libeth RQ stats + * @desc: pointer to the HW descriptor for that frame + * @run: driver wrapper to run XDP program + * @populate: driver callback to populate an skb with the HW descriptor data + * + * Wrapper around the underscored version when "fill the descriptor metadata" + * means just writing the pointer to the HW descriptor as @xdp->desc. + */ +#define libeth_xdp_run_pass(xdp, bq, napi, ss, desc, run, populate) \ + __libeth_xdp_run_pass(xdp, bq, napi, ss, desc, libeth_xdp_prep_desc, \ + run, populate) + +/** + * libeth_xdp_finalize_rx - finalize XDPSQ after a NAPI polling loop + * @bq: ``XDP_TX`` frame bulk + * @flush: driver callback to flush the bulk + * @finalize: driver callback to start sending the frames and run the timer + * + * Flush the bulk if there are frames left to send, kick the queue and flush + * the XDP maps. + */ +#define libeth_xdp_finalize_rx(bq, flush, finalize) \ + __libeth_xdp_finalize_rx(bq, 0, flush, finalize) + +static __always_inline void +__libeth_xdp_finalize_rx(struct libeth_xdp_tx_bulk *bq, u32 flags, + bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq, + u32 flags), + void (*finalize)(void *xdpsq, bool sent, bool flush)) +{ + if (bq->act_mask & LIBETH_XDP_TX) { + if (bq->count) + flush_bulk(bq, flags | LIBETH_XDP_TX_DROP); + finalize(bq->xdpsq, true, true); + } + if (bq->act_mask & LIBETH_XDP_REDIRECT) + xdp_do_flush(); + + rcu_read_unlock(); +} + /* Tx buffer completion */ void libeth_xdp_return_buff_bulk(const struct skb_shared_info *sinfo, -- cgit v1.2.3 From 1bb635d3748b7158c6a19e6fca4fb85e6f96fd9a Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 12 Jun 2025 18:02:28 +0200 Subject: libeth: xdp: add templates for building driver-side callbacks Defining driver-specific functions to pass to libeth_xdp functions can induce boilerplates and/or look a bit cryptic with all those layers of indirection. On the other hand, this indirection is needed to allow compilers to uninline big functions even when passed to __always_inline helpers (too much inlining also hurts performance in some cases), plus to reuse some XDP helpers in XSk code. Add macros to quickly build them, with the detailed kdoc. They take names of the actual callbacks for filling a Tx descriptor and other purely HW-specific things and wrap them appropriately. LIBETH_XDP_DEFINE_{BEGIN,END}() is needed for GCC 8+ unfortunately to let the drivers control which functions will be static and which global without hitting `-Wold-style-declaration`. Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/libeth/xdp.h | 195 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) (limited to 'include') diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index db99bc690eb6..46a2ec3c3037 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -742,6 +742,9 @@ __libeth_xdp_tx_flush_bulk(struct libeth_xdp_tx_bulk *bq, u32 flags, * @flags: Tx flags, see above * @prep: driver callback to prepare the queue * @xmit: driver callback to fill a HW descriptor + * + * Use via LIBETH_XDP_DEFINE_FLUSH_TX() to define an ``XDP_TX`` driver + * callback. */ #define libeth_xdp_tx_flush_bulk(bq, flags, prep, xmit) \ __libeth_xdp_tx_flush_bulk(bq, flags, prep, libeth_xdp_tx_fill_buf, \ @@ -749,6 +752,25 @@ __libeth_xdp_tx_flush_bulk(struct libeth_xdp_tx_bulk *bq, u32 flags, /* .ndo_xdp_xmit() implementation */ +/** + * libeth_xdp_xmit_init_bulk - internal helper to initialize bulk for XDP xmit + * @bq: bulk to initialize + * @dev: target &net_device + * @xdpsqs: array of driver-specific XDPSQ structs + * @num: number of active XDPSQs (the above array length) + */ +#define libeth_xdp_xmit_init_bulk(bq, dev, xdpsqs, num) \ + __libeth_xdp_xmit_init_bulk(bq, dev, (xdpsqs)[libeth_xdpsq_id(num)]) + +static inline void __libeth_xdp_xmit_init_bulk(struct libeth_xdp_tx_bulk *bq, + struct net_device *dev, + void *xdpsq) +{ + bq->dev = dev; + bq->xdpsq = xdpsq; + bq->count = 0; +} + /** * libeth_xdp_xmit_frame_dma - internal helper to access DMA of an &xdp_frame * @xf: pointer to the XDP frame @@ -941,6 +963,9 @@ libeth_xdp_xmit_fill_buf(struct libeth_xdp_tx_frame frm, u32 i, * @flags: Tx flags, see __libeth_xdp_tx_flush_bulk() * @prep: driver callback to prepare the queue * @xmit: driver callback to fill a HW descriptor + * + * Use via LIBETH_XDP_DEFINE_FLUSH_XMIT() to define an XDP xmit driver + * callback. */ #define libeth_xdp_xmit_flush_bulk(bq, flags, prep, xmit) \ __libeth_xdp_tx_flush_bulk(bq, (flags) | LIBETH_XDP_TX_NDO, prep, \ @@ -1000,6 +1025,44 @@ __libeth_xdp_xmit_do_bulk(struct libeth_xdp_tx_bulk *bq, return nxmit; } +/** + * libeth_xdp_xmit_do_bulk - implement full .ndo_xdp_xmit() in driver + * @dev: target &net_device + * @n: number of frames to send + * @fr: XDP frames to send + * @f: flags passed by the stack + * @xqs: array of XDPSQs driver structs + * @nqs: number of active XDPSQs, the above array length + * @fl: driver callback to flush an XDP xmit bulk + * @fin: driver cabback to finalize the queue + * + * If the driver has active XDPSQs, perform common checks and send the frames. + * Finalize the queue, if requested. + * + * Return: number of frames sent or -errno on error. + */ +#define libeth_xdp_xmit_do_bulk(dev, n, fr, f, xqs, nqs, fl, fin) \ + _libeth_xdp_xmit_do_bulk(dev, n, fr, f, xqs, nqs, fl, fin, \ + __UNIQUE_ID(bq_), __UNIQUE_ID(ret_), \ + __UNIQUE_ID(nqs_)) + +#define _libeth_xdp_xmit_do_bulk(d, n, fr, f, xqs, nqs, fl, fin, ub, ur, un) \ +({ \ + u32 un = (nqs); \ + int ur; \ + \ + if (likely(un)) { \ + LIBETH_XDP_ONSTACK_BULK(ub); \ + \ + libeth_xdp_xmit_init_bulk(&ub, d, xqs, un); \ + ur = __libeth_xdp_xmit_do_bulk(&ub, fr, n, f, fl, fin); \ + } else { \ + ur = -ENXIO; \ + } \ + \ + ur; \ +}) + /* Rx polling path */ /** @@ -1305,6 +1368,7 @@ __libeth_xdp_run_flush(struct libeth_xdp_buff *xdp, * @fl: driver ``XDP_TX`` bulk flush callback * * Run the attached XDP program and handle all possible verdicts. + * Prefer using it via LIBETH_XDP_DEFINE_RUN{,_PASS,_PROG}(). * * Return: true if the buffer should be passed up the stack, false if the poll * should go to the next buffer. @@ -1436,6 +1500,137 @@ __libeth_xdp_finalize_rx(struct libeth_xdp_tx_bulk *bq, u32 flags, rcu_read_unlock(); } +/* + * Helpers to reduce boilerplate code in drivers. + * + * Typical driver Rx flow would be (excl. bulk and buff init, frag attach): + * + * LIBETH_XDP_DEFINE_START(); + * LIBETH_XDP_DEFINE_FLUSH_TX(static driver_xdp_flush_tx, driver_xdp_tx_prep, + * driver_xdp_xmit); + * LIBETH_XDP_DEFINE_RUN(static driver_xdp_run, driver_xdp_run_prog, + * driver_xdp_flush_tx, driver_populate_skb); + * LIBETH_XDP_DEFINE_FINALIZE(static driver_xdp_finalize_rx, + * driver_xdp_flush_tx, driver_xdp_finalize_sq); + * LIBETH_XDP_DEFINE_END(); + * + * This will build a set of 4 static functions. The compiler is free to decide + * whether to inline them. + * Then, in the NAPI polling function: + * + * while (packets < budget) { + * // ... + * driver_xdp_run(xdp, &bq, napi, &rs, desc); + * } + * driver_xdp_finalize_rx(&bq); + */ + +#define LIBETH_XDP_DEFINE_START() \ + __diag_push(); \ + __diag_ignore(GCC, 8, "-Wold-style-declaration", \ + "Allow specifying \'static\' after the return type") + +/** + * LIBETH_XDP_DEFINE_TIMER - define a driver XDPSQ cleanup timer callback + * @name: name of the function to define + * @poll: Tx polling/completion function + */ +#define LIBETH_XDP_DEFINE_TIMER(name, poll) \ +void name(struct work_struct *work) \ +{ \ + libeth_xdpsq_run_timer(work, poll); \ +} + +/** + * LIBETH_XDP_DEFINE_FLUSH_TX - define a driver ``XDP_TX`` bulk flush function + * @name: name of the function to define + * @prep: driver callback to clean an XDPSQ + * @xmit: driver callback to write a HW Tx descriptor + */ +#define LIBETH_XDP_DEFINE_FLUSH_TX(name, prep, xmit) \ + __LIBETH_XDP_DEFINE_FLUSH_TX(name, prep, xmit, xdp) + +#define __LIBETH_XDP_DEFINE_FLUSH_TX(name, prep, xmit, pfx) \ +bool name(struct libeth_xdp_tx_bulk *bq, u32 flags) \ +{ \ + return libeth_##pfx##_tx_flush_bulk(bq, flags, prep, xmit); \ +} + +/** + * LIBETH_XDP_DEFINE_FLUSH_XMIT - define a driver XDP xmit bulk flush function + * @name: name of the function to define + * @prep: driver callback to clean an XDPSQ + * @xmit: driver callback to write a HW Tx descriptor + */ +#define LIBETH_XDP_DEFINE_FLUSH_XMIT(name, prep, xmit) \ +bool name(struct libeth_xdp_tx_bulk *bq, u32 flags) \ +{ \ + return libeth_xdp_xmit_flush_bulk(bq, flags, prep, xmit); \ +} + +/** + * LIBETH_XDP_DEFINE_RUN_PROG - define a driver XDP program run function + * @name: name of the function to define + * @flush: driver callback to flush an ``XDP_TX`` bulk + */ +#define LIBETH_XDP_DEFINE_RUN_PROG(name, flush) \ + bool __LIBETH_XDP_DEFINE_RUN_PROG(name, flush, xdp) + +#define __LIBETH_XDP_DEFINE_RUN_PROG(name, flush, pfx) \ +name(struct libeth_xdp_buff *xdp, struct libeth_xdp_tx_bulk *bq) \ +{ \ + return libeth_##pfx##_run_prog(xdp, bq, flush); \ +} + +/** + * LIBETH_XDP_DEFINE_RUN_PASS - define a driver buffer process + pass function + * @name: name of the function to define + * @run: driver callback to run XDP program (above) + * @populate: driver callback to fill an skb with HW descriptor info + */ +#define LIBETH_XDP_DEFINE_RUN_PASS(name, run, populate) \ + void __LIBETH_XDP_DEFINE_RUN_PASS(name, run, populate, xdp) + +#define __LIBETH_XDP_DEFINE_RUN_PASS(name, run, populate, pfx) \ +name(struct libeth_xdp_buff *xdp, struct libeth_xdp_tx_bulk *bq, \ + struct napi_struct *napi, struct libeth_rq_napi_stats *ss, \ + const void *desc) \ +{ \ + return libeth_##pfx##_run_pass(xdp, bq, napi, ss, desc, run, \ + populate); \ +} + +/** + * LIBETH_XDP_DEFINE_RUN - define a driver buffer process, run + pass function + * @name: name of the function to define + * @run: name of the XDP prog run function to define + * @flush: driver callback to flush an ``XDP_TX`` bulk + * @populate: driver callback to fill an skb with HW descriptor info + */ +#define LIBETH_XDP_DEFINE_RUN(name, run, flush, populate) \ + __LIBETH_XDP_DEFINE_RUN(name, run, flush, populate, XDP) + +#define __LIBETH_XDP_DEFINE_RUN(name, run, flush, populate, pfx) \ + LIBETH_##pfx##_DEFINE_RUN_PROG(static run, flush); \ + LIBETH_##pfx##_DEFINE_RUN_PASS(name, run, populate) + +/** + * LIBETH_XDP_DEFINE_FINALIZE - define a driver Rx NAPI poll finalize function + * @name: name of the function to define + * @flush: driver callback to flush an ``XDP_TX`` bulk + * @finalize: driver callback to finalize an XDPSQ and run the timer + */ +#define LIBETH_XDP_DEFINE_FINALIZE(name, flush, finalize) \ + __LIBETH_XDP_DEFINE_FINALIZE(name, flush, finalize, xdp) + +#define __LIBETH_XDP_DEFINE_FINALIZE(name, flush, finalize, pfx) \ +void name(struct libeth_xdp_tx_bulk *bq) \ +{ \ + libeth_##pfx##_finalize_rx(bq, flush, finalize); \ +} + +#define LIBETH_XDP_DEFINE_END() __diag_pop() + /* Tx buffer completion */ void libeth_xdp_return_buff_bulk(const struct skb_shared_info *sinfo, -- cgit v1.2.3 From 576cc5c13d9ba53a1a24d9b34af2f939a87b7ce8 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 12 Jun 2025 18:02:29 +0200 Subject: libeth: xdp: add RSS hash hint and XDP features setup helpers End the XDP section by adding helpers to setup XDP features, flipping .ndo_xdp_xmit() support at runtime (in case when it's not always on), and calculating the queue clean/refill threshold. Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/libeth/xdp.h | 90 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) (limited to 'include') diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index 46a2ec3c3037..c36b2ca0d04c 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -1631,6 +1631,51 @@ void name(struct libeth_xdp_tx_bulk *bq) \ #define LIBETH_XDP_DEFINE_END() __diag_pop() +/* XMO */ + +/** + * libeth_xdp_buff_to_rq - get RQ pointer from an XDP buffer pointer + * @xdp: &libeth_xdp_buff corresponding to the queue + * @type: typeof() of the driver Rx queue structure + * @member: name of &xdp_rxq_info inside @type + * + * Often times, pointer to the RQ is needed when reading/filling metadata from + * HW descriptors. The helper can be used to quickly jump from an XDP buffer + * to the queue corresponding to its &xdp_rxq_info without introducing + * additional fields (&libeth_xdp_buff is precisely 1 cacheline long on x64). + */ +#define libeth_xdp_buff_to_rq(xdp, type, member) \ + container_of_const((xdp)->base.rxq, type, member) + +/** + * libeth_xdpmo_rx_hash - convert &libeth_rx_pt to an XDP RSS hash metadata + * @hash: pointer to the variable to write the hash to + * @rss_type: pointer to the variable to write the hash type to + * @val: hash value from the HW descriptor + * @pt: libeth parsed packet type + * + * Handle zeroed/non-available hash and convert libeth parsed packet type to + * the corresponding XDP RSS hash type. To be called at the end of + * xdp_metadata_ops idpf_xdpmo::xmo_rx_hash() implementation. + * Note that if the driver doesn't use a constant packet type lookup table but + * generates it at runtime, it must call libeth_rx_pt_gen_hash_type(pt) to + * generate XDP RSS hash type for each packet type. + * + * Return: 0 on success, -ENODATA when the hash is not available. + */ +static inline int libeth_xdpmo_rx_hash(u32 *hash, + enum xdp_rss_hash_type *rss_type, + u32 val, struct libeth_rx_pt pt) +{ + if (unlikely(!val)) + return -ENODATA; + + *hash = val; + *rss_type = pt.hash_type; + + return 0; +} + /* Tx buffer completion */ void libeth_xdp_return_buff_bulk(const struct skb_shared_info *sinfo, @@ -1697,4 +1742,49 @@ static inline void libeth_xdp_complete_tx(struct libeth_sqe *sqe, __libeth_xdp_complete_tx(sqe, cp, libeth_xdp_return_buff_bulk); } +/* Misc */ + +u32 libeth_xdp_queue_threshold(u32 count); + +void __libeth_xdp_set_features(struct net_device *dev, + const struct xdp_metadata_ops *xmo); +void libeth_xdp_set_redirect(struct net_device *dev, bool enable); + +/** + * libeth_xdp_set_features - set XDP features for netdev + * @dev: &net_device to configure + * @...: optional params, see __libeth_xdp_set_features() + * + * Set all the features libeth_xdp supports, including .ndo_xdp_xmit(). That + * said, it should be used only when XDPSQs are always available regardless + * of whether an XDP prog is attached to @dev. + */ +#define libeth_xdp_set_features(dev, ...) \ + CONCATENATE(__libeth_xdp_feat, \ + COUNT_ARGS(__VA_ARGS__))(dev, ##__VA_ARGS__) + +#define __libeth_xdp_feat0(dev) \ + __libeth_xdp_set_features(dev, NULL) +#define __libeth_xdp_feat1(dev, xmo) \ + __libeth_xdp_set_features(dev, xmo) + +/** + * libeth_xdp_set_features_noredir - enable all libeth_xdp features w/o redir + * @dev: target &net_device + * @...: optional params, see __libeth_xdp_set_features() + * + * Enable everything except the .ndo_xdp_xmit() feature, use when XDPSQs are + * not available right after netdev registration. + */ +#define libeth_xdp_set_features_noredir(dev, ...) \ + __libeth_xdp_set_features_noredir(dev, __UNIQUE_ID(dev_), \ + ##__VA_ARGS__) + +#define __libeth_xdp_set_features_noredir(dev, ud, ...) do { \ + struct net_device *ud = (dev); \ + \ + libeth_xdp_set_features(ud, ##__VA_ARGS__); \ + libeth_xdp_set_redirect(ud, false); \ +} while (0) + #endif /* __LIBETH_XDP_H */ -- cgit v1.2.3 From b3ad8450b4dc46c4ab0641f665068fd2a4d1adba Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 12 Jun 2025 18:02:30 +0200 Subject: libeth: xsk: add XSk XDP_TX sending helpers Add Xsk counterparts for XDP_TX buffer sending and completion. The same base structures and functions used from the libeth_xdp core, with adjustments to that XSk Rx always operates on &xdp_buff_xsk for both head and frags. And unlike regular Rx, here unlikely() are used for frags, as the header split gives no benefits for XSk Rx, at least for now. Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/libeth/tx.h | 6 ++ include/net/libeth/xdp.h | 26 +++++++-- include/net/libeth/xsk.h | 148 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 175 insertions(+), 5 deletions(-) create mode 100644 include/net/libeth/xsk.h (limited to 'include') diff --git a/include/net/libeth/tx.h b/include/net/libeth/tx.h index 33b9bb22f6ac..44192bec86d7 100644 --- a/include/net/libeth/tx.h +++ b/include/net/libeth/tx.h @@ -21,6 +21,8 @@ * @LIBETH_SQE_XDP_TX: &skb_shared_info, libeth_xdp_return_buff_bulk(), stats * @LIBETH_SQE_XDP_XMIT: &xdp_frame, unmap and xdp_return_frame_bulk(), stats * @LIBETH_SQE_XDP_XMIT_FRAG: &xdp_frame frag, only unmap DMA + * @LIBETH_SQE_XSK_TX: &libeth_xdp_buff on XSk queue, xsk_buff_free(), stats + * @LIBETH_SQE_XSK_TX_FRAG: &libeth_xdp_buff frag on XSk queue, xsk_buff_free() */ enum libeth_sqe_type { LIBETH_SQE_EMPTY = 0U, @@ -33,6 +35,8 @@ enum libeth_sqe_type { LIBETH_SQE_XDP_TX = __LIBETH_SQE_XDP_START, LIBETH_SQE_XDP_XMIT, LIBETH_SQE_XDP_XMIT_FRAG, + LIBETH_SQE_XSK_TX, + LIBETH_SQE_XSK_TX_FRAG, }; /** @@ -43,6 +47,7 @@ enum libeth_sqe_type { * @skb: &sk_buff to consume * @sinfo: skb shared info of an XDP_TX frame * @xdpf: XDP frame from ::ndo_xdp_xmit() + * @xsk: XSk Rx frame from XDP_TX action * @dma: DMA address to unmap * @len: length of the mapped region to unmap * @nr_frags: number of frags in the frame this buffer belongs to @@ -59,6 +64,7 @@ struct libeth_sqe { struct sk_buff *skb; struct skb_shared_info *sinfo; struct xdp_frame *xdpf; + struct libeth_xdp_buff *xsk; }; DEFINE_DMA_UNMAP_ADDR(dma); diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index c36b2ca0d04c..ab907f36a35b 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -279,6 +279,7 @@ libeth_xdpsq_run_timer(struct work_struct *work, * @LIBETH_XDP_TX_BATCH: batch size for which the queue fill loop is unrolled * @LIBETH_XDP_TX_DROP: indicates the send function must drop frames not sent * @LIBETH_XDP_TX_NDO: whether the send function is called from .ndo_xdp_xmit() + * @LIBETH_XDP_TX_XSK: whether the function is called for ``XDP_TX`` for XSk */ enum { LIBETH_XDP_TX_BULK = DEV_MAP_BULK_SIZE, @@ -286,6 +287,7 @@ enum { LIBETH_XDP_TX_DROP = BIT(0), LIBETH_XDP_TX_NDO = BIT(1), + LIBETH_XDP_TX_XSK = BIT(2), }; /** @@ -314,7 +316,8 @@ enum { * @frag: one (non-head) frag for ``XDP_TX`` * @xdpf: &xdp_frame for the head frag for .ndo_xdp_xmit() * @dma: DMA address of the non-head frag for .ndo_xdp_xmit() - * @len: frag length for .ndo_xdp_xmit() + * @xsk: ``XDP_TX`` for XSk, XDP buffer for any frag + * @len: frag length for XSk ``XDP_TX`` and .ndo_xdp_xmit() * @flags: Tx flags for the above * @opts: combined @len + @flags for the above for speed */ @@ -330,11 +333,13 @@ struct libeth_xdp_tx_frame { /* ``XDP_TX`` frag */ skb_frag_t frag; - /* .ndo_xdp_xmit() */ + /* .ndo_xdp_xmit(), XSk ``XDP_TX`` */ struct { union { struct xdp_frame *xdpf; dma_addr_t dma; + + struct libeth_xdp_buff *xsk; }; union { struct { @@ -386,6 +391,7 @@ struct libeth_xdp_tx_bulk { /** * struct libeth_xdpsq - abstraction for an XDPSQ + * @pool: XSk buffer pool for XSk ``XDP_TX`` * @sqes: array of Tx buffers from the actual queue struct * @descs: opaque pointer to the HW descriptor array * @ntu: pointer to the next free descriptor index @@ -399,6 +405,7 @@ struct libeth_xdp_tx_bulk { * functions can access and modify driver-specific resources. */ struct libeth_xdpsq { + struct xsk_buff_pool *pool; struct libeth_sqe *sqes; void *descs; @@ -697,7 +704,7 @@ void libeth_xdp_tx_exception(struct libeth_xdp_tx_bulk *bq, u32 sent, /** * __libeth_xdp_tx_flush_bulk - internal helper to flush one XDP Tx bulk * @bq: bulk to flush - * @flags: XDP TX flags (.ndo_xdp_xmit() etc.) + * @flags: XDP TX flags (.ndo_xdp_xmit(), XSk etc.) * @prep: driver-specific callback to prepare the queue for sending * @fill: libeth_xdp callback to fill &libeth_sqe and &libeth_xdp_tx_desc * @xmit: driver callback to fill a HW descriptor @@ -1680,12 +1687,14 @@ static inline int libeth_xdpmo_rx_hash(u32 *hash, void libeth_xdp_return_buff_bulk(const struct skb_shared_info *sinfo, struct xdp_frame_bulk *bq, bool frags); +void libeth_xsk_buff_free_slow(struct libeth_xdp_buff *xdp); /** * __libeth_xdp_complete_tx - complete sent XDPSQE * @sqe: SQ element / Tx buffer to complete * @cp: Tx polling/completion params * @bulk: internal callback to bulk-free ``XDP_TX`` buffers + * @xsk: internal callback to free XSk ``XDP_TX`` buffers * * Use the non-underscored version in drivers instead. This one is shared * internally with libeth_tx_complete_any(). @@ -1694,7 +1703,8 @@ void libeth_xdp_return_buff_bulk(const struct skb_shared_info *sinfo, */ static __always_inline void __libeth_xdp_complete_tx(struct libeth_sqe *sqe, struct libeth_cq_pp *cp, - typeof(libeth_xdp_return_buff_bulk) bulk) + typeof(libeth_xdp_return_buff_bulk) bulk, + typeof(libeth_xsk_buff_free_slow) xsk) { enum libeth_sqe_type type = sqe->type; @@ -1717,6 +1727,10 @@ __libeth_xdp_complete_tx(struct libeth_sqe *sqe, struct libeth_cq_pp *cp, case LIBETH_SQE_XDP_XMIT: xdp_return_frame_bulk(sqe->xdpf, cp->bq); break; + case LIBETH_SQE_XSK_TX: + case LIBETH_SQE_XSK_TX_FRAG: + xsk(sqe->xsk); + break; default: break; } @@ -1724,6 +1738,7 @@ __libeth_xdp_complete_tx(struct libeth_sqe *sqe, struct libeth_cq_pp *cp, switch (type) { case LIBETH_SQE_XDP_TX: case LIBETH_SQE_XDP_XMIT: + case LIBETH_SQE_XSK_TX: cp->xdp_tx -= sqe->nr_frags; cp->xss->packets++; @@ -1739,7 +1754,8 @@ __libeth_xdp_complete_tx(struct libeth_sqe *sqe, struct libeth_cq_pp *cp, static inline void libeth_xdp_complete_tx(struct libeth_sqe *sqe, struct libeth_cq_pp *cp) { - __libeth_xdp_complete_tx(sqe, cp, libeth_xdp_return_buff_bulk); + __libeth_xdp_complete_tx(sqe, cp, libeth_xdp_return_buff_bulk, + libeth_xsk_buff_free_slow); } /* Misc */ diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h new file mode 100644 index 000000000000..af69b46fa7e4 --- /dev/null +++ b/include/net/libeth/xsk.h @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2025 Intel Corporation */ + +#ifndef __LIBETH_XSK_H +#define __LIBETH_XSK_H + +#include +#include + +/* ``XDP_TX`` bulking */ + +/** + * libeth_xsk_tx_queue_head - internal helper for queueing XSk ``XDP_TX`` head + * @bq: XDP Tx bulk to queue the head frag to + * @xdp: XSk buffer with the head to queue + * + * Return: false if it's the only frag of the frame, true if it's an S/G frame. + */ +static inline bool libeth_xsk_tx_queue_head(struct libeth_xdp_tx_bulk *bq, + struct libeth_xdp_buff *xdp) +{ + bq->bulk[bq->count++] = (typeof(*bq->bulk)){ + .xsk = xdp, + .len = xdp->base.data_end - xdp->data, + .flags = LIBETH_XDP_TX_FIRST, + }; + + if (likely(!xdp_buff_has_frags(&xdp->base))) + return false; + + bq->bulk[bq->count - 1].flags |= LIBETH_XDP_TX_MULTI; + + return true; +} + +/** + * libeth_xsk_tx_queue_frag - internal helper for queueing XSk ``XDP_TX`` frag + * @bq: XDP Tx bulk to queue the frag to + * @frag: XSk frag to queue + */ +static inline void libeth_xsk_tx_queue_frag(struct libeth_xdp_tx_bulk *bq, + struct libeth_xdp_buff *frag) +{ + bq->bulk[bq->count++] = (typeof(*bq->bulk)){ + .xsk = frag, + .len = frag->base.data_end - frag->data, + }; +} + +/** + * libeth_xsk_tx_queue_bulk - internal helper for queueing XSk ``XDP_TX`` frame + * @bq: XDP Tx bulk to queue the frame to + * @xdp: XSk buffer to queue + * @flush_bulk: driver callback to flush the bulk to the HW queue + * + * Return: true on success, false on flush error. + */ +static __always_inline bool +libeth_xsk_tx_queue_bulk(struct libeth_xdp_tx_bulk *bq, + struct libeth_xdp_buff *xdp, + bool (*flush_bulk)(struct libeth_xdp_tx_bulk *bq, + u32 flags)) +{ + bool ret = true; + + if (unlikely(bq->count == LIBETH_XDP_TX_BULK) && + unlikely(!flush_bulk(bq, LIBETH_XDP_TX_XSK))) { + libeth_xsk_buff_free_slow(xdp); + return false; + } + + if (!libeth_xsk_tx_queue_head(bq, xdp)) + goto out; + + for (const struct libeth_xdp_buff *head = xdp; ; ) { + xdp = container_of(xsk_buff_get_frag(&head->base), + typeof(*xdp), base); + if (!xdp) + break; + + if (unlikely(bq->count == LIBETH_XDP_TX_BULK) && + unlikely(!flush_bulk(bq, LIBETH_XDP_TX_XSK))) { + ret = false; + break; + } + + libeth_xsk_tx_queue_frag(bq, xdp); + } + +out: + bq->bulk[bq->count - 1].flags |= LIBETH_XDP_TX_LAST; + + return ret; +} + +/** + * libeth_xsk_tx_fill_buf - internal helper to fill XSk ``XDP_TX`` &libeth_sqe + * @frm: XDP Tx frame from the bulk + * @i: index on the HW queue + * @sq: XDPSQ abstraction for the queue + * @priv: private data + * + * Return: XDP Tx descriptor with the synced DMA and other info to pass to + * the driver callback. + */ +static inline struct libeth_xdp_tx_desc +libeth_xsk_tx_fill_buf(struct libeth_xdp_tx_frame frm, u32 i, + const struct libeth_xdpsq *sq, u64 priv) +{ + struct libeth_xdp_buff *xdp = frm.xsk; + struct libeth_xdp_tx_desc desc = { + .addr = xsk_buff_xdp_get_dma(&xdp->base), + .opts = frm.opts, + }; + struct libeth_sqe *sqe; + + xsk_buff_raw_dma_sync_for_device(sq->pool, desc.addr, desc.len); + + sqe = &sq->sqes[i]; + sqe->xsk = xdp; + + if (!(desc.flags & LIBETH_XDP_TX_FIRST)) { + sqe->type = LIBETH_SQE_XSK_TX_FRAG; + return desc; + } + + sqe->type = LIBETH_SQE_XSK_TX; + libeth_xdp_tx_fill_stats(sqe, &desc, + xdp_get_shared_info_from_buff(&xdp->base)); + + return desc; +} + +/** + * libeth_xsk_tx_flush_bulk - wrapper to define flush of XSk ``XDP_TX`` bulk + * @bq: bulk to flush + * @flags: Tx flags, see __libeth_xdp_tx_flush_bulk() + * @prep: driver callback to prepare the queue + * @xmit: driver callback to fill a HW descriptor + * + * Use via LIBETH_XSK_DEFINE_FLUSH_TX() to define an XSk ``XDP_TX`` driver + * callback. + */ +#define libeth_xsk_tx_flush_bulk(bq, flags, prep, xmit) \ + __libeth_xdp_tx_flush_bulk(bq, (flags) | LIBETH_XDP_TX_XSK, prep, \ + libeth_xsk_tx_fill_buf, xmit) + +#endif /* __LIBETH_XSK_H */ -- cgit v1.2.3 From 40e846d122df9b299e700ec86d01ef647fc0b09f Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 12 Jun 2025 18:02:31 +0200 Subject: libeth: xsk: add XSk xmit functions Reuse core sending functions to send XSk xmit frames. Both metadata and no metadata pools/driver are supported. libeth_xdp also provides generic XSk metadata ops, currently with the checksum offload only and for cases when HW doesn't require supplying L3/L4 checksum offsets. Drivers are free to pass their own ops. &libeth_xdp_tx_bulk is not used here as it would be redundant; pool->tx_descs are accessed directly. Fake "libeth_xsktmo" is needed to hide implementation details from the drivers when they want to use the generic ops: the original struct is defined in the same file where dev->xsk_tx_metadata_ops gets set to avoid duplication of slowpath; at the same time; XSk xmit functions use local "fast" copy to inline XMO callbacks. Tx descriptor filling loop is unrolled by 8. Suggested-by: Maciej Fijalkowski # optimizations Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/libeth/tx.h | 4 +- include/net/libeth/xdp.h | 73 +++++++++++++++++---- include/net/libeth/xsk.h | 166 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 228 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/libeth/tx.h b/include/net/libeth/tx.h index 44192bec86d7..c3db5c6f1641 100644 --- a/include/net/libeth/tx.h +++ b/include/net/libeth/tx.h @@ -12,7 +12,7 @@ /** * enum libeth_sqe_type - type of &libeth_sqe to act on Tx completion - * @LIBETH_SQE_EMPTY: unused/empty OR XDP_TX frag, no action required + * @LIBETH_SQE_EMPTY: unused/empty OR XDP_TX/XSk frame, no action required * @LIBETH_SQE_CTX: context descriptor with empty SQE, no action required * @LIBETH_SQE_SLAB: kmalloc-allocated buffer, unmap and kfree() * @LIBETH_SQE_FRAG: mapped skb frag, only unmap DMA @@ -93,7 +93,7 @@ struct libeth_sqe { * @bq: XDP frame bulk to combine return operations * @ss: onstack NAPI stats to fill * @xss: onstack XDPSQ NAPI stats to fill - * @xdp_tx: number of XDP frames processed + * @xdp_tx: number of XDP-not-XSk frames processed * @napi: whether it's called from the NAPI context * * libeth uses this structure to access objects needed for performing full diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index ab907f36a35b..c3655458047d 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -293,6 +293,8 @@ enum { /** * enum - &libeth_xdp_tx_frame and &libeth_xdp_tx_desc flags * @LIBETH_XDP_TX_LEN: only for ``XDP_TX``, [15:0] of ::len_fl is actual length + * @LIBETH_XDP_TX_CSUM: for XSk xmit, enable checksum offload + * @LIBETH_XDP_TX_XSKMD: for XSk xmit, mask of the metadata bits * @LIBETH_XDP_TX_FIRST: indicates the frag is the first one of the frame * @LIBETH_XDP_TX_LAST: whether the frag is the last one of the frame * @LIBETH_XDP_TX_MULTI: whether the frame contains several frags @@ -301,6 +303,9 @@ enum { enum { LIBETH_XDP_TX_LEN = GENMASK(15, 0), + LIBETH_XDP_TX_CSUM = XDP_TXMD_FLAGS_CHECKSUM, + LIBETH_XDP_TX_XSKMD = LIBETH_XDP_TX_LEN, + LIBETH_XDP_TX_FIRST = BIT(16), LIBETH_XDP_TX_LAST = BIT(17), LIBETH_XDP_TX_MULTI = BIT(18), @@ -320,6 +325,7 @@ enum { * @len: frag length for XSk ``XDP_TX`` and .ndo_xdp_xmit() * @flags: Tx flags for the above * @opts: combined @len + @flags for the above for speed + * @desc: XSk xmit descriptor for direct casting */ struct libeth_xdp_tx_frame { union { @@ -349,10 +355,14 @@ struct libeth_xdp_tx_frame { aligned_u64 opts; }; }; + + /* XSk xmit */ + struct xdp_desc desc; }; -} __aligned_largest; +} __aligned(sizeof(struct xdp_desc)); static_assert(offsetof(struct libeth_xdp_tx_frame, frag.len) == offsetof(struct libeth_xdp_tx_frame, len_fl)); +static_assert(sizeof(struct libeth_xdp_tx_frame) == sizeof(struct xdp_desc)); /** * struct libeth_xdp_tx_bulk - XDP Tx frame bulk for bulk sending @@ -363,10 +373,13 @@ static_assert(offsetof(struct libeth_xdp_tx_frame, frag.len) == * @count: current number of frames in @bulk * @bulk: array of queued frames for bulk Tx * - * All XDP Tx operations queue each frame to the bulk first and flush it - * when @count reaches the array end. Bulk is always placed on the stack - * for performance. One bulk element contains all the data necessary + * All XDP Tx operations except XSk xmit queue each frame to the bulk first + * and flush it when @count reaches the array end. Bulk is always placed on + * the stack for performance. One bulk element contains all the data necessary * for sending a frame and then freeing it on completion. + * For XSk xmit, Tx descriptor array from &xsk_buff_pool is casted directly + * to &libeth_xdp_tx_frame as they are compatible and the bulk structure is + * not used. */ struct libeth_xdp_tx_bulk { const struct bpf_prog *prog; @@ -391,13 +404,13 @@ struct libeth_xdp_tx_bulk { /** * struct libeth_xdpsq - abstraction for an XDPSQ - * @pool: XSk buffer pool for XSk ``XDP_TX`` + * @pool: XSk buffer pool for XSk ``XDP_TX`` and xmit * @sqes: array of Tx buffers from the actual queue struct * @descs: opaque pointer to the HW descriptor array * @ntu: pointer to the next free descriptor index * @count: number of descriptors on that queue * @pending: pointer to the number of sent-not-completed descs on that queue - * @xdp_tx: pointer to the above + * @xdp_tx: pointer to the above, but only for non-XSk-xmit frames * @lock: corresponding XDPSQ lock * * Abstraction for driver-independent implementation of Tx. Placed on the stack @@ -438,6 +451,30 @@ struct libeth_xdp_tx_desc { }; } __aligned_largest; +/** + * libeth_xdp_ptr_to_priv - convert pointer to a libeth_xdp u64 priv + * @ptr: pointer to convert + * + * The main sending function passes private data as the largest scalar, u64. + * Use this helper when you want to pass a pointer there. + */ +#define libeth_xdp_ptr_to_priv(ptr) ({ \ + typecheck_pointer(ptr); \ + ((u64)(uintptr_t)(ptr)); \ +}) +/** + * libeth_xdp_priv_to_ptr - convert libeth_xdp u64 priv to a pointer + * @priv: private data to convert + * + * The main sending function passes private data as the largest scalar, u64. + * Use this helper when your callback takes this u64 and you want to convert + * it back to a pointer. + */ +#define libeth_xdp_priv_to_ptr(priv) ({ \ + static_assert(__same_type(priv, u64)); \ + ((const void *)(uintptr_t)(priv)); \ +}) + /** * libeth_xdp_tx_xmit_bulk - main XDP Tx function * @bulk: array of frames to send @@ -450,10 +487,11 @@ struct libeth_xdp_tx_desc { * @xmit: callback for filling a HW descriptor with the frame info * * Internal abstraction for placing @n XDP Tx frames on the HW XDPSQ. Used for - * all types of frames. + * all types of frames: ``XDP_TX``, .ndo_xdp_xmit(), XSk ``XDP_TX``, and XSk + * xmit. * @prep must lock the queue as this function releases it at the end. @unroll - * greatly increases the object code size, but also greatly increases - * performance. + * greatly increases the object code size, but also greatly increases XSk xmit + * performance; for other types of frames, it's not enabled. * The compilers inline all those onstack abstractions to direct data accesses. * * Return: number of frames actually placed on the queue, <= @n. The function @@ -709,7 +747,8 @@ void libeth_xdp_tx_exception(struct libeth_xdp_tx_bulk *bq, u32 sent, * @fill: libeth_xdp callback to fill &libeth_sqe and &libeth_xdp_tx_desc * @xmit: driver callback to fill a HW descriptor * - * Internal abstraction to create bulk flush functions for drivers. + * Internal abstraction to create bulk flush functions for drivers. Used for + * everything except XSk xmit. * * Return: true if anything was sent, false otherwise. */ @@ -1763,7 +1802,9 @@ static inline void libeth_xdp_complete_tx(struct libeth_sqe *sqe, u32 libeth_xdp_queue_threshold(u32 count); void __libeth_xdp_set_features(struct net_device *dev, - const struct xdp_metadata_ops *xmo); + const struct xdp_metadata_ops *xmo, + u32 zc_segs, + const struct xsk_tx_metadata_ops *tmo); void libeth_xdp_set_redirect(struct net_device *dev, bool enable); /** @@ -1780,9 +1821,13 @@ void libeth_xdp_set_redirect(struct net_device *dev, bool enable); COUNT_ARGS(__VA_ARGS__))(dev, ##__VA_ARGS__) #define __libeth_xdp_feat0(dev) \ - __libeth_xdp_set_features(dev, NULL) + __libeth_xdp_set_features(dev, NULL, 0, NULL) #define __libeth_xdp_feat1(dev, xmo) \ - __libeth_xdp_set_features(dev, xmo) + __libeth_xdp_set_features(dev, xmo, 0, NULL) +#define __libeth_xdp_feat2(dev, xmo, zc_segs) \ + __libeth_xdp_set_features(dev, xmo, zc_segs, NULL) +#define __libeth_xdp_feat3(dev, xmo, zc_segs, tmo) \ + __libeth_xdp_set_features(dev, xmo, zc_segs, tmo) /** * libeth_xdp_set_features_noredir - enable all libeth_xdp features w/o redir @@ -1803,4 +1848,6 @@ void libeth_xdp_set_redirect(struct net_device *dev, bool enable); libeth_xdp_set_redirect(ud, false); \ } while (0) +#define libeth_xsktmo ((const void *)GOLDEN_RATIO_PRIME) + #endif /* __LIBETH_XDP_H */ diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h index af69b46fa7e4..16ca195981fe 100644 --- a/include/net/libeth/xsk.h +++ b/include/net/libeth/xsk.h @@ -7,6 +7,11 @@ #include #include +/* ``XDP_TXMD_FLAGS_VALID`` is defined only under ``CONFIG_XDP_SOCKETS`` */ +#ifdef XDP_TXMD_FLAGS_VALID +static_assert(XDP_TXMD_FLAGS_VALID <= LIBETH_XDP_TX_XSKMD); +#endif + /* ``XDP_TX`` bulking */ /** @@ -145,4 +150,165 @@ libeth_xsk_tx_fill_buf(struct libeth_xdp_tx_frame frm, u32 i, __libeth_xdp_tx_flush_bulk(bq, (flags) | LIBETH_XDP_TX_XSK, prep, \ libeth_xsk_tx_fill_buf, xmit) +/* XSk TMO */ + +/** + * libeth_xsktmo_req_csum - XSk Tx metadata op to request checksum offload + * @csum_start: unused + * @csum_offset: unused + * @priv: &libeth_xdp_tx_desc from the filling helper + * + * Generic implementation of ::tmo_request_checksum. Works only when HW doesn't + * require filling checksum offsets and other parameters beside the checksum + * request bit. + * Consider using within @libeth_xsktmo unless the driver requires HW-specific + * callbacks. + */ +static inline void libeth_xsktmo_req_csum(u16 csum_start, u16 csum_offset, + void *priv) +{ + ((struct libeth_xdp_tx_desc *)priv)->flags |= LIBETH_XDP_TX_CSUM; +} + +/* Only to inline the callbacks below, use @libeth_xsktmo in drivers instead */ +static const struct xsk_tx_metadata_ops __libeth_xsktmo = { + .tmo_request_checksum = libeth_xsktmo_req_csum, +}; + +/** + * __libeth_xsk_xmit_fill_buf_md - internal helper to prepare XSk xmit w/meta + * @xdesc: &xdp_desc from the XSk buffer pool + * @sq: XDPSQ abstraction for the queue + * @priv: XSk Tx metadata ops + * + * Same as __libeth_xsk_xmit_fill_buf(), but requests metadata pointer and + * fills additional fields in &libeth_xdp_tx_desc to ask for metadata offload. + * + * Return: XDP Tx descriptor with the DMA, metadata request bits, and other + * info to pass to the driver callback. + */ +static __always_inline struct libeth_xdp_tx_desc +__libeth_xsk_xmit_fill_buf_md(const struct xdp_desc *xdesc, + const struct libeth_xdpsq *sq, + u64 priv) +{ + const struct xsk_tx_metadata_ops *tmo = libeth_xdp_priv_to_ptr(priv); + struct libeth_xdp_tx_desc desc; + struct xdp_desc_ctx ctx; + + ctx = xsk_buff_raw_get_ctx(sq->pool, xdesc->addr); + desc = (typeof(desc)){ + .addr = ctx.dma, + .len = xdesc->len, + }; + + BUILD_BUG_ON(!__builtin_constant_p(tmo == libeth_xsktmo)); + tmo = tmo == libeth_xsktmo ? &__libeth_xsktmo : tmo; + + xsk_tx_metadata_request(ctx.meta, tmo, &desc); + + return desc; +} + +/* XSk xmit implementation */ + +/** + * __libeth_xsk_xmit_fill_buf - internal helper to prepare XSk xmit w/o meta + * @xdesc: &xdp_desc from the XSk buffer pool + * @sq: XDPSQ abstraction for the queue + * + * Return: XDP Tx descriptor with the DMA and other info to pass to + * the driver callback. + */ +static inline struct libeth_xdp_tx_desc +__libeth_xsk_xmit_fill_buf(const struct xdp_desc *xdesc, + const struct libeth_xdpsq *sq) +{ + return (struct libeth_xdp_tx_desc){ + .addr = xsk_buff_raw_get_dma(sq->pool, xdesc->addr), + .len = xdesc->len, + }; +} + +/** + * libeth_xsk_xmit_fill_buf - internal helper to prepare an XSk xmit + * @frm: &xdp_desc from the XSk buffer pool + * @i: index on the HW queue + * @sq: XDPSQ abstraction for the queue + * @priv: XSk Tx metadata ops + * + * Depending on the metadata ops presence (determined at compile time), calls + * the quickest helper to build a libeth XDP Tx descriptor. + * + * Return: XDP Tx descriptor with the synced DMA, metadata request bits, + * and other info to pass to the driver callback. + */ +static __always_inline struct libeth_xdp_tx_desc +libeth_xsk_xmit_fill_buf(struct libeth_xdp_tx_frame frm, u32 i, + const struct libeth_xdpsq *sq, u64 priv) +{ + struct libeth_xdp_tx_desc desc; + + if (priv) + desc = __libeth_xsk_xmit_fill_buf_md(&frm.desc, sq, priv); + else + desc = __libeth_xsk_xmit_fill_buf(&frm.desc, sq); + + desc.flags |= xsk_is_eop_desc(&frm.desc) ? LIBETH_XDP_TX_LAST : 0; + + xsk_buff_raw_dma_sync_for_device(sq->pool, desc.addr, desc.len); + + return desc; +} + +/** + * libeth_xsk_xmit_do_bulk - send XSk xmit frames + * @pool: XSk buffer pool containing the frames to send + * @xdpsq: opaque pointer to driver's XDPSQ struct + * @budget: maximum number of frames can be sent + * @tmo: optional XSk Tx metadata ops + * @prep: driver callback to build a &libeth_xdpsq + * @xmit: driver callback to put frames to a HW queue + * @finalize: driver callback to start a transmission + * + * Implements generic XSk xmit. Always turns on XSk Tx wakeup as it's assumed + * lazy cleaning is used and interrupts are disabled for the queue. + * HW descriptor filling is unrolled by ``LIBETH_XDP_TX_BATCH`` to optimize + * writes. + * Note that unlike other XDP Tx ops, the queue must be locked and cleaned + * prior to calling this function to already know available @budget. + * @prepare must only build a &libeth_xdpsq and return ``U32_MAX``. + * + * Return: false if @budget was exhausted, true otherwise. + */ +static __always_inline bool +libeth_xsk_xmit_do_bulk(struct xsk_buff_pool *pool, void *xdpsq, u32 budget, + const struct xsk_tx_metadata_ops *tmo, + u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq), + void (*xmit)(struct libeth_xdp_tx_desc desc, u32 i, + const struct libeth_xdpsq *sq, u64 priv), + void (*finalize)(void *xdpsq, bool sent, bool flush)) +{ + const struct libeth_xdp_tx_frame *bulk; + bool wake; + u32 n; + + wake = xsk_uses_need_wakeup(pool); + if (wake) + xsk_clear_tx_need_wakeup(pool); + + n = xsk_tx_peek_release_desc_batch(pool, budget); + bulk = container_of(&pool->tx_descs[0], typeof(*bulk), desc); + + libeth_xdp_tx_xmit_bulk(bulk, xdpsq, n, true, + libeth_xdp_ptr_to_priv(tmo), prep, + libeth_xsk_xmit_fill_buf, xmit); + finalize(xdpsq, n, true); + + if (wake) + xsk_set_tx_need_wakeup(pool); + + return n < budget; +} + #endif /* __LIBETH_XSK_H */ -- cgit v1.2.3 From 5495c58c65aa3d650cccaa19dc59115b9a0069a5 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 12 Jun 2025 18:02:32 +0200 Subject: libeth: xsk: add XSk Rx processing support Add XSk counterparts for preparing XSk &libeth_xdp_buff (adding head and frags), running the program, and handling the verdict, inc. XDP_PASS. Shortcuts in comparison with regular Rx: frags and all verdicts except XDP_REDIRECT are under unlikely() and out of line; no checks for XDP program presence as it's always true for XSk. Suggested-by: Maciej Fijalkowski # optimizations Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/libeth/xdp.h | 17 +-- include/net/libeth/xsk.h | 273 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 283 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index c3655458047d..dba09a9168f1 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -1122,18 +1122,19 @@ __libeth_xdp_xmit_do_bulk(struct libeth_xdp_tx_bulk *bq, * Should be called on an onstack XDP Tx bulk before the NAPI polling loop. * Initializes all the needed fields to run libeth_xdp functions. If @num == 0, * assumes XDP is not enabled. + * Do not use for XSk, it has its own optimized helper. */ #define libeth_xdp_tx_init_bulk(bq, prog, dev, xdpsqs, num) \ __libeth_xdp_tx_init_bulk(bq, prog, dev, xdpsqs, num, false, \ __UNIQUE_ID(bq_), __UNIQUE_ID(nqs_)) -#define __libeth_xdp_tx_init_bulk(bq, pr, d, xdpsqs, num, ub, un) do { \ +#define __libeth_xdp_tx_init_bulk(bq, pr, d, xdpsqs, num, xsk, ub, un) do { \ typeof(bq) ub = (bq); \ u32 un = (num); \ \ rcu_read_lock(); \ \ - if (un) { \ + if (un || (xsk)) { \ ub->prog = rcu_dereference(pr); \ ub->dev = (d); \ ub->xdpsq = (xdpsqs)[libeth_xdpsq_id(un)]; \ @@ -1159,6 +1160,7 @@ void __libeth_xdp_return_stash(struct libeth_xdp_buff_stash *stash); * * Should be called before the main NAPI polling loop. Loads the content of * the previously saved stash or initializes the buffer from scratch. + * Do not use for XSk. */ static inline void libeth_xdp_init_buff(struct libeth_xdp_buff *dst, @@ -1378,7 +1380,7 @@ out: * @flush_bulk: driver callback for flushing a bulk * * Internal inline abstraction to run XDP program and additionally handle - * ``XDP_TX`` verdict. + * ``XDP_TX`` verdict. Used by both XDP and XSk, hence @run and @queue. * Do not use directly. * * Return: libeth_xdp prog verdict depending on the prog's verdict. @@ -1408,12 +1410,13 @@ __libeth_xdp_run_flush(struct libeth_xdp_buff *xdp, } /** - * libeth_xdp_run_prog - run XDP program and handle all verdicts + * libeth_xdp_run_prog - run XDP program (non-XSk path) and handle all verdicts * @xdp: XDP buffer to process * @bq: XDP Tx bulk to queue ``XDP_TX`` buffers * @fl: driver ``XDP_TX`` bulk flush callback * - * Run the attached XDP program and handle all possible verdicts. + * Run the attached XDP program and handle all possible verdicts. XSk has its + * own version. * Prefer using it via LIBETH_XDP_DEFINE_RUN{,_PASS,_PROG}(). * * Return: true if the buffer should be passed up the stack, false if the poll @@ -1435,7 +1438,7 @@ __libeth_xdp_run_flush(struct libeth_xdp_buff *xdp, * @run: driver wrapper to run XDP program * @populate: driver callback to populate an skb with the HW descriptor data * - * Inline abstraction that does the following: + * Inline abstraction that does the following (non-XSk path): * 1) adds frame size and frag number (if needed) to the onstack stats; * 2) fills the descriptor metadata to the onstack &libeth_xdp_buff * 3) runs XDP program if present; @@ -1518,7 +1521,7 @@ static inline void libeth_xdp_prep_desc(struct libeth_xdp_buff *xdp, run, populate) /** - * libeth_xdp_finalize_rx - finalize XDPSQ after a NAPI polling loop + * libeth_xdp_finalize_rx - finalize XDPSQ after a NAPI polling loop (non-XSk) * @bq: ``XDP_TX`` frame bulk * @flush: driver callback to flush the bulk * @finalize: driver callback to start sending the frames and run the timer diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h index 16ca195981fe..f3f338e566fc 100644 --- a/include/net/libeth/xsk.h +++ b/include/net/libeth/xsk.h @@ -311,4 +311,277 @@ libeth_xsk_xmit_do_bulk(struct xsk_buff_pool *pool, void *xdpsq, u32 budget, return n < budget; } +/* Rx polling path */ + +/** + * libeth_xsk_tx_init_bulk - initialize XDP Tx bulk for an XSk Rx NAPI poll + * @bq: bulk to initialize + * @prog: RCU pointer to the XDP program (never %NULL) + * @dev: target &net_device + * @xdpsqs: array of driver XDPSQ structs + * @num: number of active XDPSQs, the above array length + * + * Should be called on an onstack XDP Tx bulk before the XSk NAPI polling loop. + * Initializes all the needed fields to run libeth_xdp functions. + * Never checks if @prog is %NULL or @num == 0 as XDP must always be enabled + * when hitting this path. + */ +#define libeth_xsk_tx_init_bulk(bq, prog, dev, xdpsqs, num) \ + __libeth_xdp_tx_init_bulk(bq, prog, dev, xdpsqs, num, true, \ + __UNIQUE_ID(bq_), __UNIQUE_ID(nqs_)) + +struct libeth_xdp_buff *libeth_xsk_buff_add_frag(struct libeth_xdp_buff *head, + struct libeth_xdp_buff *xdp); + +/** + * libeth_xsk_process_buff - attach XSk Rx buffer to &libeth_xdp_buff + * @head: head XSk buffer to attach the XSk buffer to (or %NULL) + * @xdp: XSk buffer to process + * @len: received data length from the descriptor + * + * If @head == %NULL, treats the XSk buffer as head and initializes + * the required fields. Otherwise, attaches the buffer as a frag. + * Already performs DMA sync-for-CPU and frame start prefetch + * (for head buffers only). + * + * Return: head XSk buffer on success or if the descriptor must be skipped + * (empty), %NULL if there is no space for a new frag. + */ +static inline struct libeth_xdp_buff * +libeth_xsk_process_buff(struct libeth_xdp_buff *head, + struct libeth_xdp_buff *xdp, u32 len) +{ + if (unlikely(!len)) { + libeth_xsk_buff_free_slow(xdp); + return head; + } + + xsk_buff_set_size(&xdp->base, len); + xsk_buff_dma_sync_for_cpu(&xdp->base); + + if (head) + return libeth_xsk_buff_add_frag(head, xdp); + + prefetch(xdp->data); + + return xdp; +} + +void libeth_xsk_buff_stats_frags(struct libeth_rq_napi_stats *rs, + const struct libeth_xdp_buff *xdp); + +u32 __libeth_xsk_run_prog_slow(struct libeth_xdp_buff *xdp, + const struct libeth_xdp_tx_bulk *bq, + enum xdp_action act, int ret); + +/** + * __libeth_xsk_run_prog - run XDP program on XSk buffer + * @xdp: XSk buffer to run the prog on + * @bq: buffer bulk for ``XDP_TX`` queueing + * + * Internal inline abstraction to run XDP program on XSk Rx path. Handles + * only the most common ``XDP_REDIRECT`` inline, the rest is processed + * externally. + * Reports an XDP prog exception on errors. + * + * Return: libeth_xdp prog verdict depending on the prog's verdict. + */ +static __always_inline u32 +__libeth_xsk_run_prog(struct libeth_xdp_buff *xdp, + const struct libeth_xdp_tx_bulk *bq) +{ + enum xdp_action act; + int ret = 0; + + act = bpf_prog_run_xdp(bq->prog, &xdp->base); + if (unlikely(act != XDP_REDIRECT)) +rest: + return __libeth_xsk_run_prog_slow(xdp, bq, act, ret); + + ret = xdp_do_redirect(bq->dev, &xdp->base, bq->prog); + if (unlikely(ret)) + goto rest; + + return LIBETH_XDP_REDIRECT; +} + +/** + * libeth_xsk_run_prog - run XDP program on XSk path and handle all verdicts + * @xdp: XSk buffer to process + * @bq: XDP Tx bulk to queue ``XDP_TX`` buffers + * @fl: driver ``XDP_TX`` bulk flush callback + * + * Run the attached XDP program and handle all possible verdicts. + * Prefer using it via LIBETH_XSK_DEFINE_RUN{,_PASS,_PROG}(). + * + * Return: libeth_xdp prog verdict depending on the prog's verdict. + */ +#define libeth_xsk_run_prog(xdp, bq, fl) \ + __libeth_xdp_run_flush(xdp, bq, __libeth_xsk_run_prog, \ + libeth_xsk_tx_queue_bulk, fl) + +/** + * __libeth_xsk_run_pass - helper to run XDP program and handle the result + * @xdp: XSk buffer to process + * @bq: XDP Tx bulk to queue ``XDP_TX`` frames + * @napi: NAPI to build an skb and pass it up the stack + * @rs: onstack libeth RQ stats + * @md: metadata that should be filled to the XSk buffer + * @prep: callback for filling the metadata + * @run: driver wrapper to run XDP program + * @populate: driver callback to populate an skb with the HW descriptor data + * + * Inline abstraction, XSk's counterpart of __libeth_xdp_run_pass(), see its + * doc for details. + * + * Return: false if the polling loop must be exited due to lack of free + * buffers, true otherwise. + */ +static __always_inline bool +__libeth_xsk_run_pass(struct libeth_xdp_buff *xdp, + struct libeth_xdp_tx_bulk *bq, struct napi_struct *napi, + struct libeth_rq_napi_stats *rs, const void *md, + void (*prep)(struct libeth_xdp_buff *xdp, + const void *md), + u32 (*run)(struct libeth_xdp_buff *xdp, + struct libeth_xdp_tx_bulk *bq), + bool (*populate)(struct sk_buff *skb, + const struct libeth_xdp_buff *xdp, + struct libeth_rq_napi_stats *rs)) +{ + struct sk_buff *skb; + u32 act; + + rs->bytes += xdp->base.data_end - xdp->data; + rs->packets++; + + if (unlikely(xdp_buff_has_frags(&xdp->base))) + libeth_xsk_buff_stats_frags(rs, xdp); + + if (prep && (!__builtin_constant_p(!!md) || md)) + prep(xdp, md); + + act = run(xdp, bq); + if (likely(act == LIBETH_XDP_REDIRECT)) + return true; + + if (act != LIBETH_XDP_PASS) + return act != LIBETH_XDP_ABORTED; + + skb = xdp_build_skb_from_zc(&xdp->base); + if (unlikely(!skb)) { + libeth_xsk_buff_free_slow(xdp); + return true; + } + + if (unlikely(!populate(skb, xdp, rs))) { + napi_consume_skb(skb, true); + return true; + } + + napi_gro_receive(napi, skb); + + return true; +} + +/** + * libeth_xsk_run_pass - helper to run XDP program and handle the result + * @xdp: XSk buffer to process + * @bq: XDP Tx bulk to queue ``XDP_TX`` frames + * @napi: NAPI to build an skb and pass it up the stack + * @rs: onstack libeth RQ stats + * @desc: pointer to the HW descriptor for that frame + * @run: driver wrapper to run XDP program + * @populate: driver callback to populate an skb with the HW descriptor data + * + * Wrapper around the underscored version when "fill the descriptor metadata" + * means just writing the pointer to the HW descriptor as @xdp->desc. + */ +#define libeth_xsk_run_pass(xdp, bq, napi, rs, desc, run, populate) \ + __libeth_xsk_run_pass(xdp, bq, napi, rs, desc, libeth_xdp_prep_desc, \ + run, populate) + +/** + * libeth_xsk_finalize_rx - finalize XDPSQ after an XSk NAPI polling loop + * @bq: ``XDP_TX`` frame bulk + * @flush: driver callback to flush the bulk + * @finalize: driver callback to start sending the frames and run the timer + * + * Flush the bulk if there are frames left to send, kick the queue and flush + * the XDP maps. + */ +#define libeth_xsk_finalize_rx(bq, flush, finalize) \ + __libeth_xdp_finalize_rx(bq, LIBETH_XDP_TX_XSK, flush, finalize) + +/* + * Helpers to reduce boilerplate code in drivers. + * + * Typical driver XSk Rx flow would be (excl. bulk and buff init, frag attach): + * + * LIBETH_XDP_DEFINE_START(); + * LIBETH_XSK_DEFINE_FLUSH_TX(static driver_xsk_flush_tx, driver_xsk_tx_prep, + * driver_xdp_xmit); + * LIBETH_XSK_DEFINE_RUN(static driver_xsk_run, driver_xsk_run_prog, + * driver_xsk_flush_tx, driver_populate_skb); + * LIBETH_XSK_DEFINE_FINALIZE(static driver_xsk_finalize_rx, + * driver_xsk_flush_tx, driver_xdp_finalize_sq); + * LIBETH_XDP_DEFINE_END(); + * + * This will build a set of 4 static functions. The compiler is free to decide + * whether to inline them. + * Then, in the NAPI polling function: + * + * while (packets < budget) { + * // ... + * if (!driver_xsk_run(xdp, &bq, napi, &rs, desc)) + * break; + * } + * driver_xsk_finalize_rx(&bq); + */ + +/** + * LIBETH_XSK_DEFINE_FLUSH_TX - define a driver XSk ``XDP_TX`` flush function + * @name: name of the function to define + * @prep: driver callback to clean an XDPSQ + * @xmit: driver callback to write a HW Tx descriptor + */ +#define LIBETH_XSK_DEFINE_FLUSH_TX(name, prep, xmit) \ + __LIBETH_XDP_DEFINE_FLUSH_TX(name, prep, xmit, xsk) + +/** + * LIBETH_XSK_DEFINE_RUN_PROG - define a driver XDP program run function + * @name: name of the function to define + * @flush: driver callback to flush an XSk ``XDP_TX`` bulk + */ +#define LIBETH_XSK_DEFINE_RUN_PROG(name, flush) \ + u32 __LIBETH_XDP_DEFINE_RUN_PROG(name, flush, xsk) + +/** + * LIBETH_XSK_DEFINE_RUN_PASS - define a driver buffer process + pass function + * @name: name of the function to define + * @run: driver callback to run XDP program (above) + * @populate: driver callback to fill an skb with HW descriptor info + */ +#define LIBETH_XSK_DEFINE_RUN_PASS(name, run, populate) \ + bool __LIBETH_XDP_DEFINE_RUN_PASS(name, run, populate, xsk) + +/** + * LIBETH_XSK_DEFINE_RUN - define a driver buffer process, run + pass function + * @name: name of the function to define + * @run: name of the XDP prog run function to define + * @flush: driver callback to flush an XSk ``XDP_TX`` bulk + * @populate: driver callback to fill an skb with HW descriptor info + */ +#define LIBETH_XSK_DEFINE_RUN(name, run, flush, populate) \ + __LIBETH_XDP_DEFINE_RUN(name, run, flush, populate, XSK) + +/** + * LIBETH_XSK_DEFINE_FINALIZE - define a driver XSk NAPI poll finalize function + * @name: name of the function to define + * @flush: driver callback to flush an XSk ``XDP_TX`` bulk + * @finalize: driver callback to finalize an XDPSQ and run the timer + */ +#define LIBETH_XSK_DEFINE_FINALIZE(name, flush, finalize) \ + __LIBETH_XDP_DEFINE_FINALIZE(name, flush, finalize, xsk) + #endif /* __LIBETH_XSK_H */ -- cgit v1.2.3 From 3ced71a8b39e84f91a4fa9d42e85815515f9b1bc Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 12 Jun 2025 18:02:33 +0200 Subject: libeth: xsk: add XSkFQ refill and XSk wakeup helpers XSkFQ refill is pretty generic across the drivers minus FQ descriptor filling and can easily be unified with one inline callback. XSk wakeup is usually not, but here, instead of commonly used "SW interrupts", I picked firing an IPI. In most tests, it showed better performance; it also provides better control for userspace on which CPU will handle the xmit, as SW interrupts honor IRQ affinity no matter which core produces XSk xmit descs (while XDPSQs are associated 1:1 with cores having the same ID). Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/libeth/xsk.h | 98 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) (limited to 'include') diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h index f3f338e566fc..213778a68476 100644 --- a/include/net/libeth/xsk.h +++ b/include/net/libeth/xsk.h @@ -584,4 +584,102 @@ __libeth_xsk_run_pass(struct libeth_xdp_buff *xdp, #define LIBETH_XSK_DEFINE_FINALIZE(name, flush, finalize) \ __LIBETH_XDP_DEFINE_FINALIZE(name, flush, finalize, xsk) +/* Refilling */ + +/** + * struct libeth_xskfq - structure representing an XSk buffer (fill) queue + * @fp: hotpath part of the structure + * @pool: &xsk_buff_pool for buffer management + * @fqes: array of XSk buffer pointers + * @descs: opaque pointer to the HW descriptor array + * @ntu: index of the next buffer to poll + * @count: number of descriptors/buffers the queue has + * @pending: current number of XSkFQEs to refill + * @thresh: threshold below which the queue is refilled + * @buf_len: HW-writeable length per each buffer + * @nid: ID of the closest NUMA node with memory + */ +struct libeth_xskfq { + struct_group_tagged(libeth_xskfq_fp, fp, + struct xsk_buff_pool *pool; + struct libeth_xdp_buff **fqes; + void *descs; + + u32 ntu; + u32 count; + ); + + /* Cold fields */ + u32 pending; + u32 thresh; + + u32 buf_len; + int nid; +}; + +int libeth_xskfq_create(struct libeth_xskfq *fq); +void libeth_xskfq_destroy(struct libeth_xskfq *fq); + +/** + * libeth_xsk_buff_xdp_get_dma - get DMA address of XSk &libeth_xdp_buff + * @xdp: buffer to get the DMA addr for + */ +#define libeth_xsk_buff_xdp_get_dma(xdp) \ + xsk_buff_xdp_get_dma(&(xdp)->base) + +/** + * libeth_xskfqe_alloc - allocate @n XSk Rx buffers + * @fq: hotpath part of the XSkFQ, usually onstack + * @n: number of buffers to allocate + * @fill: driver callback to write DMA addresses to HW descriptors + * + * Note that @fq->ntu gets updated, but ::pending must be recalculated + * by the caller. + * + * Return: number of buffers refilled. + */ +static __always_inline u32 +libeth_xskfqe_alloc(struct libeth_xskfq_fp *fq, u32 n, + void (*fill)(const struct libeth_xskfq_fp *fq, u32 i)) +{ + u32 this, ret, done = 0; + struct xdp_buff **xskb; + + this = fq->count - fq->ntu; + if (likely(this > n)) + this = n; + +again: + xskb = (typeof(xskb))&fq->fqes[fq->ntu]; + ret = xsk_buff_alloc_batch(fq->pool, xskb, this); + + for (u32 i = 0, ntu = fq->ntu; likely(i < ret); i++) + fill(fq, ntu + i); + + done += ret; + fq->ntu += ret; + + if (likely(fq->ntu < fq->count) || unlikely(ret < this)) + goto out; + + fq->ntu = 0; + + if (this < n) { + this = n - this; + goto again; + } + +out: + return done; +} + +/* .ndo_xsk_wakeup */ + +void libeth_xsk_init_wakeup(call_single_data_t *csd, struct napi_struct *napi); +void libeth_xsk_wakeup(call_single_data_t *csd, u32 qid); + +/* Pool setup */ + +int libeth_xsk_setup_pool(struct net_device *dev, u32 qid, bool enable); + #endif /* __LIBETH_XSK_H */ -- cgit v1.2.3 From 80bae9df2108cb72a060ee5235614d7c072af1de Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 12 Jun 2025 18:02:34 +0200 Subject: libeth: xdp, xsk: access adjacent u32s as u64 where applicable On 64-bit systems, writing/reading one u64 is faster than two u32s even when they're are adjacent in a struct. The compilers won't guarantee they will combine those; I observed both successful and unsuccessful attempts with both GCC and Clang, and it's not easy to say what it depends on. There's a few places in libeth_xdp winning up to several percent from combined access (both performance and object code size, especially when unrolling). Add __LIBETH_WORD_ACCESS and use it there on LE. Drivers are free to optimize HW-specific callbacks under the same definition. Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/libeth/xdp.h | 29 ++++++++++++++++++++++++++--- include/net/libeth/xsk.h | 10 +++++----- 2 files changed, 31 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index dba09a9168f1..6ce6aec6884c 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -475,6 +475,21 @@ struct libeth_xdp_tx_desc { ((const void *)(uintptr_t)(priv)); \ }) +/* + * On 64-bit systems, assigning one u64 is faster than two u32s. When ::len + * occupies lowest 32 bits (LE), whole ::opts can be assigned directly instead. + */ +#ifdef __LITTLE_ENDIAN +#define __LIBETH_WORD_ACCESS 1 +#endif +#ifdef __LIBETH_WORD_ACCESS +#define __libeth_xdp_tx_len(flen, ...) \ + .opts = ((flen) | FIELD_PREP(GENMASK_ULL(63, 32), (__VA_ARGS__ + 0))) +#else +#define __libeth_xdp_tx_len(flen, ...) \ + .len = (flen), .flags = (__VA_ARGS__ + 0) +#endif + /** * libeth_xdp_tx_xmit_bulk - main XDP Tx function * @bulk: array of frames to send @@ -870,8 +885,7 @@ static inline u32 libeth_xdp_xmit_queue_head(struct libeth_xdp_tx_bulk *bq, bq->bulk[bq->count++] = (typeof(*bq->bulk)){ .xdpf = xdpf, - .len = xdpf->len, - .flags = LIBETH_XDP_TX_FIRST, + __libeth_xdp_tx_len(xdpf->len, LIBETH_XDP_TX_FIRST), }; if (!xdp_frame_has_frags(xdpf)) @@ -902,7 +916,7 @@ static inline bool libeth_xdp_xmit_queue_frag(struct libeth_xdp_tx_bulk *bq, bq->bulk[bq->count++] = (typeof(*bq->bulk)){ .dma = dma, - .len = skb_frag_size(frag), + __libeth_xdp_tx_len(skb_frag_size(frag)), }; return true; @@ -1260,6 +1274,7 @@ bool libeth_xdp_buff_add_frag(struct libeth_xdp_buff *xdp, * Internal, use libeth_xdp_process_buff() instead. Initializes XDP buffer * head with the Rx buffer data: data pointer, length, headroom, and * truesize/tailroom. Zeroes the flags. + * Uses faster single u64 write instead of per-field access. */ static inline void libeth_xdp_prepare_buff(struct libeth_xdp_buff *xdp, const struct libeth_fqe *fqe, @@ -1267,7 +1282,15 @@ static inline void libeth_xdp_prepare_buff(struct libeth_xdp_buff *xdp, { const struct page *page = __netmem_to_page(fqe->netmem); +#ifdef __LIBETH_WORD_ACCESS + static_assert(offsetofend(typeof(xdp->base), flags) - + offsetof(typeof(xdp->base), frame_sz) == + sizeof(u64)); + + *(u64 *)&xdp->base.frame_sz = fqe->truesize; +#else xdp_init_buff(&xdp->base, fqe->truesize, xdp->base.rxq); +#endif xdp_prepare_buff(&xdp->base, page_address(page) + fqe->offset, page->pp->p.offset, len, true); } diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h index 213778a68476..481a7b28e6f2 100644 --- a/include/net/libeth/xsk.h +++ b/include/net/libeth/xsk.h @@ -26,8 +26,8 @@ static inline bool libeth_xsk_tx_queue_head(struct libeth_xdp_tx_bulk *bq, { bq->bulk[bq->count++] = (typeof(*bq->bulk)){ .xsk = xdp, - .len = xdp->base.data_end - xdp->data, - .flags = LIBETH_XDP_TX_FIRST, + __libeth_xdp_tx_len(xdp->base.data_end - xdp->data, + LIBETH_XDP_TX_FIRST), }; if (likely(!xdp_buff_has_frags(&xdp->base))) @@ -48,7 +48,7 @@ static inline void libeth_xsk_tx_queue_frag(struct libeth_xdp_tx_bulk *bq, { bq->bulk[bq->count++] = (typeof(*bq->bulk)){ .xsk = frag, - .len = frag->base.data_end - frag->data, + __libeth_xdp_tx_len(frag->base.data_end - frag->data), }; } @@ -199,7 +199,7 @@ __libeth_xsk_xmit_fill_buf_md(const struct xdp_desc *xdesc, ctx = xsk_buff_raw_get_ctx(sq->pool, xdesc->addr); desc = (typeof(desc)){ .addr = ctx.dma, - .len = xdesc->len, + __libeth_xdp_tx_len(xdesc->len), }; BUILD_BUG_ON(!__builtin_constant_p(tmo == libeth_xsktmo)); @@ -226,7 +226,7 @@ __libeth_xsk_xmit_fill_buf(const struct xdp_desc *xdesc, { return (struct libeth_xdp_tx_desc){ .addr = xsk_buff_raw_get_dma(sq->pool, xdesc->addr), - .len = xdesc->len, + __libeth_xdp_tx_len(xdesc->len), }; } -- cgit v1.2.3 From acc379c63ade8e247fb792ccdd4ae9a208530c1a Mon Sep 17 00:00:00 2001 From: Andrea della Porta Date: Thu, 29 May 2025 15:50:38 +0200 Subject: dt-bindings: clock: Add RaspberryPi RP1 clock bindings Add device tree bindings for the clock generator found in RP1 multi function device, and relative entries in MAINTAINERS file. Signed-off-by: Andrea della Porta Reviewed-by: Krzysztof Kozlowski Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20250529135052.28398-1-andrea.porta@suse.com Signed-off-by: Florian Fainelli --- include/dt-bindings/clock/raspberrypi,rp1-clocks.h | 61 ++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 include/dt-bindings/clock/raspberrypi,rp1-clocks.h (limited to 'include') diff --git a/include/dt-bindings/clock/raspberrypi,rp1-clocks.h b/include/dt-bindings/clock/raspberrypi,rp1-clocks.h new file mode 100644 index 000000000000..248efb895f35 --- /dev/null +++ b/include/dt-bindings/clock/raspberrypi,rp1-clocks.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (C) 2021 Raspberry Pi Ltd. + */ + +#ifndef __DT_BINDINGS_CLOCK_RASPBERRYPI_RP1 +#define __DT_BINDINGS_CLOCK_RASPBERRYPI_RP1 + +#define RP1_PLL_SYS_CORE 0 +#define RP1_PLL_AUDIO_CORE 1 +#define RP1_PLL_VIDEO_CORE 2 + +#define RP1_PLL_SYS 3 +#define RP1_PLL_AUDIO 4 +#define RP1_PLL_VIDEO 5 + +#define RP1_PLL_SYS_PRI_PH 6 +#define RP1_PLL_SYS_SEC_PH 7 +#define RP1_PLL_AUDIO_PRI_PH 8 + +#define RP1_PLL_SYS_SEC 9 +#define RP1_PLL_AUDIO_SEC 10 +#define RP1_PLL_VIDEO_SEC 11 + +#define RP1_CLK_SYS 12 +#define RP1_CLK_SLOW_SYS 13 +#define RP1_CLK_DMA 14 +#define RP1_CLK_UART 15 +#define RP1_CLK_ETH 16 +#define RP1_CLK_PWM0 17 +#define RP1_CLK_PWM1 18 +#define RP1_CLK_AUDIO_IN 19 +#define RP1_CLK_AUDIO_OUT 20 +#define RP1_CLK_I2S 21 +#define RP1_CLK_MIPI0_CFG 22 +#define RP1_CLK_MIPI1_CFG 23 +#define RP1_CLK_PCIE_AUX 24 +#define RP1_CLK_USBH0_MICROFRAME 25 +#define RP1_CLK_USBH1_MICROFRAME 26 +#define RP1_CLK_USBH0_SUSPEND 27 +#define RP1_CLK_USBH1_SUSPEND 28 +#define RP1_CLK_ETH_TSU 29 +#define RP1_CLK_ADC 30 +#define RP1_CLK_SDIO_TIMER 31 +#define RP1_CLK_SDIO_ALT_SRC 32 +#define RP1_CLK_GP0 33 +#define RP1_CLK_GP1 34 +#define RP1_CLK_GP2 35 +#define RP1_CLK_GP3 36 +#define RP1_CLK_GP4 37 +#define RP1_CLK_GP5 38 +#define RP1_CLK_VEC 39 +#define RP1_CLK_DPI 40 +#define RP1_CLK_MIPI0_DPI 41 +#define RP1_CLK_MIPI1_DPI 42 + +/* Extra PLL output channels - RP1B0 only */ +#define RP1_PLL_VIDEO_PRI_PH 43 +#define RP1_PLL_AUDIO_TERN 44 + +#endif -- cgit v1.2.3 From 49d63971f96349cdcff89d21786e3c804e7cd4c0 Mon Sep 17 00:00:00 2001 From: Andrea della Porta Date: Thu, 29 May 2025 15:50:44 +0200 Subject: misc: rp1: RaspberryPi RP1 misc driver The RaspberryPi RP1 is a PCI multi function device containing peripherals ranging from Ethernet to USB controller, I2C, SPI and others. Implement a bare minimum driver to operate the RP1, leveraging actual OF based driver implementations for the on-board peripherals by loading a devicetree overlay during driver probe if the RP1 node is not already present in the DT. The peripherals are accessed by mapping MMIO registers starting from PCI BAR1 region. With the overlay approach we can achieve more generic and agnostic approach to managing this chipset, being that it is a PCI endpoint and could possibly be reused in other hw implementations. The presented approach is also used by Bootlin's Microchip LAN966x patchset (see link) as well, for a similar chipset. In this case, the inclusion tree for the DT overlay is as follow (the arrow points to the includer): rp1-pci.dtso <---- rp1-common.dtsi On the other hand, to ensure compatibility with downstream, this driver can also work with a DT already comprising the RP1 node, so the dynamically loaded overlay will not be used if the DT is already fully defined. The reason why this driver is contained in drivers/misc has been paved by Bootlin's LAN966X driver, which first used the overlay approach to implement non discoverable peripherals behind a PCI bus. For RP1, the same arguments apply: it's not used as an SoC since the driver code is not running on-chip and is not like an MFD since it does not really need all the MFD infrastructure (shared regs, etc.). So, for this particular use, misc has been proposed and deemed as a good choice. For further details about that please check the links. This driver is heavily based on downstream code from RaspberryPi Foundation, and the original author is Phil Elwell. Link: https://datasheets.raspberrypi.com/rp1/rp1-peripherals.pdf Link: https://lore.kernel.org/all/20240612140208.GC1504919@google.com/ Link: https://lore.kernel.org/all/83f7fa09-d0e6-4f36-a27d-cee08979be2a@app.fastmail.com/ Link: https://lore.kernel.org/all/2024081356-mutable-everyday-6f9d@gregkh/ Link: https://lore.kernel.org/all/20240808154658.247873-1-herve.codina@bootlin.com/ Signed-off-by: Andrea della Porta Acked-by: Bjorn Helgaas # quirks.c, pci_ids.h Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250529135052.28398-7-andrea.porta@suse.com Signed-off-by: Florian Fainelli --- include/linux/pci_ids.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e2d71b6fdd84..92ffc4373f6d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2624,6 +2624,9 @@ #define PCI_VENDOR_ID_TEKRAM 0x1de1 #define PCI_DEVICE_ID_TEKRAM_DC290 0xdc29 +#define PCI_VENDOR_ID_RPI 0x1de4 +#define PCI_DEVICE_ID_RPI_RP1_C0 0x0001 + #define PCI_VENDOR_ID_ALIBABA 0x1ded #define PCI_VENDOR_ID_CXL 0x1e98 -- cgit v1.2.3 From 594902c986e269660302f09df9ec4bf1cf017b77 Mon Sep 17 00:00:00 2001 From: Qinyun Tan Date: Sat, 31 May 2025 02:20:53 +0800 Subject: x86,fs/resctrl: Remove inappropriate references to cacheinfo in the resctrl subsystem In the resctrl subsystem's Sub-NUMA Cluster (SNC) mode, the rdt_mon_domain structure representing a NUMA node relies on the cacheinfo interface (rdt_mon_domain::ci) to store L3 cache information (e.g., shared_cpu_map) for monitoring. The L3 cache information of a SNC NUMA node determines which domains are summed for the "top level" L3-scoped events. rdt_mon_domain::ci is initialized using the first online CPU of a NUMA node. When this CPU goes offline, its shared_cpu_map is cleared to contain only the offline CPU itself. Subsequently, attempting to read counters via smp_call_on_cpu(offline_cpu) fails (and error ignored), returning zero values for "top-level events" without any error indication. Replace the cacheinfo references in struct rdt_mon_domain and struct rmid_read with the cacheinfo ID (a unique identifier for the L3 cache). rdt_domain_hdr::cpu_mask contains the online CPUs associated with that domain. When reading "top-level events", select a CPU from rdt_domain_hdr::cpu_mask and utilize its L3 shared_cpu_map to determine valid CPUs for reading RMID counter via the MSR interface. Considering all CPUs associated with the L3 cache improves the chances of picking a housekeeping CPU on which the counter reading work can be queued, avoiding an unnecessary IPI. Fixes: 328ea68874642 ("x86/resctrl: Prepare for new Sub-NUMA Cluster (SNC) monitor files") Signed-off-by: Qinyun Tan Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Tony Luck Link: https://lore.kernel.org/20250530182053.37502-2-qinyuntan@linux.alibaba.com --- include/linux/resctrl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 9ba771f2ddea..6fb4894b8cfd 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -159,7 +159,7 @@ struct rdt_ctrl_domain { /** * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource * @hdr: common header for different domain types - * @ci: cache info for this domain + * @ci_id: cache info id for this domain * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold * @mbm_total: saved state for MBM total bandwidth * @mbm_local: saved state for MBM local bandwidth @@ -170,7 +170,7 @@ struct rdt_ctrl_domain { */ struct rdt_mon_domain { struct rdt_domain_hdr hdr; - struct cacheinfo *ci; + unsigned int ci_id; unsigned long *rmid_busy_llc; struct mbm_state *mbm_total; struct mbm_state *mbm_local; -- cgit v1.2.3 From ae1ae11fb277f1335d6bcd4935ba0ea985af3c32 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Fri, 13 Jun 2025 15:58:00 -0400 Subject: audit,module: restore audit logging in load failure case The move of the module sanity check to earlier skipped the audit logging call in the case of failure and to a place where the previously used context is unavailable. Add an audit logging call for the module loading failure case and get the module name when possible. Link: https://issues.redhat.com/browse/RHEL-52839 Fixes: 02da2cbab452 ("module: move check_modinfo() early to early_mod_check()") Signed-off-by: Richard Guy Briggs Reviewed-by: Petr Pavlu Signed-off-by: Paul Moore --- include/linux/audit.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index 0050ef288ab3..a394614ccd0b 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -417,7 +417,7 @@ extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, extern void __audit_log_capset(const struct cred *new, const struct cred *old); extern void __audit_mmap_fd(int fd, int flags); extern void __audit_openat2_how(struct open_how *how); -extern void __audit_log_kern_module(char *name); +extern void __audit_log_kern_module(const char *name); extern void __audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar); extern void __audit_tk_injoffset(struct timespec64 offset); extern void __audit_ntp_log(const struct audit_ntp_data *ad); @@ -519,7 +519,7 @@ static inline void audit_openat2_how(struct open_how *how) __audit_openat2_how(how); } -static inline void audit_log_kern_module(char *name) +static inline void audit_log_kern_module(const char *name) { if (!audit_dummy_context()) __audit_log_kern_module(name); @@ -677,9 +677,8 @@ static inline void audit_mmap_fd(int fd, int flags) static inline void audit_openat2_how(struct open_how *how) { } -static inline void audit_log_kern_module(char *name) -{ -} +static inline void audit_log_kern_module(const char *name) +{ } static inline void audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar) { } -- cgit v1.2.3 From 3cfbde048b1c0606d0e02ecb0319c8748421bc7c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 12 Jun 2025 09:46:16 -0400 Subject: net/tcp_ao: tracing: Hide tcp_ao events under CONFIG_TCP_AO Several of the tcp_ao events are only called when CONFIG_TCP_AO is defined. As each event can take up to 5K regardless if they are used or not, it's best not to define them when they are not used. Add #ifdef around these events when they are not used. Signed-off-by: Steven Rostedt (Google) Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250612094616.4222daf0@batman.local.home Signed-off-by: Jakub Kicinski --- include/trace/events/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 95f59c1a6f57..54e60c6009e3 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -692,6 +692,7 @@ DEFINE_EVENT(tcp_ao_event, tcp_ao_handshake_failure, TP_ARGS(sk, skb, keyid, rnext, maclen) ); +#ifdef CONFIG_TCP_AO DEFINE_EVENT(tcp_ao_event, tcp_ao_wrong_maclen, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), @@ -830,6 +831,7 @@ DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_rcv_sne_update, TP_PROTO(const struct sock *sk, __u32 new_sne), TP_ARGS(sk, new_sne) ); +#endif /* CONFIG_TCP_AO */ #endif /* _TRACE_TCP_H */ -- cgit v1.2.3 From 260948993a9f99428f801dcb40654205e74aaa47 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 13 Jun 2025 04:31:30 -0700 Subject: netpoll: remove __netpoll_cleanup from exported API Since commit 97714695ef90 ("net: netconsole: Defer netpoll cleanup to avoid lock release during list traversal"), netconsole no longer uses __netpoll_cleanup(). With no remaining users, remove this function from the exported netpoll API. The function remains available internally within netpoll for use by netpoll_cleanup(). Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250613-rework-v3-1-0752bf2e6912@debian.org Signed-off-by: Jakub Kicinski --- include/linux/netpoll.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 0477208ed9ff..a637e5115254 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -69,7 +69,6 @@ void netpoll_print_options(struct netpoll *np); int netpoll_parse_options(struct netpoll *np, char *opt); int __netpoll_setup(struct netpoll *np, struct net_device *ndev); int netpoll_setup(struct netpoll *np); -void __netpoll_cleanup(struct netpoll *np); void __netpoll_free(struct netpoll *np); void netpoll_cleanup(struct netpoll *np); void do_netpoll_cleanup(struct netpoll *np); -- cgit v1.2.3 From afb023329c07af7a9144901a1dad3a80d9e177b1 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 13 Jun 2025 04:31:31 -0700 Subject: netpoll: expose netpoll logging macros in public header Move np_info(), np_err(), and np_notice() macros from internal implementation to the public netpoll header file to make them available for use by netpoll consumers. These logging macros provide consistent formatting for netpoll-related messages by automatically prefixing log output with the netpoll instance name. The goal is to use the exact same format that is being displayed today, instead of creating something netconsole-specific. Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250613-rework-v3-2-0752bf2e6912@debian.org Signed-off-by: Jakub Kicinski --- include/linux/netpoll.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index a637e5115254..72086b8a3dec 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -42,6 +42,13 @@ struct netpoll { struct work_struct refill_wq; }; +#define np_info(np, fmt, ...) \ + pr_info("%s: " fmt, np->name, ##__VA_ARGS__) +#define np_err(np, fmt, ...) \ + pr_err("%s: " fmt, np->name, ##__VA_ARGS__) +#define np_notice(np, fmt, ...) \ + pr_notice("%s: " fmt, np->name, ##__VA_ARGS__) + struct netpoll_info { refcount_t refcnt; -- cgit v1.2.3 From 5a34c9a8536511b6bd43d85bb0211077226c6fdb Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 13 Jun 2025 04:31:32 -0700 Subject: netpoll: relocate netconsole-specific functions to netconsole module Move netpoll_parse_ip_addr() and netpoll_parse_options() from the generic netpoll module to the netconsole module where they are actually used. These functions were originally placed in netpoll but are only consumed by netconsole. This refactoring improves code organization by: - Removing unnecessary exported symbols from netpoll - Making netpoll_parse_options() static (no longer needs global visibility) - Reducing coupling between netpoll and netconsole modules The functions remain functionally identical - this is purely a code reorganization to better reflect their actual usage patterns. Here are the changes: 1) Move both functions from netpoll to netconsole 2) Add static to netpoll_parse_options() 3) Removed the EXPORT_SYMBOL() PS: This diff does not change the function format, so, it is easy to review, but, checkpatch will not be happy. A follow-up patch will address the current issues reported by checkpatch. Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250613-rework-v3-3-0752bf2e6912@debian.org Signed-off-by: Jakub Kicinski --- include/linux/netpoll.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 72086b8a3dec..1b8000954e52 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -73,7 +73,6 @@ static inline void netpoll_poll_enable(struct net_device *dev) { return; } int netpoll_send_udp(struct netpoll *np, const char *msg, int len); void netpoll_print_options(struct netpoll *np); -int netpoll_parse_options(struct netpoll *np, char *opt); int __netpoll_setup(struct netpoll *np, struct net_device *ndev); int netpoll_setup(struct netpoll *np); void __netpoll_free(struct netpoll *np); -- cgit v1.2.3 From ccc7edf0ada83d395b634506eff9616360a99b5a Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 13 Jun 2025 04:31:33 -0700 Subject: netpoll: move netpoll_print_options to netconsole Move netpoll_print_options() from net/core/netpoll.c to drivers/net/netconsole.c and make it static. This function is only used by netconsole, so there's no need to export it or keep it in the public netpoll API. This reduces the netpoll API surface and improves code locality by keeping netconsole-specific functionality within the netconsole driver. Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250613-rework-v3-4-0752bf2e6912@debian.org Signed-off-by: Jakub Kicinski --- include/linux/netpoll.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 1b8000954e52..735e65c3cc11 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -72,7 +72,6 @@ static inline void netpoll_poll_enable(struct net_device *dev) { return; } #endif int netpoll_send_udp(struct netpoll *np, const char *msg, int len); -void netpoll_print_options(struct netpoll *np); int __netpoll_setup(struct netpoll *np, struct net_device *ndev); int netpoll_setup(struct netpoll *np); void __netpoll_free(struct netpoll *np); -- cgit v1.2.3 From 7768c5f417336fa58dbfef9bb7ecd7eeec6d8886 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 13 Jun 2025 10:00:34 -0700 Subject: net: mana: Add handler for hardware servicing events To collaborate with hardware servicing events, upon receiving the special EQE notification from the HW channel, remove the devices on this bus. Then, after a waiting period based on the device specs, rescan the parent bus to recover the devices. Signed-off-by: Haiyang Zhang Reviewed-by: Shradha Gupta Reviewed-by: Simon Horman Link: https://patch.msgid.link/1749834034-18498-1-git-send-email-haiyangz@linux.microsoft.com Signed-off-by: Jakub Kicinski --- include/net/mana/gdma.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 3ce56a816425..bfae59202669 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -58,7 +58,7 @@ enum gdma_eqe_type { GDMA_EQE_HWC_INIT_EQ_ID_DB = 129, GDMA_EQE_HWC_INIT_DATA = 130, GDMA_EQE_HWC_INIT_DONE = 131, - GDMA_EQE_HWC_SOC_RECONFIG = 132, + GDMA_EQE_HWC_FPGA_RECONFIG = 132, GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133, GDMA_EQE_HWC_SOC_SERVICE = 134, GDMA_EQE_RNIC_QP_FATAL = 176, @@ -403,6 +403,8 @@ struct gdma_context { u32 test_event_eq_id; bool is_pf; + bool in_service; + phys_addr_t bar0_pa; void __iomem *bar0_va; void __iomem *shm_base; @@ -578,12 +580,16 @@ enum { /* Driver can handle holes (zeros) in the device list */ #define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11) +/* Driver can self reset on FPGA Reconfig EQE notification */ +#define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17) + #define GDMA_DRV_CAP_FLAGS1 \ (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \ GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \ GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \ - GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP) + GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \ + GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE) #define GDMA_DRV_CAP_FLAGS2 0 -- cgit v1.2.3 From 25d51ebf0f54f9c2424f28bb29125cf24f120df0 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 11 Jun 2025 16:31:51 +0530 Subject: octeontx2: Set appropriate PF, VF masks and shifts based on silicon Number of RVU PFs on CN20K silicon have increased to 96 from maximum of 32 that were supported on earlier silicons. Every RVU PF and VF is identified by HW using a 16bit PF_FUNC value. Due to the change in Max number of PFs in CN20K, the bit encoding of this PF_FUNC has changed. This patch handles the change by using helper functions(using silicon check) to use PF,VF masks and shifts to support both new silicon CN20K, OcteonTx series. These helper functions are used in different modules. Also moved the NIX AF register offset macros to other files which will be posted in coming patches. Signed-off-by: Subbaraya Sundeep Signed-off-by: Sai Krishna Signed-off-by: Sunil Kovvuri Goutham Link: https://patch.msgid.link/1749639716-13868-2-git-send-email-sbhatta@marvell.com Signed-off-by: Jakub Kicinski --- include/linux/soc/marvell/silicons.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/linux/soc/marvell/silicons.h (limited to 'include') diff --git a/include/linux/soc/marvell/silicons.h b/include/linux/soc/marvell/silicons.h new file mode 100644 index 000000000000..66bb9bfaf17d --- /dev/null +++ b/include/linux/soc/marvell/silicons.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2024 Marvell. + */ + +#ifndef __SOC_SILICON_H +#define __SOC_SILICON_H + +#include +#include + +#if defined(CONFIG_ARM64) + +#define CN20K_CHIPID 0x20 +/* + * Silicon check for CN20K family + */ +static inline bool is_cn20k(struct pci_dev *pdev) +{ + return (pdev->subsystem_device & 0xFF) == CN20K_CHIPID; +} +#else +#define is_cn20k(pdev) ((void)(pdev), 0) +#endif + +#endif /* __SOC_SILICON_H */ -- cgit v1.2.3 From 2796ff1e3dcae7a3568f8e428ec9d32a8ee2fb36 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 14 Jun 2025 22:30:43 +0200 Subject: net: phy: add flag is_genphy_driven to struct phy_device In order to get rid of phy_driver_is_genphy() and phy_driver_is_genphy_10g(), as first step add and use a flag phydev->is_genphy_driven. Signed-off-by: Heiner Kallweit Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/3f3ad6dc-402e-4915-8d5a-2306b6d5562b@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 8e2e4fcd050e..32ed27f10639 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -528,6 +528,7 @@ struct macsec_ops; * @mac_managed_pm: Set true if MAC driver takes of suspending/resuming PHY * @wol_enabled: Set to true if the PHY or the attached MAC have Wake-on-LAN * enabled. + * @is_genphy_driven: PHY is driven by one of the generic PHY drivers * @state: State of the PHY for management purposes * @dev_flags: Device-specific flags used by the PHY driver. * @@ -631,6 +632,7 @@ struct phy_device { unsigned is_on_sfp_module:1; unsigned mac_managed_pm:1; unsigned wol_enabled:1; + unsigned is_genphy_driven:1; unsigned autoneg:1; /* The most recently read link state */ -- cgit v1.2.3 From 59e74c92e67e2951d829f9b0d78c5dc1df7c4c88 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 14 Jun 2025 22:31:57 +0200 Subject: net: phy: improve phy_driver_is_genphy Use new flag phydev->is_genphy_driven to simplify this function. Note that this includes a minor functional change: Now this function returns true if ANY of the genphy drivers is bound to the PHY device. We have only one user in DSA driver mt7530, and there the functional change doesn't matter. Signed-off-by: Heiner Kallweit Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/c9ac3a7d-262a-425d-9153-97fe3ca6280a@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 32ed27f10639..97a09e5743ef 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1295,6 +1295,17 @@ static inline bool phy_is_started(struct phy_device *phydev) return phydev->state >= PHY_UP; } +/** + * phy_driver_is_genphy - Convenience function to check whether PHY is driven + * by one of the generic PHY drivers + * @phydev: The phy_device struct + * Return: true if PHY is driven by one of the genphy drivers + */ +static inline bool phy_driver_is_genphy(struct phy_device *phydev) +{ + return phydev->is_genphy_driven; +} + /** * phy_disable_eee_mode - Don't advertise an EEE mode. * @phydev: The phy_device struct @@ -2097,7 +2108,6 @@ module_exit(phy_module_exit) #define module_phy_driver(__phy_drivers) \ phy_module_driver(__phy_drivers, ARRAY_SIZE(__phy_drivers)) -bool phy_driver_is_genphy(struct phy_device *phydev); bool phy_driver_is_genphy_10g(struct phy_device *phydev); #endif /* __PHY_H */ -- cgit v1.2.3 From 42ed7f7e94da01391d3519ffb5747698d2be0a67 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 14 Jun 2025 22:32:47 +0200 Subject: net: phy: remove phy_driver_is_genphy_10g Remove now unused function phy_driver_is_genphy_10g(). Signed-off-by: Heiner Kallweit Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/49b0589a-9604-4ee9-add5-28fbbbe2c2f3@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 97a09e5743ef..b037aab7b71d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -2108,6 +2108,4 @@ module_exit(phy_module_exit) #define module_phy_driver(__phy_drivers) \ phy_module_driver(__phy_drivers, ARRAY_SIZE(__phy_drivers)) -bool phy_driver_is_genphy_10g(struct phy_device *phydev); - #endif /* __PHY_H */ -- cgit v1.2.3 From 5da8a8b8090b5f79a816ba016af3a70a9d7287bf Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Wed, 11 Jun 2025 07:10:01 -0700 Subject: PCI/MSI: Export pci_msix_prepare_desc() for dynamic MSI-X allocations For supporting dynamic MSI-X vector allocation by PCI controllers, enabling the flag MSI_FLAG_PCI_MSIX_ALLOC_DYN is not enough, msix_prepare_msi_desc() to prepare the MSI descriptor is also needed. Export pci_msix_prepare_desc() to allow PCI controllers to support dynamic MSI-X vector allocation. Signed-off-by: Shradha Gupta Reviewed-by: Haiyang Zhang Reviewed-by: Thomas Gleixner Reviewed-by: Saurabh Sengar Acked-by: Bjorn Helgaas --- include/linux/msi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 6863540f4b71..7f254bde5426 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -706,6 +706,8 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, struct irq_domain *parent); u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev); struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev); +void pci_msix_prepare_desc(struct irq_domain *domain, msi_alloc_info_t *arg, + struct msi_desc *desc); #else /* CONFIG_PCI_MSI */ static inline struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev) { -- cgit v1.2.3 From 755391121038c06cb653241aa94dcabd87179f62 Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Wed, 11 Jun 2025 07:11:13 -0700 Subject: net: mana: Allocate MSI-X vectors dynamically Currently, the MANA driver allocates MSI-X vectors statically based on MANA_MAX_NUM_QUEUES and num_online_cpus() values and in some cases ends up allocating more vectors than it needs. This is because, by this time we do not have a HW channel and do not know how many IRQs should be allocated. To avoid this, we allocate 1 MSI-X vector during the creation of HWC and after getting the value supported by hardware, dynamically add the remaining MSI-X vectors. Signed-off-by: Shradha Gupta Reviewed-by: Haiyang Zhang --- include/net/mana/gdma.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 3ce56a816425..87162ba96d91 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -388,7 +388,7 @@ struct gdma_context { unsigned int max_num_queues; unsigned int max_num_msix; unsigned int num_msix_usable; - struct gdma_irq_context *irq_contexts; + struct xarray irq_contexts; /* L2 MTU */ u16 adapter_mtu; @@ -578,12 +578,16 @@ enum { /* Driver can handle holes (zeros) in the device list */ #define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11) +/* Driver supports dynamic MSI-X vector allocation */ +#define GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT BIT(13) + #define GDMA_DRV_CAP_FLAGS1 \ (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \ GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \ GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \ - GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP) + GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \ + GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT) #define GDMA_DRV_CAP_FLAGS2 0 -- cgit v1.2.3 From c3021d6a80ff05034dfee494115ec71f1954e311 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 16 Jun 2025 01:23:59 +0100 Subject: media: v4l2-jpeg: Remove unused v4l2_jpeg_parse_* wrappers The functions: v4l2_jpeg_parse_huffman_tables() v4l2_jpeg_parse_quantization_tables() v4l2_jpeg_parse_scan_header() and v4l2_jpeg_parse_frame_header() were added in 2020 by commit 50733b5b9102 ("media: add v4l2 JPEG helpers") but have remained unused. Remove them. They're all just wrappers around an underlying set of helpers, which are all still called via v4l2_jpeg_parse_header(). Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Hans Verkuil --- include/media/v4l2-jpeg.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-jpeg.h b/include/media/v4l2-jpeg.h index b65658a02e3c..62dda1560275 100644 --- a/include/media/v4l2-jpeg.h +++ b/include/media/v4l2-jpeg.h @@ -169,15 +169,6 @@ struct v4l2_jpeg_header { int v4l2_jpeg_parse_header(void *buf, size_t len, struct v4l2_jpeg_header *out); -int v4l2_jpeg_parse_frame_header(void *buf, size_t len, - struct v4l2_jpeg_frame_header *frame_header); -int v4l2_jpeg_parse_scan_header(void *buf, size_t len, - struct v4l2_jpeg_scan_header *scan_header); -int v4l2_jpeg_parse_quantization_tables(void *buf, size_t len, u8 precision, - struct v4l2_jpeg_reference *q_tables); -int v4l2_jpeg_parse_huffman_tables(void *buf, size_t len, - struct v4l2_jpeg_reference *huffman_tables); - extern const u8 v4l2_jpeg_zigzag_scan_index[V4L2_JPEG_PIXELS_IN_BLOCK]; extern const u8 v4l2_jpeg_ref_table_luma_qt[V4L2_JPEG_PIXELS_IN_BLOCK]; extern const u8 v4l2_jpeg_ref_table_chroma_qt[V4L2_JPEG_PIXELS_IN_BLOCK]; -- cgit v1.2.3 From b29929b819f35503024c6a7e6ad442f6e36c68a0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Jun 2025 20:16:21 +0200 Subject: driver core: Add device_link_test() for testing device link flags To avoid coding mistakes like the one fixed by commit 3860cbe23963 ("PM: sleep: Fix bit masking operation"), introduce device_link_test() for testing device link flags and use it where applicable. No intentional functional impact. Signed-off-by: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/2793309.mvXUDI8C0e@rjwysocki.net Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 4940db137fff..afdd4f7c0d94 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1162,6 +1162,11 @@ void device_links_supplier_sync_state_pause(void); void device_links_supplier_sync_state_resume(void); void device_link_wait_removal(void); +static inline bool device_link_test(const struct device_link *link, u32 flags) +{ + return !!(link->flags & flags); +} + /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ MODULE_ALIAS("char-major-" __stringify(major) "-" __stringify(minor)) -- cgit v1.2.3 From 5660ee54e7982f9097ddc684e90f15bdcc7fef4b Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 2 Jun 2025 13:02:13 +0200 Subject: mm, slab: use frozen pages for large kmalloc Since slab pages are now frozen, it makes sense to have large kmalloc() objects behave same as small kmalloc(), as the choice between the two is an implementation detail depending on allocation size. Notably, increasing refcount on a slab page containing kmalloc() object is not possible anymore, so it should be consistent for large kmalloc pages. Therefore, change large kmalloc to use the frozen pages API. Because of some unexpected fallout in the slab pages case (see commit b9c0e49abfca ("mm: decline to manipulate the refcount on a slab page"), implement the same kind of checks and warnings as part of this change. Notably, networking code using sendpage_ok() to determine whether the page refcount can be manipulated in the network stack should continue behaving correctly. Before this change, the function returns true for large kmalloc pages and page refcount can be manipulated. After this change, the function will return false. Acked-by: Roman Gushchin Acked-by: Harry Yoo Signed-off-by: Vlastimil Babka --- include/linux/mm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0ef2ba0c667a..a35d5958603f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1325,6 +1325,8 @@ static inline void get_page(struct page *page) struct folio *folio = page_folio(page); if (WARN_ON_ONCE(folio_test_slab(folio))) return; + if (WARN_ON_ONCE(folio_test_large_kmalloc(folio))) + return; folio_get(folio); } @@ -1419,7 +1421,7 @@ static inline void put_page(struct page *page) { struct folio *folio = page_folio(page); - if (folio_test_slab(folio)) + if (folio_test_slab(folio) || folio_test_large_kmalloc(folio)) return; folio_put(folio); -- cgit v1.2.3 From 20ca475d9860e14cf389f5a7d5ba9c6437d74613 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 16 Jun 2025 20:33:20 +0100 Subject: mm: rename call_mmap/mmap_prepare to vfs_mmap/mmap_prepare The call_mmap() function violates the existing convention in include/linux/fs.h whereby invocations of virtual file system hooks is performed by functions prefixed with vfs_xxx(). Correct this by renaming call_mmap() to vfs_mmap(). This also avoids confusion as to the fact that f_op->mmap_prepare may be invoked here. Also rename __call_mmap_prepare() function to vfs_mmap_prepare() and adjust to accept a file parameter, this is useful later for nested file systems. Finally, fix up the VMA userland tests and ensure the mmap_prepare -> mmap shim is implemented there. Signed-off-by: Lorenzo Stoakes Link: https://lore.kernel.org/8d389f4994fa736aa8f9172bef8533c10a9e9011.1750099179.git.lorenzo.stoakes@oracle.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4ec77da65f14..c66f235f9e4d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2276,7 +2276,7 @@ static inline bool file_has_valid_mmap_hooks(struct file *file) int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma); -static inline int call_mmap(struct file *file, struct vm_area_struct *vma) +static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma) { if (file->f_op->mmap_prepare) return compat_vma_mmap_prepare(file, vma); @@ -2284,8 +2284,7 @@ static inline int call_mmap(struct file *file, struct vm_area_struct *vma) return file->f_op->mmap(file, vma); } -static inline int __call_mmap_prepare(struct file *file, - struct vm_area_desc *desc) +static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc) { return file->f_op->mmap_prepare(desc); } -- cgit v1.2.3 From 2b5eac0f8c6e79bc152c8804f9f88d16717013ab Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 11 Jun 2025 12:02:47 +0200 Subject: tty: introduce and use tty_port_tty_vhangup() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This code (tty_get -> vhangup -> tty_put) is repeated on few places. Introduce a helper similar to tty_port_tty_hangup() (asynchronous) to handle even vhangup (synchronous). And use it on those places. In fact, reuse the tty_port_tty_hangup()'s code and call tty_vhangup() depending on a new bool parameter. Signed-off-by: "Jiri Slaby (SUSE)" Cc: Karsten Keil Cc: David Lin Cc: Johan Hovold Cc: Alex Elder Cc: Oliver Neukum Cc: Marcel Holtmann Cc: Johan Hedberg Cc: Luiz Augusto von Dentz Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20250611100319.186924-2-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_port.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index 08f89a598366..021f9a8415c0 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -232,7 +232,7 @@ bool tty_port_carrier_raised(struct tty_port *port); void tty_port_raise_dtr_rts(struct tty_port *port); void tty_port_lower_dtr_rts(struct tty_port *port); void tty_port_hangup(struct tty_port *port); -void tty_port_tty_hangup(struct tty_port *port, bool check_clocal); +void __tty_port_tty_hangup(struct tty_port *port, bool check_clocal, bool async); void tty_port_tty_wakeup(struct tty_port *port); int tty_port_block_til_ready(struct tty_port *port, struct tty_struct *tty, struct file *filp); @@ -251,4 +251,14 @@ static inline int tty_port_users(struct tty_port *port) return port->count + port->blocked_open; } +static inline void tty_port_tty_hangup(struct tty_port *port, bool check_clocal) +{ + __tty_port_tty_hangup(port, check_clocal, true); +} + +static inline void tty_port_tty_vhangup(struct tty_port *port) +{ + __tty_port_tty_hangup(port, false, false); +} + #endif -- cgit v1.2.3 From 866380bcf10c810c1c7097641170d53bbe5239ce Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 11 Jun 2025 12:02:52 +0200 Subject: tty: vt: use sane types for userspace API As discussed earlier (see the Link below), use the preferred ioctl types in vt.h (__u8, __u16, ...). These types are already used for the new VT_GETCONSIZECSRPOS. Therefore, the necessary includes are already present. Since now, the types are used for every structure defined in the header now. Note the kernel is built with -funsigned-char, therefore 'char' becomes '__u8' in here. Signed-off-by: "Jiri Slaby (SUSE)" Cc: Nicolas Pitre Link: https://lore.kernel.org/all/p7p83sq1-4ro2-o924-s9o2-30spr74n076o@syhkavp.arg/ Reviewed-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250611100319.186924-7-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/vt.h | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/vt.h b/include/uapi/linux/vt.h index e5b0c492aa18..714483d68c69 100644 --- a/include/uapi/linux/vt.h +++ b/include/uapi/linux/vt.h @@ -19,11 +19,11 @@ #define VT_OPENQRY 0x5600 /* find available vt */ struct vt_mode { - char mode; /* vt mode */ - char waitv; /* if set, hang on writes if not active */ - short relsig; /* signal to raise on release req */ - short acqsig; /* signal to raise on acquisition */ - short frsig; /* unused (set to 0) */ + __u8 mode; /* vt mode */ + __u8 waitv; /* if set, hang on writes if not active */ + __s16 relsig; /* signal to raise on release req */ + __s16 acqsig; /* signal to raise on acquisition */ + __s16 frsig; /* unused (set to 0) */ }; #define VT_GETMODE 0x5601 /* get mode of active vt */ #define VT_SETMODE 0x5602 /* set mode of active vt */ @@ -32,9 +32,9 @@ struct vt_mode { #define VT_ACKACQ 0x02 /* acknowledge switch */ struct vt_stat { - unsigned short v_active; /* active vt */ - unsigned short v_signal; /* signal to send */ - unsigned short v_state; /* vt bitmask */ + __u16 v_active; /* active vt */ + __u16 v_signal; /* signal to send */ + __u16 v_state; /* vt bitmask */ }; #define VT_GETSTATE 0x5603 /* get global vt state info */ #define VT_SENDSIG 0x5604 /* signal to send to bitmask of vts */ @@ -46,19 +46,19 @@ struct vt_stat { #define VT_DISALLOCATE 0x5608 /* free memory associated to vt */ struct vt_sizes { - unsigned short v_rows; /* number of rows */ - unsigned short v_cols; /* number of columns */ - unsigned short v_scrollsize; /* number of lines of scrollback */ + __u16 v_rows; /* number of rows */ + __u16 v_cols; /* number of columns */ + __u16 v_scrollsize; /* number of lines of scrollback */ }; #define VT_RESIZE 0x5609 /* set kernel's idea of screensize */ struct vt_consize { - unsigned short v_rows; /* number of rows */ - unsigned short v_cols; /* number of columns */ - unsigned short v_vlin; /* number of pixel rows on screen */ - unsigned short v_clin; /* number of pixel rows per character */ - unsigned short v_vcol; /* number of pixel columns on screen */ - unsigned short v_ccol; /* number of pixel columns per character */ + __u16 v_rows; /* number of rows */ + __u16 v_cols; /* number of columns */ + __u16 v_vlin; /* number of pixel rows on screen */ + __u16 v_clin; /* number of pixel rows per character */ + __u16 v_vcol; /* number of pixel columns on screen */ + __u16 v_ccol; /* number of pixel columns per character */ }; #define VT_RESIZEX 0x560A /* set kernel's idea of screensize + more */ #define VT_LOCKSWITCH 0x560B /* disallow vt switching */ @@ -66,21 +66,21 @@ struct vt_consize { #define VT_GETHIFONTMASK 0x560D /* return hi font mask */ struct vt_event { - unsigned int event; + __u32 event; #define VT_EVENT_SWITCH 0x0001 /* Console switch */ #define VT_EVENT_BLANK 0x0002 /* Screen blank */ #define VT_EVENT_UNBLANK 0x0004 /* Screen unblank */ #define VT_EVENT_RESIZE 0x0008 /* Resize display */ #define VT_MAX_EVENT 0x000F - unsigned int oldev; /* Old console */ - unsigned int newev; /* New console (if changing) */ - unsigned int pad[4]; /* Padding for expansion */ + __u32 oldev; /* Old console */ + __u32 newev; /* New console (if changing) */ + __u32 pad[4]; /* Padding for expansion */ }; #define VT_WAITEVENT 0x560E /* Wait for an event */ struct vt_setactivate { - unsigned int console; + __u32 console; struct vt_mode mode; }; -- cgit v1.2.3 From f1180ca37abe3d117e4a19be12142fe722612a7c Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 11 Jun 2025 12:02:53 +0200 Subject: tty: vt: use _IO() to define ioctl numbers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _IO*() is the proper way of defining ioctl numbers. All these vt numbers were synthetically built up the same way the _IO() macro does. So instead of implicit hex numbers, use _IO() properly. To not change the pre-existing numbers, use only _IO() (and not _IOR() or _IOW()). The latter would change the numbers indeed. Objdump of vt_ioctl.o reveals no difference with this patch. Again, VT_GETCONSIZECSRPOS already uses _IOR(), so everything is paved for this patch. Signed-off-by: "Jiri Slaby (SUSE)" Cc: Nicolas Pitre Reviewed-by: Nicolas Pitre Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20250611100319.186924-8-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/vt.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/vt.h b/include/uapi/linux/vt.h index 714483d68c69..b60fcdfb2746 100644 --- a/include/uapi/linux/vt.h +++ b/include/uapi/linux/vt.h @@ -14,9 +14,9 @@ /* Note: the ioctl VT_GETSTATE does not work for consoles 16 and higher (since it returns a short) */ -/* 0x56 is 'V', to avoid collision with termios and kd */ +/* 'V' to avoid collision with termios and kd */ -#define VT_OPENQRY 0x5600 /* find available vt */ +#define VT_OPENQRY _IO('V', 0x00) /* find available vt */ struct vt_mode { __u8 mode; /* vt mode */ @@ -25,8 +25,8 @@ struct vt_mode { __s16 acqsig; /* signal to raise on acquisition */ __s16 frsig; /* unused (set to 0) */ }; -#define VT_GETMODE 0x5601 /* get mode of active vt */ -#define VT_SETMODE 0x5602 /* set mode of active vt */ +#define VT_GETMODE _IO('V', 0x01) /* get mode of active vt */ +#define VT_SETMODE _IO('V', 0x02) /* set mode of active vt */ #define VT_AUTO 0x00 /* auto vt switching */ #define VT_PROCESS 0x01 /* process controls switching */ #define VT_ACKACQ 0x02 /* acknowledge switch */ @@ -36,21 +36,21 @@ struct vt_stat { __u16 v_signal; /* signal to send */ __u16 v_state; /* vt bitmask */ }; -#define VT_GETSTATE 0x5603 /* get global vt state info */ -#define VT_SENDSIG 0x5604 /* signal to send to bitmask of vts */ +#define VT_GETSTATE _IO('V', 0x03) /* get global vt state info */ +#define VT_SENDSIG _IO('V', 0x04) /* signal to send to bitmask of vts */ -#define VT_RELDISP 0x5605 /* release display */ +#define VT_RELDISP _IO('V', 0x05) /* release display */ -#define VT_ACTIVATE 0x5606 /* make vt active */ -#define VT_WAITACTIVE 0x5607 /* wait for vt active */ -#define VT_DISALLOCATE 0x5608 /* free memory associated to vt */ +#define VT_ACTIVATE _IO('V', 0x06) /* make vt active */ +#define VT_WAITACTIVE _IO('V', 0x07) /* wait for vt active */ +#define VT_DISALLOCATE _IO('V', 0x08) /* free memory associated to vt */ struct vt_sizes { __u16 v_rows; /* number of rows */ __u16 v_cols; /* number of columns */ __u16 v_scrollsize; /* number of lines of scrollback */ }; -#define VT_RESIZE 0x5609 /* set kernel's idea of screensize */ +#define VT_RESIZE _IO('V', 0x09) /* set kernel's idea of screensize */ struct vt_consize { __u16 v_rows; /* number of rows */ @@ -60,10 +60,10 @@ struct vt_consize { __u16 v_vcol; /* number of pixel columns on screen */ __u16 v_ccol; /* number of pixel columns per character */ }; -#define VT_RESIZEX 0x560A /* set kernel's idea of screensize + more */ -#define VT_LOCKSWITCH 0x560B /* disallow vt switching */ -#define VT_UNLOCKSWITCH 0x560C /* allow vt switching */ -#define VT_GETHIFONTMASK 0x560D /* return hi font mask */ +#define VT_RESIZEX _IO('V', 0x0A) /* set kernel's idea of screensize + more */ +#define VT_LOCKSWITCH _IO('V', 0x0B) /* disallow vt switching */ +#define VT_UNLOCKSWITCH _IO('V', 0x0C) /* allow vt switching */ +#define VT_GETHIFONTMASK _IO('V', 0x0D) /* return hi font mask */ struct vt_event { __u32 event; @@ -77,14 +77,14 @@ struct vt_event { __u32 pad[4]; /* Padding for expansion */ }; -#define VT_WAITEVENT 0x560E /* Wait for an event */ +#define VT_WAITEVENT _IO('V', 0x0E) /* Wait for an event */ struct vt_setactivate { __u32 console; struct vt_mode mode; }; -#define VT_SETACTIVATE 0x560F /* Activate and set the mode of a console */ +#define VT_SETACTIVATE _IO('V', 0x0F) /* Activate and set the mode of a console */ /* get console size and cursor position */ struct vt_consizecsrpos { -- cgit v1.2.3 From fc9ceb501e38cc21066c1638993500b30eda8bdb Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 11 Jun 2025 12:02:54 +0200 Subject: serial: 8250: sanitize uart_port::serial_{in,out}() types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit uart_port::{serial_in,serial_out} (and plat_serial8250_port::* likewise) historically use: * 'unsigned int' for 32-bit register values in reads and writes, and * 'int' for offsets. Make them sane such that: * 'u32' is used for register values, and * 'unsigned int' is used for offsets. While at it, name hooks' parameters, so it is clear what is what. Signed-off-by: "Jiri Slaby (SUSE)" Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Ilpo Järvinen Cc: Andy Shevchenko Cc: Paul Cercueil Cc: Vladimir Zapolskiy Cc: Kunihiko Hayashi Cc: Masami Hiramatsu Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250611100319.186924-9-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 4 ++-- include/linux/serial_core.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 144de7a7948d..01efdce0fda0 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -46,8 +46,8 @@ struct plat_serial8250_port { unsigned int type; /* If UPF_FIXED_TYPE */ upf_t flags; /* UPF_* flags */ u16 bugs; /* port bugs */ - unsigned int (*serial_in)(struct uart_port *, int); - void (*serial_out)(struct uart_port *, int, int); + u32 (*serial_in)(struct uart_port *, unsigned int offset); + void (*serial_out)(struct uart_port *, unsigned int offset, u32 val); u32 (*dl_read)(struct uart_8250_port *up); void (*dl_write)(struct uart_8250_port *up, u32 value); void (*set_termios)(struct uart_port *, diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 914b5e97e056..d65b15449cfe 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -443,8 +443,8 @@ struct uart_port { spinlock_t lock; /* port lock */ unsigned long iobase; /* in/out[bwl] */ unsigned char __iomem *membase; /* read/write[bwl] */ - unsigned int (*serial_in)(struct uart_port *, int); - void (*serial_out)(struct uart_port *, int, int); + u32 (*serial_in)(struct uart_port *, unsigned int offset); + void (*serial_out)(struct uart_port *, unsigned int offset, u32 val); void (*set_termios)(struct uart_port *, struct ktermios *new, const struct ktermios *old); -- cgit v1.2.3 From b013ed403197f3f8c30ddb3ce66fe05a632b3493 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 16 Jun 2025 20:33:22 +0100 Subject: fs: consistently use can_mmap_file() helper Since commit c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"), the f_op->mmap() hook has been deprecated in favour of f_op->mmap_prepare(). Additionally, commit bb666b7c2707 ("mm: add mmap_prepare() compatibility layer for nested file systems") permits the use of the .mmap_prepare() hook even in nested filesystems like overlayfs. There are a number of places where we check only for f_op->mmap - this is incorrect now mmap_prepare exists, so update all of these to use the general helper can_mmap_file(). Most notably, this updates the elf logic to allow for the ability to execute binaries on filesystems which have the .mmap_prepare hook, but additionally we update nested filesystems. Signed-off-by: Lorenzo Stoakes Link: https://lore.kernel.org/b68145b609532e62bab603dd9686faa6562046ec.1750099179.git.lorenzo.stoakes@oracle.com Acked-by: Kees Cook Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index c66f235f9e4d..d4fa1cb0755a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2260,7 +2260,7 @@ struct inode_operations { } ____cacheline_aligned; /* Did the driver provide valid mmap hook configuration? */ -static inline bool file_has_valid_mmap_hooks(struct file *file) +static inline bool can_mmap_file(struct file *file) { bool has_mmap = file->f_op->mmap; bool has_mmap_prepare = file->f_op->mmap_prepare; -- cgit v1.2.3 From 0335f6afd3488d1101f3b15014095fa51b978253 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 16 Jun 2025 20:33:23 +0100 Subject: fs/dax: make it possible to check dev dax support without a VMA This is a prerequisite for adapting those filesystems to use the .mmap_prepare() hook for mmap()'ing which invoke this check as this hook does not have access to a VMA pointer. To effect this, change the signature of daxdev_mapping_supported() and update its callers (ext4 and xfs mmap()'ing hook code). Signed-off-by: Lorenzo Stoakes Link: https://lore.kernel.org/b09de1e8544384074165d92d048e80058d971286.1750099179.git.lorenzo.stoakes@oracle.com Signed-off-by: Christian Brauner --- include/linux/dax.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index dcc9fcdf14e4..78891518291d 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -65,12 +65,13 @@ size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff, /* * Check if given mapping is supported by the file / underlying device. */ -static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, - struct dax_device *dax_dev) +static inline bool daxdev_mapping_supported(vm_flags_t vm_flags, + const struct inode *inode, + struct dax_device *dax_dev) { - if (!(vma->vm_flags & VM_SYNC)) + if (!(vm_flags & VM_SYNC)) return true; - if (!IS_DAX(file_inode(vma->vm_file))) + if (!IS_DAX(inode)) return false; return dax_synchronous(dax_dev); } @@ -110,10 +111,11 @@ static inline void set_dax_nomc(struct dax_device *dax_dev) static inline void set_dax_synchronous(struct dax_device *dax_dev) { } -static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, - struct dax_device *dax_dev) +static inline bool daxdev_mapping_supported(vm_flags_t vm_flags, + const struct inode *inode, + struct dax_device *dax_dev) { - return !(vma->vm_flags & VM_SYNC); + return !(vm_flags & VM_SYNC); } static inline size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) -- cgit v1.2.3 From 5b44297bcfa49ee197cdb8ca6164bef120c4e73c Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 16 Jun 2025 20:33:26 +0100 Subject: mm/filemap: introduce generic_file_*_mmap_prepare() helpers Since commit c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"), the f_op->mmap() hook has been deprecated in favour of f_op->mmap_prepare(). The generic mmap handlers are very simple, so we can very easily convert these in advance of converting file systems which use them. This patch does so. Signed-off-by: Lorenzo Stoakes Link: https://lore.kernel.org/30622c1f0b98c66840bc8c02668bda276a810b70.1750099179.git.lorenzo.stoakes@oracle.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d4fa1cb0755a..fd5e7409489d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3393,8 +3393,10 @@ extern void inode_add_lru(struct inode *inode); extern int sb_set_blocksize(struct super_block *, int); extern int sb_min_blocksize(struct super_block *, int); -extern int generic_file_mmap(struct file *, struct vm_area_struct *); -extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); +int generic_file_mmap(struct file *, struct vm_area_struct *); +int generic_file_mmap_prepare(struct vm_area_desc *desc); +int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); +int generic_file_readonly_mmap_prepare(struct vm_area_desc *desc); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); int generic_write_checks_count(struct kiocb *iocb, loff_t *count); extern int generic_write_check_limits(struct file *file, loff_t pos, -- cgit v1.2.3 From a85b8544d46390469b6ca72d6bfd3ecb7be985ff Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 14 Jun 2025 00:30:37 +0200 Subject: wifi: remove zero-length arrays All of these are really meant to be variable-length, and in the case of s1g_beacon it's actually accessed. Make that one in particular, and a couple of others (that aren't used as arrays now), actually variable. Reported-by: syzbot+fd222bb38e916df26fa4@syzkaller.appspotmail.com Fixes: 1e1f706fc2ce ("wifi: cfg80211/mac80211: correctly parse S1G beacon optional elements") Link: https://patch.msgid.link/20250614003037.a3e82e882251.I2e8b58e56ff2a9f8b06c66f036578b7c1d4e4685@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index ce377f7fb912..22f39e5e2ff1 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1278,7 +1278,7 @@ struct ieee80211_ext { u8 sa[ETH_ALEN]; __le32 timestamp; u8 change_seq; - u8 variable[0]; + u8 variable[]; } __packed s1g_beacon; } u; } __packed __aligned(2); @@ -1536,7 +1536,7 @@ struct ieee80211_mgmt { u8 action_code; u8 dialog_token; __le16 capability; - u8 variable[0]; + u8 variable[]; } __packed tdls_discover_resp; struct { u8 action_code; @@ -1721,35 +1721,35 @@ struct ieee80211_tdls_data { struct { u8 dialog_token; __le16 capability; - u8 variable[0]; + u8 variable[]; } __packed setup_req; struct { __le16 status_code; u8 dialog_token; __le16 capability; - u8 variable[0]; + u8 variable[]; } __packed setup_resp; struct { __le16 status_code; u8 dialog_token; - u8 variable[0]; + u8 variable[]; } __packed setup_cfm; struct { __le16 reason_code; - u8 variable[0]; + u8 variable[]; } __packed teardown; struct { u8 dialog_token; - u8 variable[0]; + u8 variable[]; } __packed discover_req; struct { u8 target_channel; u8 oper_class; - u8 variable[0]; + u8 variable[]; } __packed chan_switch_req; struct { __le16 status_code; - u8 variable[0]; + u8 variable[]; } __packed chan_switch_resp; } u; } __packed; -- cgit v1.2.3 From d8010d4ba43e9f790925375a7de100604a5e2dba Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 11 Sep 2024 10:53:08 +0200 Subject: x86/bugs: Add a Transient Scheduler Attacks mitigation Add the required features detection glue to bugs.c et all in order to support the TSA mitigation. Co-developed-by: Kim Phillips Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Pawan Gupta --- include/linux/cpu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 96a3a0d6a60e..6378370a952f 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -82,6 +82,7 @@ extern ssize_t cpu_show_old_microcode(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- cgit v1.2.3 From 128ea9f6ccfb6960293ae4212f4f97165e42222d Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Sat, 14 Jun 2025 15:35:29 +0200 Subject: workqueue: Add system_percpu_wq and system_dfl_wq Currently, if a user enqueue a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistentcy cannot be addressed without refactoring the API. system_wq is a per-CPU worqueue, yet nothing in its name tells about that CPU affinity constraint, which is very often not required by users. Make it clear by adding a system_percpu_wq. system_unbound_wq should be the default workqueue so as not to enforce locality constraints for random work whenever it's not required. Adding system_dfl_wq to encourage its use when unbound work should be used. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b0dc957c3e56..74b0042709cd 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -427,7 +427,7 @@ enum wq_consts { /* * System-wide workqueues which are always present. * - * system_wq is the one used by schedule[_delayed]_work[_on](). + * system_percpu_wq is the one used by schedule[_delayed]_work[_on](). * Multi-CPU multi-threaded. There are users which expect relatively * short queue flush time. Don't queue works which can run for too * long. @@ -438,7 +438,7 @@ enum wq_consts { * system_long_wq is similar to system_wq but may host long running * works. Queue flushing might take relatively long. * - * system_unbound_wq is unbound workqueue. Workers are not bound to + * system_dfl_wq is unbound workqueue. Workers are not bound to * any specific CPU, not concurrency managed, and all queued works are * executed immediately as long as max_active limit is not reached and * resources are available. @@ -455,10 +455,12 @@ enum wq_consts { * system_bh[_highpri]_wq are convenience interface to softirq. BH work items * are executed in the queueing CPU's BH context in the queueing order. */ -extern struct workqueue_struct *system_wq; +extern struct workqueue_struct *system_wq; /* use system_percpu_wq, this will be removed */ +extern struct workqueue_struct *system_percpu_wq; extern struct workqueue_struct *system_highpri_wq; extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_unbound_wq; +extern struct workqueue_struct *system_dfl_wq; extern struct workqueue_struct *system_freezable_wq; extern struct workqueue_struct *system_power_efficient_wq; extern struct workqueue_struct *system_freezable_power_efficient_wq; -- cgit v1.2.3 From 930c2ea566aff59e962c50b2421d5fcc3b98b8be Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Sat, 14 Jun 2025 15:35:30 +0200 Subject: workqueue: Add new WQ_PERCPU flag Currently if a user enqueue a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistentcy cannot be addressed without refactoring the API. This patch adds a new WQ_PERCPU flag to explicitly request the use of the per-CPU behavior. Both flags coexist for one release cycle to allow callers to transition their calls. Once migration is complete, WQ_UNBOUND can be removed and unbound will become the implicit default. tj: Merged doc patch. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 74b0042709cd..f19072605faa 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -401,6 +401,7 @@ enum wq_flags { * http://thread.gmane.org/gmane.linux.kernel/1480396 */ WQ_POWER_EFFICIENT = 1 << 7, + WQ_PERCPU = 1 << 8, /* bound to a specific cpu */ __WQ_DESTROYING = 1 << 15, /* internal: workqueue is destroying */ __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ -- cgit v1.2.3 From fc92099902fbf21000554678a47654b029c15a4d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Jun 2025 12:36:06 -0300 Subject: tools headers: Synchronize linux/bits.h with the kernel sources To pick up the changes in this cset: 1e7933a575ed8af4 ("uapi: Revert "bitops: avoid integer overflow in GENMASK(_ULL)"") 5b572e8a9f3dcd6e ("bits: introduce fixed-type BIT_U*()") 19408200c094858d ("bits: introduce fixed-type GENMASK_U*()") 31299a5e02112411 ("bits: add comments and newlines to #if, #else and #endif directives") This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/linux/bits.h include/linux/bits.h Please see tools/include/uapi/README for further details. Acked-by: Vincent Mailhol Cc: I Hsin Cheng Cc: Yury Norov Cc: Adrian Hunter Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Lucas De Marchi Cc: Namhyung Kim Cc: Yury Norov Link: https://lore.kernel.org/r/aEr0ZJ60EbshEy6p@x1 Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/bits.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h index a04afef9efca..682b406e1067 100644 --- a/include/uapi/linux/bits.h +++ b/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) -- cgit v1.2.3 From 1257b8786ac689a2ce5fe3e1741c65038035adc6 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Tue, 17 Jun 2025 12:57:22 -0700 Subject: cgroup: support to enable nmi-safe css_rstat_updated Add necessary infrastructure to enable the nmi-safe execution of css_rstat_updated(). Currently css_rstat_updated() takes a per-cpu per-css raw spinlock to add the given css in the per-cpu per-css update tree. However the kernel can not spin in nmi context, so we need to remove the spinning on the raw spinlock in css_rstat_updated(). To support lockless css_rstat_updated(), let's add necessary data structures in the css and ss structures. Signed-off-by: Shakeel Butt Tested-by: JP Kobryn Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index cd7f093e34cd..04191d99228c 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -384,6 +384,9 @@ struct css_rstat_cpu { */ struct cgroup_subsys_state *updated_children; struct cgroup_subsys_state *updated_next; /* NULL if not on the list */ + + struct llist_node lnode; /* lockless list for update */ + struct cgroup_subsys_state *owner; /* back pointer */ }; /* @@ -822,6 +825,7 @@ struct cgroup_subsys { spinlock_t rstat_ss_lock; raw_spinlock_t __percpu *rstat_ss_cpu_lock; + struct llist_head __percpu *lhead; /* lockless update list head */ }; extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; -- cgit v1.2.3 From 6af89c6ca71742e9227e6f8172a86ce1ee16aa85 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Tue, 17 Jun 2025 12:57:24 -0700 Subject: cgroup: remove per-cpu per-subsystem locks The rstat update side used to insert the cgroup whose stats are updated in the update tree and the read side flush the update tree to get the latest uptodate stats. The per-cpu per-subsystem locks were used to synchronize the update and flush side. However now the update side does not access update tree but uses per-cpu lockless lists. So there is no need for locks to synchronize update and flush side. Let's remove them. Suggested-by: JP Kobryn Signed-off-by: Shakeel Butt Tested-by: JP Kobryn Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 7 ------- include/trace/events/cgroup.h | 47 ------------------------------------------- 2 files changed, 54 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 04191d99228c..6b93a64115fe 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -375,12 +375,6 @@ struct css_rstat_cpu { * Child cgroups with stat updates on this cpu since the last read * are linked on the parent's ->updated_children through * ->updated_next. updated_children is terminated by its container css. - * - * In addition to being more compact, singly-linked list pointing to - * the css makes it unnecessary for each per-cpu struct to point back - * to the associated css. - * - * Protected by per-cpu css->ss->rstat_ss_cpu_lock. */ struct cgroup_subsys_state *updated_children; struct cgroup_subsys_state *updated_next; /* NULL if not on the list */ @@ -824,7 +818,6 @@ struct cgroup_subsys { unsigned int depends_on; spinlock_t rstat_ss_lock; - raw_spinlock_t __percpu *rstat_ss_cpu_lock; struct llist_head __percpu *lhead; /* lockless update list head */ }; diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h index 7d332387be6c..ba9229af9a34 100644 --- a/include/trace/events/cgroup.h +++ b/include/trace/events/cgroup.h @@ -257,53 +257,6 @@ DEFINE_EVENT(cgroup_rstat, cgroup_rstat_unlock, TP_ARGS(cgrp, cpu, contended) ); -/* - * Related to per CPU locks: - * global rstat_base_cpu_lock for base stats - * cgroup_subsys::rstat_ss_cpu_lock for subsystem stats - */ -DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_lock_contended, - - TP_PROTO(struct cgroup *cgrp, int cpu, bool contended), - - TP_ARGS(cgrp, cpu, contended) -); - -DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_lock_contended_fastpath, - - TP_PROTO(struct cgroup *cgrp, int cpu, bool contended), - - TP_ARGS(cgrp, cpu, contended) -); - -DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_locked, - - TP_PROTO(struct cgroup *cgrp, int cpu, bool contended), - - TP_ARGS(cgrp, cpu, contended) -); - -DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_locked_fastpath, - - TP_PROTO(struct cgroup *cgrp, int cpu, bool contended), - - TP_ARGS(cgrp, cpu, contended) -); - -DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_unlock, - - TP_PROTO(struct cgroup *cgrp, int cpu, bool contended), - - TP_ARGS(cgrp, cpu, contended) -); - -DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_unlock_fastpath, - - TP_PROTO(struct cgroup *cgrp, int cpu, bool contended), - - TP_ARGS(cgrp, cpu, contended) -); - #endif /* _TRACE_CGROUP_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 07a4688833b237331e5045f90fc546c085b28c86 Mon Sep 17 00:00:00 2001 From: Alexander Wilhelm Date: Thu, 22 May 2025 16:35:30 +0200 Subject: soc: qcom: fix endianness for QMI header The members of QMI header have to be swapped on big endian platforms. Use __le16 types instead of u16 ones. Signed-off-by: Alexander Wilhelm Fixes: 9b8a11e82615 ("soc: qcom: Introduce QMI encoder/decoder") Fixes: 3830d0771ef6 ("soc: qcom: Introduce QMI helpers") Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250522143530.3623809-3-alexander.wilhelm@westermo.com Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/qmi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/soc/qcom/qmi.h b/include/linux/soc/qcom/qmi.h index 469e02d2aa0d..291cdc7ef49c 100644 --- a/include/linux/soc/qcom/qmi.h +++ b/include/linux/soc/qcom/qmi.h @@ -24,9 +24,9 @@ struct socket; */ struct qmi_header { u8 type; - u16 txn_id; - u16 msg_id; - u16 msg_len; + __le16 txn_id; + __le16 msg_id; + __le16 msg_len; } __packed; #define QMI_REQUEST 0 -- cgit v1.2.3 From f42b8d78dee77107245ec5beee3eb01915bcae7f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 11 Jun 2025 19:40:04 -0400 Subject: tpm: don't bother with removal of files in directory we'll be removing FWIW, there is a reliable indication of removal - ->i_nlink going to 0 ;-) Signed-off-by: Al Viro --- include/linux/tpm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index a3d8305e88a5..9894c104dc93 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -182,7 +182,7 @@ struct tpm_chip { unsigned long duration[TPM_NUM_DURATIONS]; /* jiffies */ bool duration_adjusted; - struct dentry *bios_dir[TPM_NUM_EVENT_LOG_FILES]; + struct dentry *bios_dir; const struct attribute_group *groups[3 + TPM_MAX_HASHES]; unsigned int groups_cnt; -- cgit v1.2.3 From 2410251cde0bac9f660f276307d6c967466eef0c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 16 Jun 2025 10:46:25 +0100 Subject: net: timestamp: add helper returning skb's tx tstamp Add a helper function skb_get_tx_timestamp() that returns a tx timestamp associated with an error queue skb. Signed-off-by: Pavel Begunkov Acked-by: Willem de Bruijn Link: https://patch.msgid.link/702357dd8936ef4c0d3864441e853bfe3224a677.1750065793.git.asml.silence@gmail.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 92e7c1aae3cc..f5f5a9ad290b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2677,6 +2677,10 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); +bool skb_has_tx_timestamp(struct sk_buff *skb, const struct sock *sk); +int skb_get_tx_timestamp(struct sk_buff *skb, struct sock *sk, + struct timespec64 *ts); + static inline void sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { -- cgit v1.2.3 From ba4618885b23372c45bb1566ed8e3f1c191ff22d Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sat, 14 Jun 2025 20:14:34 -0400 Subject: tcp: remove RFC3517/RFC6675 hint state: lost_skb_hint, lost_cnt_hint Now that obsolete RFC3517/RFC6675 TCP loss detection has been removed, we can remove the somewhat complex and intrusive code to maintain its hint state: lost_skb_hint and lost_cnt_hint. This commit makes tcp_clear_retrans_hints_partial() empty. We will remove tcp_clear_retrans_hints_partial() and its call sites in the next commit. Suggested-by: Yuchung Cheng Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250615001435.2390793-3-ncardwell.sw@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 3 --- include/net/tcp.h | 1 - 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 29f59d50dc73..1a5737b3753d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -208,7 +208,6 @@ struct tcp_sock { u32 notsent_lowat; /* TCP_NOTSENT_LOWAT */ u16 gso_segs; /* Max number of segs per GSO packet */ /* from STCP, retrans queue hinting */ - struct sk_buff *lost_skb_hint; struct sk_buff *retransmit_skb_hint; __cacheline_group_end(tcp_sock_read_tx); @@ -419,8 +418,6 @@ struct tcp_sock { struct tcp_sack_block recv_sack_cache[4]; - int lost_cnt_hint; - u32 prior_ssthresh; /* ssthresh saved at recovery start */ u32 high_seq; /* snd_nxt at onset of congestion */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 5078ad868fee..f57d12183794 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1813,7 +1813,6 @@ static inline void tcp_mib_init(struct net *net) /* from STCP */ static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp) { - tp->lost_skb_hint = NULL; } static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) -- cgit v1.2.3 From db16319efcc717a31dcb9c8f038acb6e4111c12e Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sat, 14 Jun 2025 20:14:35 -0400 Subject: tcp: remove RFC3517/RFC6675 tcp_clear_retrans_hints_partial() Now that we have removed the RFC3517/RFC6675 hints, tcp_clear_retrans_hints_partial() is empty, and can be removed. Suggested-by: Yuchung Cheng Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250615001435.2390793-4-ncardwell.sw@gmail.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index f57d12183794..9f852f5f8b95 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1811,13 +1811,8 @@ static inline void tcp_mib_init(struct net *net) } /* from STCP */ -static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp) -{ -} - static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) { - tcp_clear_retrans_hints_partial(tp); tp->retransmit_skb_hint = NULL; } -- cgit v1.2.3 From ef07df397a621707903ef0d294a7df11f80cf206 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= Date: Sat, 14 Jun 2025 09:59:48 +0200 Subject: net: dsa: tag_brcm: add support for legacy FCS tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for legacy Broadcom FCS tags, which are similar to DSA_TAG_PROTO_BRCM_LEGACY. BCM5325 and BCM5365 switches require including the original FCS value and length, as opposed to BCM63xx switches. Adding the original FCS value and length to DSA_TAG_PROTO_BRCM_LEGACY would impact performance of BCM63xx switches, so it's better to create a new tag. Signed-off-by: Álvaro Fernández Rojas Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250614080000.1884236-3-noltari@gmail.com Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 55e2d97f247e..d73ea0880066 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -54,11 +54,13 @@ struct tc_action; #define DSA_TAG_PROTO_RZN1_A5PSW_VALUE 26 #define DSA_TAG_PROTO_LAN937X_VALUE 27 #define DSA_TAG_PROTO_VSC73XX_8021Q_VALUE 28 +#define DSA_TAG_PROTO_BRCM_LEGACY_FCS_VALUE 29 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, DSA_TAG_PROTO_BRCM = DSA_TAG_PROTO_BRCM_VALUE, DSA_TAG_PROTO_BRCM_LEGACY = DSA_TAG_PROTO_BRCM_LEGACY_VALUE, + DSA_TAG_PROTO_BRCM_LEGACY_FCS = DSA_TAG_PROTO_BRCM_LEGACY_FCS_VALUE, DSA_TAG_PROTO_BRCM_PREPEND = DSA_TAG_PROTO_BRCM_PREPEND_VALUE, DSA_TAG_PROTO_DSA = DSA_TAG_PROTO_DSA_VALUE, DSA_TAG_PROTO_EDSA = DSA_TAG_PROTO_EDSA_VALUE, -- cgit v1.2.3 From 0f66b616b87cb4a57d22f6f0e0e1698a70d8ad21 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Sun, 15 Jun 2025 20:35:09 +0000 Subject: netmem: fix netmem comments Trivial fix to a couple of outdated netmem comments. No code changes, just more accurately describing current code. Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250615203511.591438-1-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/net/netmem.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/netmem.h b/include/net/netmem.h index 386164fb9c18..850869b45b45 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -89,8 +89,7 @@ static inline unsigned int net_iov_idx(const struct net_iov *niov) * typedef netmem_ref - a nonexistent type marking a reference to generic * network memory. * - * A netmem_ref currently is always a reference to a struct page. This - * abstraction is introduced so support for new memory types can be added. + * A netmem_ref can be a struct page* or a struct net_iov* underneath. * * Use the supplied helpers to obtain the underlying memory pointer and fields. */ @@ -117,9 +116,6 @@ static inline struct page *__netmem_to_page(netmem_ref netmem) return (__force struct page *)netmem; } -/* This conversion fails (returns NULL) if the netmem_ref is not struct page - * backed. - */ static inline struct page *netmem_to_page(netmem_ref netmem) { if (WARN_ON_ONCE(netmem_is_net_iov(netmem))) @@ -178,6 +174,21 @@ static inline unsigned long netmem_pfn_trace(netmem_ref netmem) return page_to_pfn(netmem_to_page(netmem)); } +/* __netmem_clear_lsb - convert netmem_ref to struct net_iov * for access to + * common fields. + * @netmem: netmem reference to extract as net_iov. + * + * All the sub types of netmem_ref (page, net_iov) have the same pp, pp_magic, + * dma_addr, and pp_ref_count fields at the same offsets. Thus, we can access + * these fields without a type check to make sure that the underlying mem is + * net_iov or page. + * + * The resulting value of this function can only be used to access the fields + * that are NET_IOV_ASSERT_OFFSET'd. Accessing any other fields will result in + * undefined behavior. + * + * Return: the netmem_ref cast to net_iov* regardless of its underlying type. + */ static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem) { return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV); -- cgit v1.2.3 From 2de1ba0887e5d3bf02d7c212f380039b34e10aa3 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 16 Jun 2025 16:26:24 +0300 Subject: net: vlan: Make is_vlan_dev() a stub when VLAN is not configured Add a stub implementation of is_vlan_dev() that returns false when VLAN support is not compiled in (CONFIG_VLAN_8021Q=n). This allows us to compile-out VLAN-dependent dead code when it is not needed. This also resolves the following compilation error when: * CONFIG_VLAN_8021Q=n * CONFIG_OBJTOOL=y * CONFIG_OBJTOOL_WERROR=y drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.o: error: objtool: parse_mirred.isra.0+0x370: mlx5e_tc_act_vlan_add_push_action() missing __noreturn in .c/.h or NORETURN() in noreturns.h The error occurs because objtool cannot determine that unreachable BUG() (which doesn't return) calls in VLAN code paths are actually dead code when VLAN support is disabled. Signed-off-by: Gal Pressman Link: https://patch.msgid.link/20250616132626.1749331-2-gal@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/if_vlan.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 38456b42cdb5..618a973ff8ee 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -79,11 +79,6 @@ static inline struct vlan_ethhdr *skb_vlan_eth_hdr(const struct sk_buff *skb) /* found in socket.c */ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); -static inline bool is_vlan_dev(const struct net_device *dev) -{ - return dev->priv_flags & IFF_802_1Q_VLAN; -} - #define skb_vlan_tag_present(__skb) (!!(__skb)->vlan_all) #define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci) #define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) @@ -200,6 +195,11 @@ struct vlan_dev_priv { #endif }; +static inline bool is_vlan_dev(const struct net_device *dev) +{ + return dev->priv_flags & IFF_802_1Q_VLAN; +} + static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev) { return netdev_priv(dev); @@ -237,6 +237,11 @@ extern void vlan_vids_del_by_dev(struct net_device *dev, extern bool vlan_uses_dev(const struct net_device *dev); #else +static inline bool is_vlan_dev(const struct net_device *dev) +{ + return false; +} + static inline struct net_device * __vlan_find_dev_deep_rcu(struct net_device *real_dev, __be16 vlan_proto, u16 vlan_id) -- cgit v1.2.3 From 60a8b1a5d0824afda869f18dc0ecfe72f8dfda42 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 16 Jun 2025 16:26:25 +0300 Subject: net: vlan: Replace BUG() with WARN_ON_ONCE() in vlan_dev_* stubs When CONFIG_VLAN_8021Q=n, a set of stub helpers are used, three of these helpers use BUG() unconditionally. This code should not be reached, as callers of these functions should always check for is_vlan_dev() first, but the usage of BUG() is not recommended, replace it with WARN_ON() instead. Reviewed-by: Alex Lazar Reviewed-by: Dragos Tatulea Signed-off-by: Gal Pressman Link: https://patch.msgid.link/20250616132626.1749331-3-gal@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/if_vlan.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 618a973ff8ee..b9f699799cf6 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -259,19 +259,19 @@ vlan_for_each(struct net_device *dev, static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev) { - BUG(); + WARN_ON_ONCE(1); return NULL; } static inline u16 vlan_dev_vlan_id(const struct net_device *dev) { - BUG(); + WARN_ON_ONCE(1); return 0; } static inline __be16 vlan_dev_vlan_proto(const struct net_device *dev) { - BUG(); + WARN_ON_ONCE(1); return 0; } -- cgit v1.2.3 From 9c5f5a5bf0da5cee2044b93907ac6d8d9af0492b Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 16 Jun 2025 16:26:26 +0300 Subject: net: vlan: Use IS_ENABLED() helper for CONFIG_VLAN_8021Q guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The header currently tests the VLAN core with an explicit pair of 'if defined' checks: #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) Instead, use IS_ENABLED() which is the kernel way to test whether an option is configured as builtin/module. This is purely cosmetic – no functional changes. Reviewed-by: Alex Lazar Reviewed-by: Dragos Tatulea Signed-off-by: Gal Pressman Link: https://patch.msgid.link/20250616132626.1749331-4-gal@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/if_vlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index b9f699799cf6..15e01935d3fa 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -131,7 +131,7 @@ struct vlan_pcpu_stats { u32 tx_dropped; }; -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) extern struct net_device *__vlan_find_dev_deep_rcu(struct net_device *real_dev, __be16 vlan_proto, u16 vlan_id); -- cgit v1.2.3 From e3411e326fa48c9be09ba449330352ba698db698 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 17 Jun 2025 00:44:09 +0200 Subject: net: ipv4: Add a flags argument to iptunnel_xmit(), udp_tunnel_xmit_skb() iptunnel_xmit() erases the contents of the SKB control block. In order to be able to set particular IPCB flags on the SKB, add a corresponding parameter, and propagate it to udp_tunnel_xmit_skb() as well. In one of the following patches, VXLAN driver will use this facility to mark packets as subject to IP multicast routing. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Acked-by: Antonio Quartulli Link: https://patch.msgid.link/89c9daf9f2dc088b6b92ccebcc929f51742de91f.1750113335.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/ip_tunnels.h | 2 +- include/net/udp_tunnel.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 0c3d571a04a1..8cf1380f3656 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -603,7 +603,7 @@ static inline int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, u8 proto, - u8 tos, u8 ttl, __be16 df, bool xnet); + u8 tos, u8 ttl, __be16 df, bool xnet, u16 ipcb_flags); struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags); int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 2df3b8344eb5..28102c8fd8a8 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -150,7 +150,7 @@ static inline void udp_tunnel_drop_rx_info(struct net_device *dev) void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, - bool xnet, bool nocheck); + bool xnet, bool nocheck, u16 ipcb_flags); int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, -- cgit v1.2.3 From 35bec72a24ace52a7f57642ff2813f22733b08fd Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 17 Jun 2025 00:44:12 +0200 Subject: net: ipv4: Add ip_mr_output() Multicast routing is today handled in the input path. Locally generated MC packets don't hit the IPMR code today. Thus if a VXLAN remote address is multicast, the driver needs to set an OIF during route lookup. Thus MC routing configuration needs to be kept in sync with the VXLAN FDB and MDB. Ideally, the VXLAN packets would be routed by the MC routing code instead. To that end, this patch adds support to route locally generated multicast packets. The newly-added routines do largely what ip_mr_input() and ip_mr_forward() do: make an MR cache lookup to find where to send the packets, and use ip_mc_output() to send each of them. When no cache entry is found, the packet is punted to the daemon for resolution. However, an installation that uses a VXLAN underlay netdevice for which it also has matching MC routes, would get a different routing with this patch. Previously, the MC packets would be delivered directly to the underlay port, whereas now they would be MC-routed. In order to avoid this change in behavior, introduce an IPCB flag. Only if the flag is set will ip_mr_output() actually engage, otherwise it reverts to ip_mc_output(). This code is based on work by Roopa Prabhu and Nikolay Aleksandrov. Signed-off-by: Roopa Prabhu Signed-off-by: Nikolay Aleksandrov Signed-off-by: Benjamin Poirier Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/0aadbd49330471c0f758d54afb05eb3b6e3a6b65.1750113335.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 47ed6d23853d..375304bb99f6 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -59,6 +59,7 @@ struct inet_skb_parm { #define IPSKB_L3SLAVE BIT(7) #define IPSKB_NOPOLICY BIT(8) #define IPSKB_MULTIPATH BIT(9) +#define IPSKB_MCROUTE BIT(10) u16 frag_max_size; }; @@ -167,6 +168,7 @@ void ip_list_rcv(struct list_head *head, struct packet_type *pt, int ip_local_deliver(struct sk_buff *skb); void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int proto); int ip_mr_input(struct sk_buff *skb); +int ip_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, -- cgit v1.2.3 From 6a7d88ca15f73c5c570c372238f71d63da1fda55 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 17 Jun 2025 00:44:13 +0200 Subject: net: ipv6: Make udp_tunnel6_xmit_skb() void The function always returns zero, thus the return value does not carry any signal. Just make it void. Most callers already ignore the return value. However: - Refold arguments of the call from sctp_v6_xmit() so that they fit into the 80-column limit. - tipc_udp_xmit() initializes err from the return value, but that should already be always zero at that point. So there's no practical change, but elision of the assignment prompts a couple more tweaks to clean up the function. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/7facacf9d8ca3ca9391a4aee88160913671b868d.1750113335.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/udp_tunnel.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 28102c8fd8a8..0b01f6ade20d 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -152,13 +152,13 @@ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb __be16 df, __be16 src_port, __be16 dst_port, bool xnet, bool nocheck, u16 ipcb_flags); -int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, - struct net_device *dev, - const struct in6_addr *saddr, - const struct in6_addr *daddr, - __u8 prio, __u8 ttl, __be32 label, - __be16 src_port, __be16 dst_port, bool nocheck); +void udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, + struct net_device *dev, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u8 prio, __u8 ttl, __be32 label, + __be16 src_port, __be16 dst_port, bool nocheck); void udp_tunnel_sock_release(struct socket *sock); -- cgit v1.2.3 From f78c75d84fe83898f0a00658f593d4f17b38cbc6 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 17 Jun 2025 00:44:14 +0200 Subject: net: ipv6: Add a flags argument to ip6tunnel_xmit(), udp_tunnel6_xmit_skb() ip6tunnel_xmit() erases the contents of the SKB control block. In order to be able to set particular IP6CB flags on the SKB, add a corresponding parameter, and propagate it to udp_tunnel6_xmit_skb() as well. In one of the following patches, VXLAN driver will use this facility to mark packets as subject to IPv6 multicast routing. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/acb4f9f3e40c3a931236c3af08a720b017fbfbfb.1750113335.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/ip6_tunnel.h | 3 ++- include/net/udp_tunnel.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 399592405c72..dd163495f353 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -152,11 +152,12 @@ int ip6_tnl_get_iflink(const struct net_device *dev); int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu); static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, - struct net_device *dev) + struct net_device *dev, u16 ip6cb_flags) { int pkt_len, err; memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); + IP6CB(skb)->flags = ip6cb_flags; pkt_len = skb->len - skb_inner_network_offset(skb); err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 0b01f6ade20d..e3c70b579095 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -158,7 +158,8 @@ void udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, const struct in6_addr *saddr, const struct in6_addr *daddr, __u8 prio, __u8 ttl, __be32 label, - __be16 src_port, __be16 dst_port, bool nocheck); + __be16 src_port, __be16 dst_port, bool nocheck, + u16 ip6cb_flags); void udp_tunnel_sock_release(struct socket *sock); -- cgit v1.2.3 From 96e8f5a9fe2d91b9f9eb8b45cc13ce1ca6a8af82 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 17 Jun 2025 00:44:18 +0200 Subject: net: ipv6: Add ip6_mr_output() Multicast routing is today handled in the input path. Locally generated MC packets don't hit the IPMR code today. Thus if a VXLAN remote address is multicast, the driver needs to set an OIF during route lookup. Thus MC routing configuration needs to be kept in sync with the VXLAN FDB and MDB. Ideally, the VXLAN packets would be routed by the MC routing code instead. To that end, this patch adds support to route locally generated multicast packets. The newly-added routines do largely what ip6_mr_input() and ip6_mr_forward() do: make an MR cache lookup to find where to send the packets, and use ip6_output() to send each of them. When no cache entry is found, the packet is punted to the daemon for resolution. Similarly to the IPv4 case in a previous patch, the new logic is contingent on a newly-added IP6CB flag being set. Signed-off-by: Petr Machata Link: https://patch.msgid.link/3bcc034a3ab4d3c291072fff38f78d7fbbeef4e6.1750113335.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/ipv6.h | 1 + include/linux/mroute6.h | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5aeeed22f35b..db0eb0d86b64 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -156,6 +156,7 @@ struct inet6_skb_parm { #define IP6SKB_SEG6 256 #define IP6SKB_FAKEJUMBO 512 #define IP6SKB_MULTIPATH 1024 +#define IP6SKB_MCROUTE 2048 }; #if defined(CONFIG_NET_L3_MASTER_DEV) diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 63ef5191cc57..fddafdc168f7 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -31,6 +31,7 @@ extern int ip6_mroute_getsockopt(struct sock *, int, sockptr_t, sockptr_t); extern int ip6_mr_input(struct sk_buff *skb); extern int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); extern int ip6_mr_init(void); +extern int ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb); extern void ip6_mr_cleanup(void); int ip6mr_ioctl(struct sock *sk, int cmd, void *arg); #else @@ -58,6 +59,12 @@ static inline int ip6_mr_init(void) return 0; } +static inline int +ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + return ip6_output(net, sk, skb); +} + static inline void ip6_mr_cleanup(void) { return; -- cgit v1.2.3 From f8337efa4ff5a27e6c1d4e384166413eecd21a65 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 17 Jun 2025 00:44:19 +0200 Subject: vxlan: Support MC routing in the underlay Locally-generated MC packets have so far not been subject to MC routing. Instead an MC-enabled installation would maintain the MC routing tables, and separately from that the list of interfaces to send packets to as part of the VXLAN FDB and MDB. In a previous patch, a ip_mr_output() and ip6_mr_output() routines were added for IPv4 and IPv6. All locally generated MC traffic is now passed through these functions. For reasons of backward compatibility, an SKB (IPCB / IP6CB) flag guards the actual MC routing. This patch adds logic to set the flag, and the UAPI to enable the behavior. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/d899655bb7e9b2521ee8c793e67056b9fd02ba12.1750113335.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/vxlan.h | 5 ++++- include/uapi/linux/if_link.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index e2f7ca045d3e..0ee50785f4f1 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -332,6 +332,7 @@ struct vxlan_dev { #define VXLAN_F_VNIFILTER 0x20000 #define VXLAN_F_MDB 0x40000 #define VXLAN_F_LOCALBYPASS 0x80000 +#define VXLAN_F_MC_ROUTE 0x100000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable @@ -353,7 +354,9 @@ struct vxlan_dev { VXLAN_F_UDP_ZERO_CSUM6_RX | \ VXLAN_F_COLLECT_METADATA | \ VXLAN_F_VNIFILTER | \ - VXLAN_F_LOCALBYPASS) + VXLAN_F_LOCALBYPASS | \ + VXLAN_F_MC_ROUTE | \ + 0) struct net_device *vxlan_dev_create(struct net *net, const char *name, u8 name_assign_type, struct vxlan_config *conf); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 3ad2d5d98034..873c285996fe 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1398,6 +1398,7 @@ enum { IFLA_VXLAN_LOCALBYPASS, IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */ IFLA_VXLAN_RESERVED_BITS, + IFLA_VXLAN_MC_ROUTE, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) -- cgit v1.2.3 From fd0406e5ca53b804353d4b1b60a980c13cbfbea3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Jun 2025 08:10:47 -1000 Subject: net: tcp: tsq: Convert from tasklet to BH workqueue The only generic interface to execute asynchronously in the BH context is tasklet; however, it's marked deprecated and has some design flaws. To replace tasklets, BH workqueue support was recently added. A BH workqueue behaves similarly to regular workqueues except that the queued work items are executed in the BH context. This patch converts TCP Small Queues implementation from tasklet to BH workqueue. Semantically, this is an equivalent conversion and there shouldn't be any user-visible behavior changes. While workqueue's queueing and execution paths are a bit heavier than tasklet's, unless the work item is being queued every packet, the difference hopefully shouldn't matter. My experience with the networking stack is very limited and this patch definitely needs attention from someone who actually understands networking. Signed-off-by: Tejun Heo Reviewed-by: Jason Xing Reviewed-by: Eric Dumazet Cc: David Ahern Link: https://patch.msgid.link/aFBeJ38AS1ZF3Dq5@slm.duckdns.org Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 9f852f5f8b95..761c4a0ad386 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -321,7 +321,7 @@ extern struct proto tcp_prot; #define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field) #define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val) -void tcp_tasklet_init(void); +void tcp_tsq_work_init(void); int tcp_v4_err(struct sk_buff *skb, u32); -- cgit v1.2.3 From c9e1225352d48b184991a4edc77b897cac66991e Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 16 Jun 2025 17:14:30 +0300 Subject: net: Allow const args for of page_to_netmem() This allows calling page_to_netmem() with a const page * argument. Signed-off-by: Dragos Tatulea Reviewed-by: Mina Almasry Reviewed-by: Cosmin Ratiu Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250616141441.1243044-2-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/netmem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netmem.h b/include/net/netmem.h index 850869b45b45..7a1dafa3f080 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -139,7 +139,7 @@ static inline netmem_ref net_iov_to_netmem(struct net_iov *niov) return (__force netmem_ref)((unsigned long)niov | NET_IOV); } -static inline netmem_ref page_to_netmem(struct page *page) +static inline netmem_ref page_to_netmem(const struct page *page) { return (__force netmem_ref)page; } -- cgit v1.2.3 From 1cbb49f85b4095af798f1a421db2e08894d0606c Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 16 Jun 2025 17:14:31 +0300 Subject: net: Add skb_can_coalesce for netmem Allow drivers that have moved over to netmem to do fragment coalescing. Signed-off-by: Dragos Tatulea Signed-off-by: Cosmin Ratiu Reviewed-by: Tariq Toukan Reviewed-by: Mina Almasry Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250616141441.1243044-3-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5520524c93bf..9508968cb300 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3873,20 +3873,26 @@ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int l bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) __must_check; -static inline bool skb_can_coalesce(struct sk_buff *skb, int i, - const struct page *page, int off) +static inline bool skb_can_coalesce_netmem(struct sk_buff *skb, int i, + netmem_ref netmem, int off) { if (skb_zcopy(skb)) return false; if (i) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; - return page == skb_frag_page(frag) && + return netmem == skb_frag_netmem(frag) && off == skb_frag_off(frag) + skb_frag_size(frag); } return false; } +static inline bool skb_can_coalesce(struct sk_buff *skb, int i, + const struct page *page, int off) +{ + return skb_can_coalesce_netmem(skb, i, page_to_netmem(page), off); +} + static inline int __skb_linearize(struct sk_buff *skb) { return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM; -- cgit v1.2.3 From a202f24b08587021a39eade5aa5444d5714689fb Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 16 Jun 2025 17:14:32 +0300 Subject: page_pool: Add page_pool_dev_alloc_netmems helper This is the netmem counterpart of page_pool_dev_alloc_pages() which uses the default GFP flags for RX. Signed-off-by: Dragos Tatulea Reviewed-by: Mina Almasry Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250616141441.1243044-4-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/helpers.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 93f2c31baf9b..773fc65780b5 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -153,6 +153,13 @@ static inline netmem_ref page_pool_dev_alloc_netmem(struct page_pool *pool, return page_pool_alloc_netmem(pool, offset, size, gfp); } +static inline netmem_ref page_pool_dev_alloc_netmems(struct page_pool *pool) +{ + gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; + + return page_pool_alloc_netmems(pool, gfp); +} + static inline struct page *page_pool_alloc(struct page_pool *pool, unsigned int *offset, unsigned int *size, gfp_t gfp) -- cgit v1.2.3 From 7851263998d4269125fd6cb3fdbfc7c6db853859 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 16 Jun 2025 11:21:15 -0700 Subject: atm: Revert atm_account_tx() if copy_from_iter_full() fails. In vcc_sendmsg(), we account skb->truesize to sk->sk_wmem_alloc by atm_account_tx(). It is expected to be reverted by atm_pop_raw() later called by vcc->dev->ops->send(vcc, skb). However, vcc_sendmsg() misses the same revert when copy_from_iter_full() fails, and then we will leak a socket. Let's factorise the revert part as atm_return_tx() and call it in the failure path. Note that the corresponding sk_wmem_alloc operation can be found in alloc_tx() as of the blamed commit. $ git blame -L:alloc_tx net/atm/common.c c55fa3cccbc2c~ Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Simon Horman Closes: https://lore.kernel.org/netdev/20250614161959.GR414686@horms.kernel.org/ Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250616182147.963333-3-kuni1840@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/atmdev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 9b02961d65ee..45f2f278b50a 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -249,6 +249,12 @@ static inline void atm_account_tx(struct atm_vcc *vcc, struct sk_buff *skb) ATM_SKB(skb)->atm_options = vcc->atm_options; } +static inline void atm_return_tx(struct atm_vcc *vcc, struct sk_buff *skb) +{ + WARN_ON_ONCE(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, + &sk_atm(vcc)->sk_wmem_alloc)); +} + static inline void atm_force_charge(struct atm_vcc *vcc,int truesize) { atomic_add(truesize, &sk_atm(vcc)->sk_rmem_alloc); -- cgit v1.2.3 From 30b58444807c93bffeaba7d776110f2a909d2f9a Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 17 Jun 2025 13:40:56 +0800 Subject: erofs: remove unused trace event erofs_destroy_inode The trace event `erofs_destroy_inode` was added but remains unused. This unused event contributes approximately 5KB to the kernel module size. Reported-by: Steven Rostedt Closes: https://lore.kernel.org/r/20250612224906.15000244@batman.local.home Fixes: 13f06f48f7bf ("staging: erofs: support tracepoint") Cc: stable@vger.kernel.org Reviewed-by: Hongbo Li Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20250617054056.3232365-1-hsiangkao@linux.alibaba.com --- include/trace/events/erofs.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h index a5f4b9234f46..dad7360f42f9 100644 --- a/include/trace/events/erofs.h +++ b/include/trace/events/erofs.h @@ -211,24 +211,6 @@ TRACE_EVENT(erofs_map_blocks_exit, show_mflags(__entry->mflags), __entry->ret) ); -TRACE_EVENT(erofs_destroy_inode, - TP_PROTO(struct inode *inode), - - TP_ARGS(inode), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( erofs_nid_t, nid ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->nid = EROFS_I(inode)->nid; - ), - - TP_printk("dev = (%d,%d), nid = %llu", show_dev_nid(__entry)) -); - #endif /* _TRACE_EROFS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 6c1dedf805ecd304236a83a2057ed803fb6b32f6 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Mon, 16 Jun 2025 14:14:24 +0200 Subject: media: rcar-fcp: Add rcar_fcp_soft_reset() Add a function to perform soft reset of the FCP. It is intended to support the correct stop procedure of the VSPX-FCPVX and VSPD-FCPD pairs according to section "62.3.7.3 Reset Operation" of the R-Car Hardware Manual at revision 1.20. Signed-off-by: Jacopo Mondi Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20250616-vspx-reset-v2-1-6cc12ed7e9bb@ideasonboard.com Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/media/rcar-fcp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/media/rcar-fcp.h b/include/media/rcar-fcp.h index 179240fb163b..6ac9be9f675e 100644 --- a/include/media/rcar-fcp.h +++ b/include/media/rcar-fcp.h @@ -18,6 +18,7 @@ void rcar_fcp_put(struct rcar_fcp_device *fcp); struct device *rcar_fcp_get_device(struct rcar_fcp_device *fcp); int rcar_fcp_enable(struct rcar_fcp_device *fcp); void rcar_fcp_disable(struct rcar_fcp_device *fcp); +int rcar_fcp_soft_reset(struct rcar_fcp_device *fcp); #else static inline struct rcar_fcp_device *rcar_fcp_get(const struct device_node *np) { @@ -33,6 +34,10 @@ static inline int rcar_fcp_enable(struct rcar_fcp_device *fcp) return 0; } static inline void rcar_fcp_disable(struct rcar_fcp_device *fcp) { } +static inline int rcar_fcp_soft_reset(struct rcar_fcp_device *fcp) +{ + return 0; +} #endif #endif /* __MEDIA_RCAR_FCP_H__ */ -- cgit v1.2.3 From d06c1a9f348d22478c6bc5684f9c990e15ada1e9 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Tue, 17 Jun 2025 09:23:28 +0200 Subject: media: vsp1: Add VSPX support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for VSPX, a specialized version of the VSP2 that transfers data to the ISP. The VSPX is composed of two RPF units to read data from external memory and an IIF instance that performs transfer towards the ISP. The VSPX is supported through a newly introduced vsp1_vspx.c file that exposes two interfaces: vsp1_vspx interface, declared in vsp1_vspx.h for the vsp1 core to initialize and cleanup the VSPX, and a vsp1_isp interface, declared in include/media/vsp1.h for the ISP driver to control the VSPX operations. Tested-by: Niklas Söderlund Reviewed-by: Laurent Pinchart Signed-off-by: Jacopo Mondi Link: https://lore.kernel.org/r/20250617-b4-vspx-v13-1-9f4054c1c9af@ideasonboard.com Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/media/vsp1.h | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) (limited to 'include') diff --git a/include/media/vsp1.h b/include/media/vsp1.h index 4ea6352fd63f..d9b91ff02761 100644 --- a/include/media/vsp1.h +++ b/include/media/vsp1.h @@ -14,6 +14,11 @@ #include struct device; +struct vsp1_dl_list; + +/* ----------------------------------------------------------------------------- + * VSP1 DU interface + */ int vsp1_du_init(struct device *dev); @@ -121,4 +126,88 @@ void vsp1_du_atomic_flush(struct device *dev, unsigned int pipe_index, int vsp1_du_map_sg(struct device *dev, struct sg_table *sgt); void vsp1_du_unmap_sg(struct device *dev, struct sg_table *sgt); +/* ----------------------------------------------------------------------------- + * VSP1 ISP interface + */ + +/** + * struct vsp1_isp_buffer_desc - Describe a buffer allocated by VSPX + * @size: Byte size of the buffer allocated by VSPX + * @cpu_addr: CPU-mapped address of a buffer allocated by VSPX + * @dma_addr: bus address of a buffer allocated by VSPX + */ +struct vsp1_isp_buffer_desc { + size_t size; + void *cpu_addr; + dma_addr_t dma_addr; +}; + +/** + * struct vsp1_isp_job_desc - Describe a VSPX buffer transfer request + * @config: ConfigDMA buffer descriptor + * @config.pairs: number of reg-value pairs in the ConfigDMA buffer + * @config.mem: bus address of the ConfigDMA buffer + * @img: RAW image buffer descriptor + * @img.fmt: RAW image format + * @img.mem: bus address of the RAW image buffer + * @dl: pointer to the display list populated by the VSPX driver in the + * vsp1_isp_job_prepare() function + * + * Describe a transfer request for the VSPX to perform on behalf of the ISP. + * The job descriptor contains an optional ConfigDMA buffer and one RAW image + * buffer. Set config.pairs to 0 if no ConfigDMA buffer should be transferred. + * The minimum number of config.pairs that can be written using ConfigDMA is 17. + * A number of pairs < 16 corrupts the output image. A number of pairs == 16 + * freezes the VSPX operation. If the ISP driver has to write less than 17 pairs + * it shall pad the buffer with writes directed to registers that have no effect + * or avoid using ConfigDMA at all for such small write sequences. + * + * The ISP driver shall pass an instance this type to the vsp1_isp_job_prepare() + * function that will populate the display list pointer @dl using the @config + * and @img descriptors. When the job has to be run on the VSPX, the descriptor + * shall be passed to vsp1_isp_job_run() which consumes the display list. + * + * Job descriptors not yet run shall be released with a call to + * vsp1_isp_job_release() when stopping the streaming in order to properly + * release the resources acquired by vsp1_isp_job_prepare(). + */ +struct vsp1_isp_job_desc { + struct { + unsigned int pairs; + dma_addr_t mem; + } config; + struct { + struct v4l2_pix_format_mplane fmt; + dma_addr_t mem; + } img; + struct vsp1_dl_list *dl; +}; + +/** + * struct vsp1_vspx_frame_end - VSPX frame end callback data + * @vspx_frame_end: Frame end callback. Called after a transfer job has been + * completed. If the job includes both a ConfigDMA and a + * RAW image, the callback is called after both have been + * transferred + * @frame_end_data: Frame end callback data, passed to vspx_frame_end + */ +struct vsp1_vspx_frame_end { + void (*vspx_frame_end)(void *data); + void *frame_end_data; +}; + +int vsp1_isp_init(struct device *dev); +struct device *vsp1_isp_get_bus_master(struct device *dev); +int vsp1_isp_alloc_buffer(struct device *dev, size_t size, + struct vsp1_isp_buffer_desc *buffer_desc); +void vsp1_isp_free_buffer(struct device *dev, + struct vsp1_isp_buffer_desc *buffer_desc); +int vsp1_isp_start_streaming(struct device *dev, + struct vsp1_vspx_frame_end *frame_end); +void vsp1_isp_stop_streaming(struct device *dev); +int vsp1_isp_job_prepare(struct device *dev, + struct vsp1_isp_job_desc *job); +int vsp1_isp_job_run(struct device *dev, struct vsp1_isp_job_desc *job); +void vsp1_isp_job_release(struct device *dev, struct vsp1_isp_job_desc *job); + #endif /* __MEDIA_VSP1_H__ */ -- cgit v1.2.3 From 7c8c957ef12c41968adb66d785ce1dd5fb2f96e7 Mon Sep 17 00:00:00 2001 From: Stefan Klug Date: Fri, 23 May 2025 17:14:31 +0200 Subject: media: rkisp1: Add RKISP1_CID_SUPPORTED_PARAMS_BLOCKS control Add a RKISP1_CID_SUPPORTED_PARAMS_BLOCKS V4L2 control to be able to query the parameters blocks supported by the current kernel on the current hardware from user space. Signed-off-by: Stefan Klug Reviewed-by: Paul Elder Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20250523-supported-params-and-wdr-v3-2-7283b8536694@ideasonboard.com Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/uapi/linux/rkisp1-config.h | 11 +++++++++++ include/uapi/linux/v4l2-controls.h | 6 ++++++ 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/rkisp1-config.h b/include/uapi/linux/rkisp1-config.h index 2d995f3c1ca3..5ca4d5961c5b 100644 --- a/include/uapi/linux/rkisp1-config.h +++ b/include/uapi/linux/rkisp1-config.h @@ -1086,6 +1086,9 @@ enum rkisp1_ext_params_block_type { #define RKISP1_EXT_PARAMS_FL_BLOCK_DISABLE (1U << 0) #define RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE (1U << 1) +/* A bitmask of parameters blocks supported on the current hardware. */ +#define RKISP1_CID_SUPPORTED_PARAMS_BLOCKS (V4L2_CID_USER_RKISP1_BASE + 0x01) + /** * struct rkisp1_ext_params_block_header - RkISP1 extensible parameters block * header @@ -1520,6 +1523,14 @@ enum rksip1_ext_param_buffer_version { * V4L2 control. If such control is not available, userspace should assume only * RKISP1_EXT_PARAM_BUFFER_V1 is supported by the driver. * + * The read-only V4L2 control ``RKISP1_CID_SUPPORTED_PARAMS_BLOCKS`` can be used + * to query the blocks supported by the device. It contains a bitmask where each + * bit represents the availability of the corresponding entry from the + * :c:type:`rkisp1_ext_params_block_type` enum. The current and default values + * of the control represents the blocks supported by the device instance, while + * the maximum value represents the blocks supported by the kernel driver, + * independently of the device instance. + * * For each ISP block that userspace wants to configure, a block-specific * structure is appended to the @data buffer, one after the other without gaps * in between nor overlaps. Userspace shall populate the @data_size field with diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 72e32814ea83..f836512e9deb 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -222,6 +222,12 @@ enum v4l2_colorfx { */ #define V4L2_CID_USER_UVC_BASE (V4L2_CID_USER_BASE + 0x11e0) +/* + * The base for Rockchip ISP1 driver controls. + * We reserve 16 controls for this driver. + */ +#define V4L2_CID_USER_RKISP1_BASE (V4L2_CID_USER_BASE + 0x1220) + /* MPEG-class control IDs */ /* The MPEG controls are applicable to all codec controls * and the 'MPEG' part of the define is historical */ -- cgit v1.2.3 From cd403e8aed6caad87967d2c135b57d92ba8e5544 Mon Sep 17 00:00:00 2001 From: Jai Luthra Date: Tue, 10 Jun 2025 17:55:27 +0530 Subject: media: rockchip: rkisp1: Add support for Wide Dynamic Range RKISP supports a basic Wide Dynamic Range (WDR) module since the first iteration (v1.0) of the ISP. Add support for enabling and configuring it using extensible parameters. Also, to ease programming, switch to using macro variables for defining the tonemapping curve register addresses. Reviewed-by: Stefan Klug Tested-by: Stefan Klug Reviewed-by: Paul Elder Reviewed-by: Laurent Pinchart Signed-off-by: Jai Luthra Link: https://lore.kernel.org/r/20250610-wdr-latest-v4-1-b69d0ac17ce9@ideasonboard.com Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/uapi/linux/rkisp1-config.h | 95 +++++++++++++++++++++++++++++++++++++- 1 file changed, 94 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/rkisp1-config.h b/include/uapi/linux/rkisp1-config.h index 5ca4d5961c5b..3b060ea6eed7 100644 --- a/include/uapi/linux/rkisp1-config.h +++ b/include/uapi/linux/rkisp1-config.h @@ -169,6 +169,13 @@ */ #define RKISP1_CIF_ISP_COMPAND_NUM_POINTS 64 +/* + * Wide Dynamic Range + */ +#define RKISP1_CIF_ISP_WDR_CURVE_NUM_INTERV 32 +#define RKISP1_CIF_ISP_WDR_CURVE_NUM_COEFF (RKISP1_CIF_ISP_WDR_CURVE_NUM_INTERV + 1) +#define RKISP1_CIF_ISP_WDR_CURVE_NUM_DY_REGS 4 + /* * Measurement types */ @@ -889,6 +896,72 @@ struct rkisp1_cif_isp_compand_curve_config { __u32 y[RKISP1_CIF_ISP_COMPAND_NUM_POINTS]; }; +/** + * struct rkisp1_cif_isp_wdr_tone_curve - Tone mapping curve definition for WDR. + * + * @dY: the dYn increments for horizontal (input) axis of the tone curve. + * each 3-bit dY value represents an increment of 2**(value+3). + * dY[0] bits 0:2 is increment dY1, bit 3 unused + * dY[0] bits 4:6 is increment dY2, bit 7 unused + * ... + * dY[0] bits 28:30 is increment dY8, bit 31 unused + * ... and so on till dY[3] bits 28:30 is increment dY32, bit 31 unused. + * @ym: the Ym values for the vertical (output) axis of the tone curve. + * each value is 13 bit. + */ +struct rkisp1_cif_isp_wdr_tone_curve { + __u32 dY[RKISP1_CIF_ISP_WDR_CURVE_NUM_DY_REGS]; + __u16 ym[RKISP1_CIF_ISP_WDR_CURVE_NUM_COEFF]; +}; + +/** + * struct rkisp1_cif_isp_wdr_iref_config - Illumination reference config for WDR. + * + * Use illumination reference value as described below, instead of only the + * luminance (Y) value for tone mapping and gain calculations: + * IRef = (rgb_factor * RGBMax_tr + (8 - rgb_factor) * Y)/8 + * + * @rgb_factor: defines how much influence the RGBmax approach has in + * comparison to Y (valid values are 0..8). + * @use_y9_8: use Y*9/8 for maximum value calculation along with the + * default of R, G, B for noise reduction. + * @use_rgb7_8: decrease RGBMax by 7/8 for noise reduction. + * @disable_transient: disable transient calculation between Y and RGBY_max. + */ +struct rkisp1_cif_isp_wdr_iref_config { + __u8 rgb_factor; + __u8 use_y9_8; + __u8 use_rgb7_8; + __u8 disable_transient; +}; + +/** + * struct rkisp1_cif_isp_wdr_config - Configuration for wide dynamic range. + * + * @tone_curve: tone mapping curve. + * @iref_config: illumination reference configuration. (when use_iref is true) + * @rgb_offset: RGB offset value for RGB operation mode. (12 bits) + * @luma_offset: luminance offset value for RGB operation mode. (12 bits) + * @dmin_thresh: lower threshold for deltaMin value. (12 bits) + * @dmin_strength: strength factor for deltaMin. (valid range is 0x00..0x10) + * @use_rgb_colorspace: use RGB instead of luminance/chrominance colorspace. + * @bypass_chroma_mapping: disable chrominance mapping (only valid if + * use_rgb_colorspace = 0) + * @use_iref: use illumination reference instead of Y for tone mapping + * and gain calculations. + */ +struct rkisp1_cif_isp_wdr_config { + struct rkisp1_cif_isp_wdr_tone_curve tone_curve; + struct rkisp1_cif_isp_wdr_iref_config iref_config; + __u16 rgb_offset; + __u16 luma_offset; + __u16 dmin_thresh; + __u8 dmin_strength; + __u8 use_rgb_colorspace; + __u8 bypass_chroma_mapping; + __u8 use_iref; +}; + /*---------- PART2: Measurement Statistics ------------*/ /** @@ -1059,6 +1132,7 @@ struct rkisp1_stat_buffer { * @RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_BLS: BLS in the compand block * @RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_EXPAND: Companding expand curve * @RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_COMPRESS: Companding compress curve + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_WDR: Wide dynamic range */ enum rkisp1_ext_params_block_type { RKISP1_EXT_PARAMS_BLOCK_TYPE_BLS, @@ -1081,6 +1155,7 @@ enum rkisp1_ext_params_block_type { RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_BLS, RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_EXPAND, RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_COMPRESS, + RKISP1_EXT_PARAMS_BLOCK_TYPE_WDR, }; #define RKISP1_EXT_PARAMS_FL_BLOCK_DISABLE (1U << 0) @@ -1463,6 +1538,23 @@ struct rkisp1_ext_params_compand_curve_config { struct rkisp1_cif_isp_compand_curve_config config; } __attribute__((aligned(8))); +/** + * struct rkisp1_ext_params_wdr_config - RkISP1 extensible params + * Wide dynamic range config + * + * RkISP1 extensible parameters WDR block. + * Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_WDR` + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: WDR configuration, see + * :c:type:`rkisp1_cif_isp_wdr_config` + */ +struct rkisp1_ext_params_wdr_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_wdr_config config; +} __attribute__((aligned(8))); + /* * The rkisp1_ext_params_compand_curve_config structure is counted twice as it * is used for both the COMPAND_EXPAND and COMPAND_COMPRESS block types. @@ -1487,7 +1579,8 @@ struct rkisp1_ext_params_compand_curve_config { sizeof(struct rkisp1_ext_params_afc_config) +\ sizeof(struct rkisp1_ext_params_compand_bls_config) +\ sizeof(struct rkisp1_ext_params_compand_curve_config) +\ - sizeof(struct rkisp1_ext_params_compand_curve_config)) + sizeof(struct rkisp1_ext_params_compand_curve_config) +\ + sizeof(struct rkisp1_ext_params_wdr_config)) /** * enum rksip1_ext_param_buffer_version - RkISP1 extensible parameters version -- cgit v1.2.3 From 635e118317ffa773f6d25ec6a71b7927d7e8886a Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 18 Jun 2025 10:54:44 +0200 Subject: Revert "mtd: core: always create master device" The idea behind this patch was to always let a "master" mtd device available to anchor runtime PM. Historically, there was no mtd device representing the whole storage as soon as partitions were coming into play. The introduction of CONFIG_MTD_PARTITIONED_MASTER allowed to keep this "master" device, but was not enabled by default to avoid breaking existing users (otherwise the mtd device numbering would be totally messed up with an off by 1, at least). The approach of adding an mtd_master class on top of partitioned mtd devices is breaking the mtd core in many creative ways, so better think again this approach and revert the faulty changes for now. This reverts commit 0aa7b390fc40a871267a2328bbbefca8b37ad307. Fixes: 0aa7b390fc40 ("mtd: core: always create master device") Tested-by: Guenter Roeck Acked-by: Guenter Roeck Signed-off-by: Miquel Raynal --- include/linux/mtd/partitions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index 5daf80df9e89..b74a539ec581 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -108,7 +108,7 @@ extern void deregister_mtd_parser(struct mtd_part_parser *parser); deregister_mtd_parser) int mtd_add_partition(struct mtd_info *master, const char *name, - long long offset, long long length, struct mtd_info **part); + long long offset, long long length); int mtd_del_partition(struct mtd_info *master, int partno); uint64_t mtd_get_device_size(const struct mtd_info *mtd); -- cgit v1.2.3 From dba90f5a79c13936de4273a19e67908a0c296afe Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 18 Jun 2025 10:48:00 +0200 Subject: mtd: spinand: winbond: Prevent unsupported frequencies on dual/quad I/O variants Dual and quad capable chips natively support dual and quad I/O variants at up to 104MHz (1-2-2 and 1-4-4 operations). Reaching the maximum speed of 166MHz is theoretically possible (while still unsupported in the field) by adding a few more dummy cycles. Let's be accurate and clearly state this limit. Setting a maximum frequency implies adding the frequency parameter to the macro, which is done using a variadic argument to avoid impacting all the other drivers which already make use of this macro. Fixes: 1ea808b4d15b ("mtd: spinand: winbond: Update the *JW chip definitions") Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 811a0f356315..15eaa09da998 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -113,11 +113,12 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 2), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len, ...) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ SPI_MEM_OP_ADDR(2, addr, 2), \ SPI_MEM_OP_DUMMY(ndummy, 2), \ - SPI_MEM_OP_DATA_IN(len, buf, 2)) + SPI_MEM_OP_DATA_IN(len, buf, 2), \ + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) #define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_2S_2S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ @@ -151,11 +152,12 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 4), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len, ...) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ SPI_MEM_OP_ADDR(2, addr, 4), \ SPI_MEM_OP_DUMMY(ndummy, 4), \ - SPI_MEM_OP_DATA_IN(len, buf, 4)) + SPI_MEM_OP_DATA_IN(len, buf, 4), \ + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) #define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_4S_4S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ -- cgit v1.2.3 From ffaf1bf3737f706e4e9be876de4bc3c8fc578091 Mon Sep 17 00:00:00 2001 From: RubenKelevra Date: Wed, 18 Jun 2025 01:09:27 +0200 Subject: fs_context: fix parameter name in infofc() macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The macro takes a parameter called "p" but references "fc" internally. This happens to compile as long as callers pass a variable named fc, but breaks otherwise. Rename the first parameter to “fc” to match the usage and to be consistent with warnfc() / errorfc(). Fixes: a3ff937b33d9 ("prefix-handling analogues of errorf() and friends") Signed-off-by: RubenKelevra Link: https://lore.kernel.org/20250617230927.1790401-1-rubenkelevra@gmail.com Signed-off-by: Christian Brauner --- include/linux/fs_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index a19e4bd32e4d..7773eb870039 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -200,7 +200,7 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, */ #define infof(fc, fmt, ...) __logfc(fc, 'i', fmt, ## __VA_ARGS__) #define info_plog(p, fmt, ...) __plog(p, 'i', fmt, ## __VA_ARGS__) -#define infofc(p, fmt, ...) __plog((&(fc)->log), 'i', fmt, ## __VA_ARGS__) +#define infofc(fc, fmt, ...) __plog((&(fc)->log), 'i', fmt, ## __VA_ARGS__) /** * warnf - Store supplementary warning message -- cgit v1.2.3 From 4cdf874f67adfdec4f0a288c76f9aba05f9babe2 Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Tue, 10 Jun 2025 17:23:06 +0200 Subject: dt-bindings: pinctrl: stm32: Add RSVD mux function Document the RSVD (Reserved) mux function, used to reserve pins for a coprocessor not running Linux. Signed-off-by: Fabien Dessenne Signed-off-by: Antonio Borneo Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250610152309.299438-3-antonio.borneo@foss.st.com Signed-off-by: Linus Walleij --- include/dt-bindings/pinctrl/stm32-pinfunc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/pinctrl/stm32-pinfunc.h b/include/dt-bindings/pinctrl/stm32-pinfunc.h index 28ad0235086a..af3fd388329a 100644 --- a/include/dt-bindings/pinctrl/stm32-pinfunc.h +++ b/include/dt-bindings/pinctrl/stm32-pinfunc.h @@ -26,6 +26,7 @@ #define AF14 0xf #define AF15 0x10 #define ANALOG 0x11 +#define RSVD 0x12 /* define Pins number*/ #define PIN_NO(port, line) (((port) - 'A') * 0x10 + (line)) -- cgit v1.2.3 From 82a0302e7167d0b7c6cde56613db3748f8dd806d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 16 Jun 2025 16:38:49 +0800 Subject: padata: Remove comment for reorder_work Remove comment for reorder_work which no longer exists. Reported-by: Stephen Rothwell Fixes: 71203f68c774 ("padata: Fix pd UAF once and for all") Signed-off-by: Herbert Xu --- include/linux/padata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/padata.h b/include/linux/padata.h index b486c7359de2..765f2778e264 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -90,7 +90,6 @@ struct padata_cpumask { * @processed: Number of already processed objects. * @cpu: Next CPU to be processed. * @cpumask: The cpumasks in use for parallel and serial workers. - * @reorder_work: work struct for reordering. */ struct parallel_data { struct padata_shell *ps; -- cgit v1.2.3 From 9724e6f1953644cc9a5d102605d624bc79609038 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 11 Jun 2025 08:13:36 +0200 Subject: pinctrl: Constify pointers to 'pinctrl_desc' Pin controller core code only stores the pointer to 'struct pinctrl_desc' and does not modify it anywhere. The pointer can be changed to pointer to const which makes the code safer, explicit and later allows constifying 'pinctrl_desc' allocations in individual drivers. Reviewed-by: Geert Uytterhoeven Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250611-pinctrl-const-desc-v2-4-b11c1d650384@linaro.org Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinctrl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 9a8189ffd0f2..d138e1815645 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -165,25 +165,25 @@ struct pinctrl_desc { /* External interface to pin controller */ -extern int pinctrl_register_and_init(struct pinctrl_desc *pctldesc, +extern int pinctrl_register_and_init(const struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data, struct pinctrl_dev **pctldev); extern int pinctrl_enable(struct pinctrl_dev *pctldev); /* Please use pinctrl_register_and_init() and pinctrl_enable() instead */ -extern struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc, +extern struct pinctrl_dev *pinctrl_register(const struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data); extern void pinctrl_unregister(struct pinctrl_dev *pctldev); extern int devm_pinctrl_register_and_init(struct device *dev, - struct pinctrl_desc *pctldesc, + const struct pinctrl_desc *pctldesc, void *driver_data, struct pinctrl_dev **pctldev); /* Please use devm_pinctrl_register_and_init() instead */ extern struct pinctrl_dev *devm_pinctrl_register(struct device *dev, - struct pinctrl_desc *pctldesc, + const struct pinctrl_desc *pctldesc, void *driver_data); extern void devm_pinctrl_unregister(struct device *dev, -- cgit v1.2.3 From 8358102806c619d8d6c814010173617fb374b77e Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Sun, 18 May 2025 22:06:48 +0000 Subject: dt-bindings: power: rockchip: Add support for RK3528 Add the compatible string and power domains for RK3528 SoC. Signed-off-by: Jonas Karlman Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250518220707.669515-2-jonas@kwiboo.se Signed-off-by: Ulf Hansson --- include/dt-bindings/power/rockchip,rk3528-power.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/dt-bindings/power/rockchip,rk3528-power.h (limited to 'include') diff --git a/include/dt-bindings/power/rockchip,rk3528-power.h b/include/dt-bindings/power/rockchip,rk3528-power.h new file mode 100644 index 000000000000..318923cdaaf6 --- /dev/null +++ b/include/dt-bindings/power/rockchip,rk3528-power.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */ +#ifndef __DT_BINDINGS_POWER_RK3528_POWER_H__ +#define __DT_BINDINGS_POWER_RK3528_POWER_H__ + +#define RK3528_PD_PMU 0 +#define RK3528_PD_BUS 1 +#define RK3528_PD_DDR 2 +#define RK3528_PD_MSCH 3 + +/* VD_GPU */ +#define RK3528_PD_GPU 4 + +/* VD_LOGIC */ +#define RK3528_PD_RKVDEC 5 +#define RK3528_PD_RKVENC 6 +#define RK3528_PD_VO 7 +#define RK3528_PD_VPU 8 + +#endif -- cgit v1.2.3 From d80a75624051b817043431f847470fb4680f2582 Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Mon, 26 May 2025 19:30:55 +0800 Subject: cpufreq: CPPC: Remove cpu_data_list After commit a28b2bfc099c ("cppc_cpufreq: replace per-cpu data array with a list"), cpu_data can be got from policy->driver_data, so cpu_data_list is not actually needed and can be removed. Signed-off-by: Lifeng Zheng Link: https://patch.msgid.link/20250526113057.3086513-2-zhenglifeng1@huawei.com Signed-off-by: Rafael J. Wysocki --- include/acpi/cppc_acpi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 325e9543e08f..20f3d62e7a16 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -139,7 +139,6 @@ struct cppc_perf_fb_ctrs { /* Per CPU container for runtime CPPC management. */ struct cppc_cpudata { - struct list_head node; struct cppc_perf_caps perf_caps; struct cppc_perf_ctrls perf_ctrls; struct cppc_perf_fb_ctrs perf_fb_ctrs; -- cgit v1.2.3 From 271ff96d6066347cd267ac3bcd6021bd4d38913d Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 16 Jun 2025 09:12:07 +0300 Subject: PM: runtime: Document return values of suspend-related API functions Document return values for device suspend and idle related API functions. Signed-off-by: Sakari Ailus Link: https://patch.msgid.link/20250616061212.2286741-2-sakari.ailus@linux.intel.com Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 147 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 138 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index e7cb70fcc0af..9dd2e4031a27 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -337,6 +337,20 @@ static inline void pm_runtime_release_supplier(struct device_link *link) {} * Invoke the "idle check" callback of @dev and, depending on its return value, * set up autosuspend of @dev or suspend it (depending on whether or not * autosuspend has been enabled for it). + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero, Runtime PM status change ongoing + * or device not in %RPM_ACTIVE state. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. + * Other values and conditions for the above values are possible as returned by + * Runtime PM idle and suspend callbacks. */ static inline int pm_runtime_idle(struct device *dev) { @@ -346,6 +360,18 @@ static inline int pm_runtime_idle(struct device *dev) /** * pm_runtime_suspend - Suspend a device synchronously. * @dev: Target device. + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. + * Other values and conditions for the above values are possible as returned by + * Runtime PM suspend callbacks. */ static inline int pm_runtime_suspend(struct device *dev) { @@ -358,6 +384,18 @@ static inline int pm_runtime_suspend(struct device *dev) * * Set up autosuspend of @dev or suspend it (depending on whether or not * autosuspend is enabled for it) without engaging its "idle check" callback. + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. + * Other values and conditions for the above values are possible as returned by + * Runtime PM suspend callbacks. */ static inline int pm_runtime_autosuspend(struct device *dev) { @@ -379,6 +417,18 @@ static inline int pm_runtime_resume(struct device *dev) * * Queue up a work item to run an equivalent of pm_runtime_idle() for @dev * asynchronously. + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero, Runtime PM status change ongoing + * or device not in %RPM_ACTIVE state. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. */ static inline int pm_request_idle(struct device *dev) { @@ -400,6 +450,17 @@ static inline int pm_request_resume(struct device *dev) * * Queue up a work item to run an equivalent pm_runtime_autosuspend() for @dev * asynchronously. + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. */ static inline int pm_request_autosuspend(struct device *dev) { @@ -464,6 +525,17 @@ static inline int pm_runtime_resume_and_get(struct device *dev) * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_idle(). + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. */ static inline int pm_runtime_put(struct device *dev) { @@ -478,6 +550,17 @@ DEFINE_FREE(pm_runtime_put, struct device *, if (_T) pm_runtime_put(_T)) * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. */ static inline int __pm_runtime_put_autosuspend(struct device *dev) { @@ -490,6 +573,17 @@ static inline int __pm_runtime_put_autosuspend(struct device *dev) * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. */ static inline int pm_runtime_put_autosuspend(struct device *dev) { @@ -506,9 +600,20 @@ static inline int pm_runtime_put_autosuspend(struct device *dev) * return value, set up autosuspend of @dev or suspend it (depending on whether * or not autosuspend has been enabled for it). * - * The possible return values of this function are the same as for - * pm_runtime_idle() and the runtime PM usage counter of @dev remains - * decremented in all cases, even if it returns an error code. + * The runtime PM usage counter of @dev remains decremented in all cases, even + * if it returns an error code. + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. + * Other values and conditions for the above values are possible as returned by + * Runtime PM suspend callbacks. */ static inline int pm_runtime_put_sync(struct device *dev) { @@ -522,9 +627,21 @@ static inline int pm_runtime_put_sync(struct device *dev) * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, carry out runtime-suspend of @dev synchronously. * - * The possible return values of this function are the same as for - * pm_runtime_suspend() and the runtime PM usage counter of @dev remains - * decremented in all cases, even if it returns an error code. + * The runtime PM usage counter of @dev remains decremented in all cases, even + * if it returns an error code. + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: usage_count non-zero or Runtime PM status change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. + * Other values and conditions for the above values are possible as returned by + * Runtime PM suspend callbacks. */ static inline int pm_runtime_put_sync_suspend(struct device *dev) { @@ -539,9 +656,21 @@ static inline int pm_runtime_put_sync_suspend(struct device *dev) * equal to 0, set up autosuspend of @dev or suspend it synchronously (depending * on whether or not autosuspend has been enabled for it). * - * The possible return values of this function are the same as for - * pm_runtime_autosuspend() and the runtime PM usage counter of @dev remains - * decremented in all cases, even if it returns an error code. + * The runtime PM usage counter of @dev remains decremented in all cases, even + * if it returns an error code. + * + * Return: + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. + * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. + * * 1: Device already suspended. + * Other values and conditions for the above values are possible as returned by + * Runtime PM suspend callbacks. */ static inline int pm_runtime_put_sync_autosuspend(struct device *dev) { -- cgit v1.2.3 From b3db492e8335417dfd66c1fa2ea08e1d2f7b6736 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 16 Jun 2025 09:12:08 +0300 Subject: PM: runtime: Mark last busy stamp in pm_runtime_put_autosuspend() Set device's last busy timestamp to current time in pm_runtime_put_autosuspend(). Callers wishing not to do that will need to use __pm_runtime_put_autosuspend(). Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Link: https://patch.msgid.link/20250616061212.2286741-3-sakari.ailus@linux.intel.com Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 9dd2e4031a27..14ca7be96686 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -568,11 +568,13 @@ static inline int __pm_runtime_put_autosuspend(struct device *dev) } /** - * pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. + * pm_runtime_put_autosuspend - Update the last access time of a device, drop + * its usage counter and queue autosuspend if the usage counter becomes 0. * @dev: Target device. * - * Decrement the runtime PM usage counter of @dev and if it turns out to be - * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). + * Update the last access time of @dev, decrement runtime PM usage counter of + * @dev and if it turns out to be equal to 0, queue up a work item for @dev like + * in pm_request_autosuspend(). * * Return: * * 0: Success. @@ -587,8 +589,8 @@ static inline int __pm_runtime_put_autosuspend(struct device *dev) */ static inline int pm_runtime_put_autosuspend(struct device *dev) { - return __pm_runtime_suspend(dev, - RPM_GET_PUT | RPM_ASYNC | RPM_AUTO); + pm_runtime_mark_last_busy(dev); + return __pm_runtime_put_autosuspend(dev); } /** -- cgit v1.2.3 From e24e0630b5ba13e83f65905becde9945518efa0b Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 16 Jun 2025 09:12:09 +0300 Subject: PM: runtime: Mark last busy stamp in pm_runtime_put_sync_autosuspend() Set device's last busy timestamp to current time in pm_runtime_put_sync_autosuspend(). Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Link: https://patch.msgid.link/20250616061212.2286741-4-sakari.ailus@linux.intel.com Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 14ca7be96686..3a0d5f0ea471 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -651,12 +651,14 @@ static inline int pm_runtime_put_sync_suspend(struct device *dev) } /** - * pm_runtime_put_sync_autosuspend - Drop device usage counter and autosuspend if 0. + * pm_runtime_put_sync_autosuspend - Update the last access time of a device, + * drop device usage counter and autosuspend if 0. * @dev: Target device. * - * Decrement the runtime PM usage counter of @dev and if it turns out to be - * equal to 0, set up autosuspend of @dev or suspend it synchronously (depending - * on whether or not autosuspend has been enabled for it). + * Update the last access time of @dev, decrement the runtime PM usage counter + * of @dev and if it turns out to be equal to 0, set up autosuspend of @dev or + * suspend it synchronously (depending on whether or not autosuspend has been + * enabled for it). * * The runtime PM usage counter of @dev remains decremented in all cases, even * if it returns an error code. @@ -676,6 +678,7 @@ static inline int pm_runtime_put_sync_suspend(struct device *dev) */ static inline int pm_runtime_put_sync_autosuspend(struct device *dev) { + pm_runtime_mark_last_busy(dev); return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_AUTO); } -- cgit v1.2.3 From 08071e64cb642ae19ebd6ffeb13b4f3d130b5860 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 16 Jun 2025 09:12:10 +0300 Subject: PM: runtime: Mark last busy stamp in pm_runtime_autosuspend() Set device's last busy timestamp to current time in pm_runtime_autosuspend(). Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Link: https://patch.msgid.link/20250616061212.2286741-5-sakari.ailus@linux.intel.com Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 3a0d5f0ea471..566a07b60f63 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -379,11 +379,13 @@ static inline int pm_runtime_suspend(struct device *dev) } /** - * pm_runtime_autosuspend - Set up autosuspend of a device or suspend it. + * pm_runtime_autosuspend - Update the last access time and set up autosuspend + * of a device. * @dev: Target device. * - * Set up autosuspend of @dev or suspend it (depending on whether or not - * autosuspend is enabled for it) without engaging its "idle check" callback. + * First update the last access time, then set up autosuspend of @dev or suspend + * it (depending on whether or not autosuspend is enabled for it) without + * engaging its "idle check" callback. * * Return: * * 0: Success. @@ -399,6 +401,7 @@ static inline int pm_runtime_suspend(struct device *dev) */ static inline int pm_runtime_autosuspend(struct device *dev) { + pm_runtime_mark_last_busy(dev); return __pm_runtime_suspend(dev, RPM_AUTO); } -- cgit v1.2.3 From 18c1fe53d186867243f4cf17f4eef60737a16c4c Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 16 Jun 2025 09:12:11 +0300 Subject: PM: runtime: Mark last busy stamp in pm_request_autosuspend() Set device's last busy timestamp to current time in pm_request_autosuspend(). Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Link: https://patch.msgid.link/20250616061212.2286741-6-sakari.ailus@linux.intel.com Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 566a07b60f63..778d5988f35e 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -448,11 +448,12 @@ static inline int pm_request_resume(struct device *dev) } /** - * pm_request_autosuspend - Queue up autosuspend of a device. + * pm_request_autosuspend - Update the last access time and queue up autosuspend + * of a device. * @dev: Target device. * - * Queue up a work item to run an equivalent pm_runtime_autosuspend() for @dev - * asynchronously. + * Update the last access time of a device and queue up a work item to run an + * equivalent pm_runtime_autosuspend() for @dev asynchronously. * * Return: * * 0: Success. @@ -467,6 +468,7 @@ static inline int pm_request_resume(struct device *dev) */ static inline int pm_request_autosuspend(struct device *dev) { + pm_runtime_mark_last_busy(dev); return __pm_runtime_suspend(dev, RPM_ASYNC | RPM_AUTO); } -- cgit v1.2.3 From c6d732c38f93c4aebd204a5656583142289c3a2e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 17 Jun 2025 13:22:40 -0700 Subject: net: ethtool: remove duplicate defines for family info Commit under fixes switched to uAPI generation from the YAML spec. A number of custom defines were left behind, mostly for commands very hard to express in YAML spec. Among what was left behind was the name and version of the generic netlink family. Problem is that the codegen always outputs those values so we ended up with a duplicated, differently named set of defines. Provide naming info in YAML and remove the incorrect defines. Fixes: 8d0580c6ebdd ("ethtool: regenerate uapi header from the spec") Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250617202240.811179-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 4 ---- include/uapi/linux/ethtool_netlink_generated.h | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 9ff72cfb2e98..09a75bdb6560 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -208,10 +208,6 @@ enum { ETHTOOL_A_STATS_PHY_MAX = (__ETHTOOL_A_STATS_PHY_CNT - 1) }; -/* generic netlink info */ -#define ETHTOOL_GENL_NAME "ethtool" -#define ETHTOOL_GENL_VERSION 1 - #define ETHTOOL_MCGRP_MONITOR_NAME "monitor" #endif /* _UAPI_LINUX_ETHTOOL_NETLINK_H_ */ diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 9a02f579de22..aa8ab5227c1e 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -6,8 +6,8 @@ #ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H #define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H -#define ETHTOOL_FAMILY_NAME "ethtool" -#define ETHTOOL_FAMILY_VERSION 1 +#define ETHTOOL_GENL_NAME "ethtool" +#define ETHTOOL_GENL_VERSION 1 enum { ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, -- cgit v1.2.3 From 4bfbc2691de8c869339090e851703209b17ba378 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 10 Jun 2025 12:40:59 +0200 Subject: mux: Convert mux_control_ops to a flex array member in mux_chip Convert mux_control_ops to a flexible array member at the end of the mux_chip struct and add the __counted_by() compiler attribute to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and CONFIG_FORTIFY_SOURCE. Use struct_size() to calculate the number of bytes to allocate for a new mux chip and to remove the following Coccinelle/coccicheck warning: WARNING: Use struct_size Use size_add() to safely add any extra bytes. No functional changes intended. Link: https://github.com/KSPP/linux/issues/83 Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20250610104106.1948-2-thorsten.blum@linux.dev Signed-off-by: Kees Cook --- include/linux/mux/driver.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mux/driver.h b/include/linux/mux/driver.h index 18824064f8c0..e58e59354e23 100644 --- a/include/linux/mux/driver.h +++ b/include/linux/mux/driver.h @@ -56,18 +56,18 @@ struct mux_control { /** * struct mux_chip - Represents a chip holding mux controllers. * @controllers: Number of mux controllers handled by the chip. - * @mux: Array of mux controllers that are handled. * @dev: Device structure. * @id: Used to identify the device internally. * @ops: Mux controller operations. + * @mux: Array of mux controllers that are handled. */ struct mux_chip { unsigned int controllers; - struct mux_control *mux; struct device dev; int id; const struct mux_control_ops *ops; + struct mux_control mux[] __counted_by(controllers); }; #define to_mux_chip(x) container_of((x), struct mux_chip, dev) -- cgit v1.2.3 From 29bb79e9dbf1ba100125e39deb7147acd490903f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 17 Jun 2025 13:05:36 -0600 Subject: stddef: Introduce TRAILING_OVERLAP() helper macro Add new TRAILING_OVERLAP() helper macro to create a union between a flexible-array member (FAM) and a set of members that would otherwise follow it. This overlays the trailing members onto the FAM while preserving the original memory layout. Co-developed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/aFG8gEwKXAWWIvX0@kspp Signed-off-by: Kees Cook --- include/linux/stddef.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/stddef.h b/include/linux/stddef.h index 929d67710cc5..dab49e2ec8c0 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -93,4 +93,24 @@ enum { #define DECLARE_FLEX_ARRAY(TYPE, NAME) \ __DECLARE_FLEX_ARRAY(TYPE, NAME) +/** + * TRAILING_OVERLAP() - Overlap a flexible-array member with trailing members. + * + * Creates a union between a flexible-array member (FAM) in a struct and a set + * of additional members that would otherwise follow it. + * + * @TYPE: Flexible structure type name, including "struct" keyword. + * @NAME: Name for a variable to define. + * @FAM: The flexible-array member within @TYPE + * @MEMBERS: Trailing overlapping members. + */ +#define TRAILING_OVERLAP(TYPE, NAME, FAM, MEMBERS) \ + union { \ + TYPE NAME; \ + struct { \ + unsigned char __offset_to_##FAM[offsetof(TYPE, FAM)]; \ + MEMBERS \ + }; \ + } + #endif -- cgit v1.2.3 From 0c25ae62f5dc6a438b563536b5fe7fb6da3612b8 Mon Sep 17 00:00:00 2001 From: Luo Jie Date: Tue, 10 Jun 2025 18:35:18 +0800 Subject: dt-bindings: clock: qcom: Add CMN PLL support for IPQ5424 SoC The CMN PLL block in the IPQ5424 SoC takes 48 MHZ as the reference input clock. The output clocks are the same as IPQ9574 SoC, except for the clock rate of output clocks to PPE and NSS. Also, add the new header file to export the CMN PLL output clock specifiers for IPQ5424 SoC. Acked-by: Rob Herring (Arm) Signed-off-by: Luo Jie Link: https://lore.kernel.org/r/20250610-qcom_ipq5424_cmnpll-v3-1-ceada8165645@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,ipq5424-cmn-pll.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,ipq5424-cmn-pll.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,ipq5424-cmn-pll.h b/include/dt-bindings/clock/qcom,ipq5424-cmn-pll.h new file mode 100644 index 000000000000..f643c2668c04 --- /dev/null +++ b/include/dt-bindings/clock/qcom,ipq5424-cmn-pll.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_IPQ5424_CMN_PLL_H +#define _DT_BINDINGS_CLK_QCOM_IPQ5424_CMN_PLL_H + +/* CMN PLL core clock. */ +#define IPQ5424_CMN_PLL_CLK 0 + +/* The output clocks from CMN PLL of IPQ5424. */ +#define IPQ5424_XO_24MHZ_CLK 1 +#define IPQ5424_SLEEP_32KHZ_CLK 2 +#define IPQ5424_PCS_31P25MHZ_CLK 3 +#define IPQ5424_NSS_300MHZ_CLK 4 +#define IPQ5424_PPE_375MHZ_CLK 5 +#define IPQ5424_ETH0_50MHZ_CLK 6 +#define IPQ5424_ETH1_50MHZ_CLK 7 +#define IPQ5424_ETH2_50MHZ_CLK 8 +#define IPQ5424_ETH_25MHZ_CLK 9 +#endif -- cgit v1.2.3 From a33556940b5727191613104bced53c93f4a7a3aa Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 17 Jun 2025 21:06:13 +0800 Subject: tcp: Remove inet_hashinfo2_free_mod() DCCP was removed, inet_hashinfo2_free_mod() is unused now. Signed-off-by: Yue Haibing Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250617130613.498659-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski --- include/net/inet_hashtables.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 4564b5d348b1..ae09e91398a5 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -202,12 +202,6 @@ static inline spinlock_t *inet_ehash_lockp( int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo); -static inline void inet_hashinfo2_free_mod(struct inet_hashinfo *h) -{ - kfree(h->lhash2); - h->lhash2 = NULL; -} - static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) { kvfree(hashinfo->ehash_locks); -- cgit v1.2.3 From 1ead7501094c6a61461c0c98dde9ec5660fa1e24 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 16 Jun 2025 09:21:14 -0700 Subject: udp_tunnel: remove rtnl_lock dependency Drivers that are using ops lock and don't depend on RTNL lock still need to manage it because udp_tunnel's RTNL dependency. Introduce new udp_tunnel_nic_lock and use it instead of rtnl_lock. Drop non-UDP_TUNNEL_NIC_INFO_MAY_SLEEP mode from udp_tunnel infra (udp_tunnel_nic_device_sync_work needs to grab udp_tunnel_nic_lock mutex and might sleep). Cover more places in v4: - netlink - udp_tunnel_notify_add_rx_port (ndo_open) - triggers udp_tunnel_nic_device_sync_work - udp_tunnel_notify_del_rx_port (ndo_stop) - triggers udp_tunnel_nic_device_sync_work - udp_tunnel_get_rx_info (__netdev_update_features) - triggers NETDEV_UDP_TUNNEL_PUSH_INFO - udp_tunnel_drop_rx_info (__netdev_update_features) - triggers NETDEV_UDP_TUNNEL_DROP_INFO - udp_tunnel_nic_reset_ntf (ndo_open) - notifiers - udp_tunnel_nic_netdevice_event, depending on the event: - triggers NETDEV_UDP_TUNNEL_PUSH_INFO - triggers NETDEV_UDP_TUNNEL_DROP_INFO - ethnl_tunnel_info_reply_size - udp_tunnel_nic_set_port_priv (two intel drivers) Cc: Michael Chan Suggested-by: Jakub Kicinski Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250616162117.287806-4-stfomichev@gmail.com Signed-off-by: Jakub Kicinski --- include/net/udp_tunnel.h | 87 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 63 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index e3c70b579095..cbd3a43074bd 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -130,22 +130,6 @@ void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock, void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type); void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type); -static inline void udp_tunnel_get_rx_info(struct net_device *dev) -{ - ASSERT_RTNL(); - if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) - return; - call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev); -} - -static inline void udp_tunnel_drop_rx_info(struct net_device *dev) -{ - ASSERT_RTNL(); - if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) - return; - call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev); -} - /* Transmit the skb using UDP encapsulation. */ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, @@ -222,19 +206,17 @@ static inline void udp_tunnel_encap_enable(struct sock *sk) #define UDP_TUNNEL_NIC_MAX_TABLES 4 enum udp_tunnel_nic_info_flags { - /* Device callbacks may sleep */ - UDP_TUNNEL_NIC_INFO_MAY_SLEEP = BIT(0), /* Device only supports offloads when it's open, all ports * will be removed before close and re-added after open. */ - UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(1), + UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(0), /* Device supports only IPv4 tunnels */ - UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(2), + UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(1), /* Device has hard-coded the IANA VXLAN port (4789) as VXLAN. * This port must not be counted towards n_entries of any table. * Driver will not receive any callback associated with port 4789. */ - UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(3), + UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(2), }; struct udp_tunnel_nic; @@ -325,6 +307,9 @@ struct udp_tunnel_nic_ops { size_t (*dump_size)(struct net_device *dev, unsigned int table); int (*dump_write)(struct net_device *dev, unsigned int table, struct sk_buff *skb); + void (*assert_locked)(struct net_device *dev); + void (*lock)(struct net_device *dev); + void (*unlock)(struct net_device *dev); }; #ifdef CONFIG_INET @@ -353,8 +338,29 @@ static inline void udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table, unsigned int idx, u8 priv) { - if (udp_tunnel_nic_ops) + if (udp_tunnel_nic_ops) { + udp_tunnel_nic_ops->lock(dev); udp_tunnel_nic_ops->set_port_priv(dev, table, idx, priv); + udp_tunnel_nic_ops->unlock(dev); + } +} + +static inline void udp_tunnel_nic_assert_locked(struct net_device *dev) +{ + if (udp_tunnel_nic_ops) + udp_tunnel_nic_ops->assert_locked(dev); +} + +static inline void udp_tunnel_nic_lock(struct net_device *dev) +{ + if (udp_tunnel_nic_ops) + udp_tunnel_nic_ops->lock(dev); +} + +static inline void udp_tunnel_nic_unlock(struct net_device *dev) +{ + if (udp_tunnel_nic_ops) + udp_tunnel_nic_ops->unlock(dev); } static inline void @@ -396,17 +402,50 @@ static inline void udp_tunnel_nic_reset_ntf(struct net_device *dev) static inline size_t udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table) { + size_t ret; + if (!udp_tunnel_nic_ops) return 0; - return udp_tunnel_nic_ops->dump_size(dev, table); + + udp_tunnel_nic_ops->lock(dev); + ret = udp_tunnel_nic_ops->dump_size(dev, table); + udp_tunnel_nic_ops->unlock(dev); + + return ret; } static inline int udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table, struct sk_buff *skb) { + int ret; + if (!udp_tunnel_nic_ops) return 0; - return udp_tunnel_nic_ops->dump_write(dev, table, skb); + + udp_tunnel_nic_ops->lock(dev); + ret = udp_tunnel_nic_ops->dump_write(dev, table, skb); + udp_tunnel_nic_ops->unlock(dev); + + return ret; +} + +static inline void udp_tunnel_get_rx_info(struct net_device *dev) +{ + ASSERT_RTNL(); + if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + return; + udp_tunnel_nic_assert_locked(dev); + call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev); } + +static inline void udp_tunnel_drop_rx_info(struct net_device *dev) +{ + ASSERT_RTNL(); + if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + return; + udp_tunnel_nic_assert_locked(dev); + call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev); +} + #endif -- cgit v1.2.3 From cea465a96a294e7bc2537f27a737cfa7c6234b3d Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Tue, 17 Jun 2025 14:05:41 +0300 Subject: devlink: Add new "enable_phc" generic device param Add a new device generic parameter to enable/disable the PHC (PTP Hardware Clock) functionality in the device associated with the devlink instance. Signed-off-by: David Arinzon Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20250617110545.5659-6-darinzon@amazon.com Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 0091f23a40f7..63517646a497 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -520,6 +520,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP, DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, + DEVLINK_PARAM_GENERIC_ID_ENABLE_PHC, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -578,6 +579,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME "event_eq_size" #define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U32 +#define DEVLINK_PARAM_GENERIC_ENABLE_PHC_NAME "enable_phc" +#define DEVLINK_PARAM_GENERIC_ENABLE_PHC_TYPE DEVLINK_PARAM_TYPE_BOOL + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ -- cgit v1.2.3 From fa2f0454174c2f33005f5a6e6f70c7160a15b2a1 Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Tue, 17 Jun 2025 14:12:00 +0200 Subject: net: pse-pd: Introduce attached_phydev to pse control In preparation for reporting PSE events via ethtool notifications, introduce an attached_phydev field in the pse_control structure. This field stores the phy_device associated with the PSE PI, ensuring that notifications are sent to the correct network interface. The attached_phydev pointer is directly tied to the PHY lifecycle. It is set when the PHY is registered and cleared when the PHY is removed. There is no need to use a refcount, as doing so could interfere with the PHY removal process. Signed-off-by: Kory Maincent (Dent Project) Reviewed-by: Oleksij Rempel Link: https://patch.msgid.link/20250617-feature_poe_port_prio-v14-1-78a1a645e2ee@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/pse-pd/pse.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index c773eeb92d04..8b0866fad2ad 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -250,7 +250,8 @@ struct device; int devm_pse_controller_register(struct device *dev, struct pse_controller_dev *pcdev); -struct pse_control *of_pse_control_get(struct device_node *node); +struct pse_control *of_pse_control_get(struct device_node *node, + struct phy_device *phydev); void pse_control_put(struct pse_control *psec); int pse_ethtool_get_status(struct pse_control *psec, @@ -268,7 +269,8 @@ bool pse_has_c33(struct pse_control *psec); #else -static inline struct pse_control *of_pse_control_get(struct device_node *node) +static inline struct pse_control *of_pse_control_get(struct device_node *node, + struct phy_device *phydev) { return ERR_PTR(-ENOENT); } -- cgit v1.2.3 From fc0e6db30941a66e284b8516b82356f97f31061d Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Tue, 17 Jun 2025 14:12:01 +0200 Subject: net: pse-pd: Add support for reporting events Add support for devm_pse_irq_helper() to register PSE interrupts and report events such as over-current or over-temperature conditions. This follows a similar approach to the regulator API but also sends notifications using a dedicated PSE ethtool netlink socket. Signed-off-by: Kory Maincent (Dent Project) Link: https://patch.msgid.link/20250617-feature_poe_port_prio-v14-2-78a1a645e2ee@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool_netlink.h | 7 +++++++ include/linux/pse-pd/pse.h | 20 ++++++++++++++++++++ include/uapi/linux/ethtool_netlink_generated.h | 19 +++++++++++++++++++ 3 files changed, 46 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h index aba91335273a..1dcc4059b5ab 100644 --- a/include/linux/ethtool_netlink.h +++ b/include/linux/ethtool_netlink.h @@ -43,6 +43,8 @@ void ethtool_aggregate_rmon_stats(struct net_device *dev, struct ethtool_rmon_stats *rmon_stats); bool ethtool_dev_mm_supported(struct net_device *dev); +void ethnl_pse_send_ntf(struct net_device *netdev, unsigned long notif); + #else static inline int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd) { @@ -120,6 +122,11 @@ static inline bool ethtool_dev_mm_supported(struct net_device *dev) return false; } +static inline void ethnl_pse_send_ntf(struct phy_device *phydev, + unsigned long notif) +{ +} + #endif /* IS_ENABLED(CONFIG_ETHTOOL_NETLINK) */ static inline int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index 8b0866fad2ad..6eb064722aa8 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -7,12 +7,15 @@ #include #include +#include +#include /* Maximum current in uA according to IEEE 802.3-2022 Table 145-1 */ #define MAX_PI_CURRENT 1920000 /* Maximum power in mW according to IEEE 802.3-2022 Table 145-16 */ #define MAX_PI_PW 99900 +struct net_device; struct phy_device; struct pse_controller_dev; struct netlink_ext_ack; @@ -37,6 +40,19 @@ struct ethtool_c33_pse_pw_limit_range { u32 max; }; +/** + * struct pse_irq_desc - notification sender description for IRQ based events. + * + * @name: the visible name for the IRQ + * @map_event: driver callback to map IRQ status into PSE devices with events. + */ +struct pse_irq_desc { + const char *name; + int (*map_event)(int irq, struct pse_controller_dev *pcdev, + unsigned long *notifs, + unsigned long *notifs_mask); +}; + /** * struct pse_control_config - PSE control/channel configuration. * @@ -228,6 +244,7 @@ struct pse_pi { * @types: types of the PSE controller * @pi: table of PSE PIs described in this controller device * @no_of_pse_pi: flag set if the pse_pis devicetree node is not used + * @irq: PSE interrupt */ struct pse_controller_dev { const struct pse_controller_ops *ops; @@ -241,6 +258,7 @@ struct pse_controller_dev { enum ethtool_pse_types types; struct pse_pi *pi; bool no_of_pse_pi; + int irq; }; #if IS_ENABLED(CONFIG_PSE_CONTROLLER) @@ -249,6 +267,8 @@ void pse_controller_unregister(struct pse_controller_dev *pcdev); struct device; int devm_pse_controller_register(struct device *dev, struct pse_controller_dev *pcdev); +int devm_pse_irq_helper(struct pse_controller_dev *pcdev, int irq, + int irq_flags, const struct pse_irq_desc *d); struct pse_control *of_pse_control_get(struct device_node *node, struct phy_device *phydev); diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 9a02f579de22..3864aa0de8c7 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -49,6 +49,16 @@ enum hwtstamp_source { HWTSTAMP_SOURCE_PHYLIB, }; +/** + * enum ethtool_pse_event - PSE event list for the PSE controller + * @ETHTOOL_PSE_EVENT_OVER_CURRENT: PSE output current is too high + * @ETHTOOL_PSE_EVENT_OVER_TEMP: PSE in over temperature state + */ +enum ethtool_pse_event { + ETHTOOL_PSE_EVENT_OVER_CURRENT = 1, + ETHTOOL_PSE_EVENT_OVER_TEMP = 2, +}; + enum { ETHTOOL_A_HEADER_UNSPEC, ETHTOOL_A_HEADER_DEV_INDEX, @@ -718,6 +728,14 @@ enum { ETHTOOL_A_TSCONFIG_MAX = (__ETHTOOL_A_TSCONFIG_CNT - 1) }; +enum { + ETHTOOL_A_PSE_NTF_HEADER = 1, + ETHTOOL_A_PSE_NTF_EVENTS, + + __ETHTOOL_A_PSE_NTF_CNT, + ETHTOOL_A_PSE_NTF_MAX = (__ETHTOOL_A_PSE_NTF_CNT - 1) +}; + enum { ETHTOOL_MSG_USER_NONE = 0, ETHTOOL_MSG_STRSET_GET = 1, @@ -822,6 +840,7 @@ enum { ETHTOOL_MSG_PHY_NTF, ETHTOOL_MSG_TSCONFIG_GET_REPLY, ETHTOOL_MSG_TSCONFIG_SET_REPLY, + ETHTOOL_MSG_PSE_NTF, __ETHTOOL_MSG_KERNEL_CNT, ETHTOOL_MSG_KERNEL_MAX = (__ETHTOOL_MSG_KERNEL_CNT - 1) -- cgit v1.2.3 From 50f8b341d26826aa5fdccb8f497fbff2500934b3 Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Tue, 17 Jun 2025 14:12:03 +0200 Subject: net: pse-pd: Add support for PSE power domains Introduce PSE power domain support as groundwork for upcoming port priority features. Multiple PSE PIs can now be grouped under a single PSE power domain, enabling future enhancements like defining available power budgets, port priority modes, and disconnection policies. This setup will allow the system to assess whether activating a port would exceed the available power budget, preventing over-budget states proactively. Signed-off-by: Kory Maincent (Dent Project) Reviewed-by: Oleksij Rempel Link: https://patch.msgid.link/20250617-feature_poe_port_prio-v14-4-78a1a645e2ee@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/pse-pd/pse.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index 6eb064722aa8..f736b1677ea5 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -222,12 +222,14 @@ struct pse_pi_pairset { * @np: device node pointer of the PSE PI node * @rdev: regulator represented by the PSE PI * @admin_state_enabled: PI enabled state + * @pw_d: Power domain of the PSE PI */ struct pse_pi { struct pse_pi_pairset pairset[2]; struct device_node *np; struct regulator_dev *rdev; bool admin_state_enabled; + struct pse_power_domain *pw_d; }; /** -- cgit v1.2.3 From 1176978ed851952652ddea3685e2f71a0e5d61ff Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Tue, 17 Jun 2025 14:12:04 +0200 Subject: net: ethtool: Add support for new power domains index description Report the index of the newly introduced PSE power domain to the user, enabling improved management of the power budget for PSE devices. Signed-off-by: Kory Maincent (Dent Project) Reviewed-by: Oleksij Rempel Link: https://patch.msgid.link/20250617-feature_poe_port_prio-v14-5-78a1a645e2ee@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/pse-pd/pse.h | 2 ++ include/uapi/linux/ethtool_netlink_generated.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index f736b1677ea5..2f8ecfd87d43 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -114,6 +114,7 @@ struct pse_pw_limit_ranges { /** * struct ethtool_pse_control_status - PSE control/channel status. * + * @pw_d_id: PSE power domain index. * @podl_admin_state: operational state of the PoDL PSE * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState * @podl_pw_status: power detection status of the PoDL PSE. @@ -135,6 +136,7 @@ struct pse_pw_limit_ranges { * ranges */ struct ethtool_pse_control_status { + u32 pw_d_id; enum ethtool_podl_pse_admin_state podl_admin_state; enum ethtool_podl_pse_pw_d_status podl_pw_status; enum ethtool_c33_pse_admin_state c33_admin_state; diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 3864aa0de8c7..ed344c8533eb 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -652,6 +652,7 @@ enum { ETHTOOL_A_C33_PSE_EXT_SUBSTATE, ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, + ETHTOOL_A_PSE_PW_D_ID, __ETHTOOL_A_PSE_CNT, ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1) -- cgit v1.2.3 From ffef61d6d27374542f1bce4452200d9bdd2e1edd Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Tue, 17 Jun 2025 14:12:06 +0200 Subject: net: pse-pd: Add support for budget evaluation strategies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces the ability to configure the PSE PI budget evaluation strategies. Budget evaluation strategies is utilized by PSE controllers to determine which ports to turn off first in scenarios such as power budget exceedance. The pis_prio_max value is used to define the maximum priority level supported by the controller. Both the current priority and the maximum priority are exposed to the user through the pse_ethtool_get_status call. This patch add support for two mode of budget evaluation strategies. 1. Static Method: This method involves distributing power based on PD classification. It’s straightforward and stable, the PSE core keeping track of the budget and subtracting the power requested by each PD’s class. Advantages: Every PD gets its promised power at any time, which guarantees reliability. Disadvantages: PD classification steps are large, meaning devices request much more power than they actually need. As a result, the power supply may only operate at, say, 50% capacity, which is inefficient and wastes money. Priority max value is matching the number of PSE PIs within the PSE. 2. Dynamic Method: To address the inefficiencies of the static method, vendors like Microchip have introduced dynamic power budgeting, as seen in the PD692x0 firmware. This method monitors the current consumption per port and subtracts it from the available power budget. When the budget is exceeded, lower-priority ports are shut down. Advantages: This method optimizes resource utilization, saving costs. Disadvantages: Low-priority devices may experience instability. Priority max value is set by the PSE controller driver. For now, budget evaluation methods are not configurable and cannot be mixed. They are hardcoded in the PSE driver itself, as no current PSE controller supports both methods. Signed-off-by: Kory Maincent (Dent Project) Acked-by: Oleksij Rempel Link: https://patch.msgid.link/20250617-feature_poe_port_prio-v14-7-78a1a645e2ee@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/pse-pd/pse.h | 76 ++++++++++++++++++++++++++ include/uapi/linux/ethtool_netlink_generated.h | 18 ++++++ 2 files changed, 94 insertions(+) (limited to 'include') diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index 2f8ecfd87d43..e5f305cef82e 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -6,6 +6,8 @@ #define _LINUX_PSE_CONTROLLER_H #include +#include +#include #include #include #include @@ -134,6 +136,9 @@ struct pse_pw_limit_ranges { * is in charge of the memory allocation * @c33_pw_limit_nb_ranges: number of supported power limit configuration * ranges + * @prio_max: max priority allowed for the c33_prio variable value. + * @prio: priority of the PSE. Managed by PSE core in case of static budget + * evaluation strategy. */ struct ethtool_pse_control_status { u32 pw_d_id; @@ -147,6 +152,8 @@ struct ethtool_pse_control_status { u32 c33_avail_pw_limit; struct ethtool_c33_pse_pw_limit_range *c33_pw_limit_ranges; u32 c33_pw_limit_nb_ranges; + u32 prio_max; + u32 prio; }; /** @@ -170,6 +177,11 @@ struct ethtool_pse_control_status { * range. The driver is in charge of the memory * allocation and should return the number of * ranges. + * @pi_get_prio: Get the PSE PI priority. + * @pi_set_prio: Configure the PSE PI priority. + * @pi_get_pw_req: Get the power requested by a PD before enabling the PSE PI. + * This is only relevant when an interrupt is registered using + * devm_pse_irq_helper helper. */ struct pse_controller_ops { int (*setup_pi_matrix)(struct pse_controller_dev *pcdev); @@ -190,6 +202,10 @@ struct pse_controller_ops { int id, int max_mW); int (*pi_get_pw_limit_ranges)(struct pse_controller_dev *pcdev, int id, struct pse_pw_limit_ranges *pw_limit_ranges); + int (*pi_get_prio)(struct pse_controller_dev *pcdev, int id); + int (*pi_set_prio)(struct pse_controller_dev *pcdev, int id, + unsigned int prio); + int (*pi_get_pw_req)(struct pse_controller_dev *pcdev, int id); }; struct module; @@ -225,6 +241,13 @@ struct pse_pi_pairset { * @rdev: regulator represented by the PSE PI * @admin_state_enabled: PI enabled state * @pw_d: Power domain of the PSE PI + * @prio: Priority of the PSE PI. Used in static budget evaluation strategy + * @isr_pd_detected: PSE PI detection status managed by the interruption + * handler. This variable is relevant when the power enabled + * management is managed in software like the static + * budget evaluation strategy. + * @pw_allocated_mW: Power allocated to a PSE PI to manage power budget in + * static budget evaluation strategy. */ struct pse_pi { struct pse_pi_pairset pairset[2]; @@ -232,6 +255,20 @@ struct pse_pi { struct regulator_dev *rdev; bool admin_state_enabled; struct pse_power_domain *pw_d; + int prio; + bool isr_pd_detected; + int pw_allocated_mW; +}; + +/** + * struct pse_ntf - PSE notification element + * + * @id: ID of the PSE control + * @notifs: PSE notifications to be reported + */ +struct pse_ntf { + int id; + unsigned long notifs; }; /** @@ -249,6 +286,12 @@ struct pse_pi { * @pi: table of PSE PIs described in this controller device * @no_of_pse_pi: flag set if the pse_pis devicetree node is not used * @irq: PSE interrupt + * @pis_prio_max: Maximum value allowed for the PSE PIs priority + * @supp_budget_eval_strategies: budget evaluation strategies supported + * by the PSE + * @ntf_work: workqueue for PSE notification management + * @ntf_fifo: PSE notifications FIFO + * @ntf_fifo_lock: protect @ntf_fifo writer */ struct pse_controller_dev { const struct pse_controller_ops *ops; @@ -263,6 +306,29 @@ struct pse_controller_dev { struct pse_pi *pi; bool no_of_pse_pi; int irq; + unsigned int pis_prio_max; + u32 supp_budget_eval_strategies; + struct work_struct ntf_work; + DECLARE_KFIFO_PTR(ntf_fifo, struct pse_ntf); + spinlock_t ntf_fifo_lock; /* Protect @ntf_fifo writer */ +}; + +/** + * enum pse_budget_eval_strategies - PSE budget evaluation strategies. + * @PSE_BUDGET_EVAL_STRAT_DISABLED: Budget evaluation strategy disabled. + * @PSE_BUDGET_EVAL_STRAT_STATIC: PSE static budget evaluation strategy. + * Budget evaluation strategy based on the power requested during PD + * classification. This strategy is managed by the PSE core. + * @PSE_BUDGET_EVAL_STRAT_DYNAMIC: PSE dynamic budget evaluation + * strategy. Budget evaluation strategy based on the current consumption + * per ports compared to the total power budget. This mode is managed by + * the PSE controller. + */ + +enum pse_budget_eval_strategies { + PSE_BUDGET_EVAL_STRAT_DISABLED = 1 << 0, + PSE_BUDGET_EVAL_STRAT_STATIC = 1 << 1, + PSE_BUDGET_EVAL_STRAT_DYNAMIC = 1 << 2, }; #if IS_ENABLED(CONFIG_PSE_CONTROLLER) @@ -287,6 +353,9 @@ int pse_ethtool_set_config(struct pse_control *psec, int pse_ethtool_set_pw_limit(struct pse_control *psec, struct netlink_ext_ack *extack, const unsigned int pw_limit); +int pse_ethtool_set_prio(struct pse_control *psec, + struct netlink_ext_ack *extack, + unsigned int prio); bool pse_has_podl(struct pse_control *psec); bool pse_has_c33(struct pse_control *psec); @@ -324,6 +393,13 @@ static inline int pse_ethtool_set_pw_limit(struct pse_control *psec, return -EOPNOTSUPP; } +static inline int pse_ethtool_set_prio(struct pse_control *psec, + struct netlink_ext_ack *extack, + unsigned int prio) +{ + return -EOPNOTSUPP; +} + static inline bool pse_has_podl(struct pse_control *psec) { return false; diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index ed344c8533eb..c6a95224be25 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -53,10 +53,28 @@ enum hwtstamp_source { * enum ethtool_pse_event - PSE event list for the PSE controller * @ETHTOOL_PSE_EVENT_OVER_CURRENT: PSE output current is too high * @ETHTOOL_PSE_EVENT_OVER_TEMP: PSE in over temperature state + * @ETHTOOL_C33_PSE_EVENT_DETECTION: detection process occur on the PSE. IEEE + * 802.3-2022 33.2.5 and 145.2.6 PSE detection of PDs. IEEE 802.3-202 + * 30.9.1.1.5 aPSEPowerDetectionStatus + * @ETHTOOL_C33_PSE_EVENT_CLASSIFICATION: classification process occur on the + * PSE. IEEE 802.3-2022 33.2.6 and 145.2.8 classification of PDs mutual + * identification. IEEE 802.3-2022 30.9.1.1.8 aPSEPowerClassification. + * @ETHTOOL_C33_PSE_EVENT_DISCONNECTION: PD has been disconnected on the PSE. + * IEEE 802.3-2022 33.3.8 and 145.3.9 PD Maintain Power Signature. IEEE + * 802.3-2022 33.5.1.2.9 MPS Absent. IEEE 802.3-2022 30.9.1.1.20 + * aPSEMPSAbsentCounter. + * @ETHTOOL_PSE_EVENT_OVER_BUDGET: PSE turned off due to over budget situation + * @ETHTOOL_PSE_EVENT_SW_PW_CONTROL_ERROR: PSE faced an error managing the + * power control from software */ enum ethtool_pse_event { ETHTOOL_PSE_EVENT_OVER_CURRENT = 1, ETHTOOL_PSE_EVENT_OVER_TEMP = 2, + ETHTOOL_C33_PSE_EVENT_DETECTION = 4, + ETHTOOL_C33_PSE_EVENT_CLASSIFICATION = 8, + ETHTOOL_C33_PSE_EVENT_DISCONNECTION = 16, + ETHTOOL_PSE_EVENT_OVER_BUDGET = 32, + ETHTOOL_PSE_EVENT_SW_PW_CONTROL_ERROR = 64, }; enum { -- cgit v1.2.3 From eeb0c8f72f49a21984981188404cfd3700edbaff Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Tue, 17 Jun 2025 14:12:07 +0200 Subject: net: ethtool: Add PSE port priority support feature This patch expands the status information provided by ethtool for PSE c33 with current port priority and max port priority. It also adds a call to pse_ethtool_set_prio() to configure the PSE port priority. Signed-off-by: Kory Maincent (Dent Project) Reviewed-by: Oleksij Rempel Link: https://patch.msgid.link/20250617-feature_poe_port_prio-v14-8-78a1a645e2ee@bootlin.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink_generated.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index c6a95224be25..8e5d067e7ddf 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -671,6 +671,8 @@ enum { ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, ETHTOOL_A_PSE_PW_D_ID, + ETHTOOL_A_PSE_PRIO_MAX, + ETHTOOL_A_PSE_PRIO, __ETHTOOL_A_PSE_CNT, ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1) -- cgit v1.2.3 From deefc7083414de81aad102b60f0390f600d7eb79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Le=20Goffic?= Date: Fri, 13 Jun 2025 12:14:12 +0200 Subject: gpio: mmio: add BGPIOF_NO_INPUT flag for GPO gpiochip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using bgpio_init with a gpiochip acting as a GPO (output only), the gpiochip ops `direction_input` was set to `bgpio_simple_dir_in` by default but we have no input ability. Adding this flag allows to set a valid ops for the `direction_output` ops without setting a valid ops for `direction_input` by default. Reviewed-by: Linus Walleij Signed-off-by: Clément Le Goffic Link: https://lore.kernel.org/r/20250613-hdp-upstream-v5-1-6fd6f0dc527c@foss.st.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index b53233051bee..97cc75623261 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -750,6 +750,7 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev, #define BGPIOF_NO_OUTPUT BIT(5) /* only input */ #define BGPIOF_NO_SET_ON_INPUT BIT(6) #define BGPIOF_PINCTRL_BACKEND BIT(7) /* Call pinctrl direction setters */ +#define BGPIOF_NO_INPUT BIT(8) /* only output */ #ifdef CONFIG_GPIOLIB_IRQCHIP int gpiochip_irqchip_add_domain(struct gpio_chip *gc, -- cgit v1.2.3 From 6012ce6b30567aa8ec8dc5b648b7841f9f74ca7c Mon Sep 17 00:00:00 2001 From: Richard Leitner Date: Tue, 17 Jun 2025 09:31:37 +0200 Subject: leds: led-class-flash:: Fix flash_timeout comment The comment for the flash_timeout setter mentioned it is the "flash duration". Fix this by changing it to "flash timeout". Signed-off-by: Richard Leitner Link: https://lore.kernel.org/r/20250617-ov9282-flash-strobe-v5-3-9762da74d065@linux.dev Signed-off-by: Lee Jones --- include/linux/led-class-flash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/led-class-flash.h b/include/linux/led-class-flash.h index 21ec856c36bc..775a96217518 100644 --- a/include/linux/led-class-flash.h +++ b/include/linux/led-class-flash.h @@ -197,7 +197,7 @@ int led_update_flash_brightness(struct led_classdev_flash *fled_cdev); * @fled_cdev: the flash LED to set * @timeout: the flash timeout to set it to * - * Set the flash strobe duration. + * Set the flash strobe timeout. * * Returns: 0 on success or negative error value on failure */ -- cgit v1.2.3 From bc9a0c68f2583e42ace81a9c229c158b7cdcb45b Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 3 Jun 2025 21:39:05 +0100 Subject: usb: phy: tegra: Remove unused functions tegra_ehci_phy_restore_start() and tegra_ehci_phy_restore_end() last use was removed in 2013 by commit a4faa54e3aa2 ("USB: EHCI: tegra: remove all power management") tegra_usb_phy_preresume() and tegra_usb_phy_postresume() last use was removed in 2020 by commit c3590c7656fb ("usb: host: ehci-tegra: Remove the driver") (Although that one makes me wonder how much of the rest of the file is actually used) Remove both sets. Signed-off-by: "Dr. David Alan Gilbert" Link: https://lore.kernel.org/r/20250603203905.279307-1-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tegra_usb_phy.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/usb/tegra_usb_phy.h b/include/linux/usb/tegra_usb_phy.h index e6c14f2b1f9b..40afcee8b4f5 100644 --- a/include/linux/usb/tegra_usb_phy.h +++ b/include/linux/usb/tegra_usb_phy.h @@ -80,13 +80,4 @@ struct tegra_usb_phy { bool powered_on; }; -void tegra_usb_phy_preresume(struct usb_phy *phy); - -void tegra_usb_phy_postresume(struct usb_phy *phy); - -void tegra_ehci_phy_restore_start(struct usb_phy *phy, - enum tegra_usb_phy_port_speed port_speed); - -void tegra_ehci_phy_restore_end(struct usb_phy *phy); - #endif /* __TEGRA_USB_PHY_H */ -- cgit v1.2.3 From efec475e5e20c594e10d42f73cf6803805ae14c9 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 9 Jun 2025 00:33:37 +0100 Subject: usb: gadget: config: Remove unused usb_gadget_config_buf usb_gadget_config_buf() has been unused since 2012's commit fa06920a3ece ("usb: gadget: Remove File-backed Storage Gadget (g_file_storage).") Remove it. Signed-off-by: "Dr. David Alan Gilbert" Link: https://lore.kernel.org/r/20250608233338.179894-2-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/gadget.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index df33333650a0..0f28c5512fcb 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -860,10 +860,6 @@ container_of(str_item, struct gadget_string, item) int usb_descriptor_fillbuf(void *, unsigned, const struct usb_descriptor_header **); -/* build config descriptor from single descriptor vector */ -int usb_gadget_config_buf(const struct usb_config_descriptor *config, - void *buf, unsigned buflen, const struct usb_descriptor_header **desc); - /* copy a NULL-terminated vector of descriptors */ struct usb_descriptor_header **usb_copy_descriptors( struct usb_descriptor_header **); -- cgit v1.2.3 From 227280ad66ac50287a3ea8b8fd43b7c7a6eb09ac Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 9 Jun 2025 00:33:38 +0100 Subject: usb: gadget: Remove unused usb_remove_config usb_remove_config() was added in 2012's commit Fixes: 51cce6fc155c ("usb: gadget: composite: Add usb_remove_config") but has remained unused. I see there was a use in drivers/staging/cch that was removed by commit 515e6dd20b3f ("Staging: ccg: delete it from the tree") but it had it's own copy of usb_remove_config() Remove it. Signed-off-by: "Dr. David Alan Gilbert" Link: https://lore.kernel.org/r/20250608233338.179894-3-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/composite.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index d8c4e9f73839..c18041fafa52 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -339,9 +339,6 @@ int usb_add_config(struct usb_composite_dev *, struct usb_configuration *, int (*)(struct usb_configuration *)); -void usb_remove_config(struct usb_composite_dev *, - struct usb_configuration *); - /* predefined index for usb_composite_driver */ enum { USB_GADGET_MANUFACTURER_IDX = 0, -- cgit v1.2.3 From 47c83f95f3e293a9e924a1ba41d782bb059bb954 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 9 Jun 2025 00:56:17 +0100 Subject: usb: core: Remove unused usb_unlink_anchored_urbs usb_unlink_anchored_urbs() has been unused since it's last use was removed in 2009 by commit 9b9c5aaeedfd ("ar9170: xmit code revamp") Remove it. Signed-off-by: "Dr. David Alan Gilbert" Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20250608235617.200731-1-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index 1b2545b4363b..e8662843e68c 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1780,7 +1780,6 @@ extern void usb_block_urb(struct urb *urb); extern void usb_kill_anchored_urbs(struct usb_anchor *anchor); extern void usb_poison_anchored_urbs(struct usb_anchor *anchor); extern void usb_unpoison_anchored_urbs(struct usb_anchor *anchor); -extern void usb_unlink_anchored_urbs(struct usb_anchor *anchor); extern void usb_anchor_suspend_wakeups(struct usb_anchor *anchor); extern void usb_anchor_resume_wakeups(struct usb_anchor *anchor); extern void usb_anchor_urb(struct urb *urb, struct usb_anchor *anchor); -- cgit v1.2.3 From 78c76554c6b94dfa5e101b870f0c57b6c230503e Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Sat, 14 Jun 2025 20:56:43 +0800 Subject: usb: chipidea: udc: add CI_HDRC_CONTROLLER_PULLUP_EVENT event The device controller will send CI_HDRC_CONTROLLER_PULLUP_EVENT event when it's going to pullup or pulldown data line. Signed-off-by: Xu Yang Acked-by: Peter Chen Link: https://lore.kernel.org/r/20250614125645.207732-2-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/chipidea.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index ebdfef124b2b..e17ebeee24e3 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -72,6 +72,7 @@ struct ci_hdrc_platform_data { #define CI_HDRC_IMX_HSIC_ACTIVE_EVENT 2 #define CI_HDRC_IMX_HSIC_SUSPEND_EVENT 3 #define CI_HDRC_CONTROLLER_VBUS_EVENT 4 +#define CI_HDRC_CONTROLLER_PULLUP_EVENT 5 int (*notify_event) (struct ci_hdrc *ci, unsigned event); struct regulator *reg_vbus; struct usb_otg_caps ci_otg_caps; -- cgit v1.2.3 From 48ea23115887c12e53335ca2eddc0d0e3d99e5d9 Mon Sep 17 00:00:00 2001 From: RubenKelevra Date: Wed, 18 Jun 2025 18:47:43 +0200 Subject: include: fsl_devices.h: drop unused, misspelled FLS_USB2_WORKAROUND_ENGCM09152 The macro was introduced in commit 69cb1ec4ce4d ("mxc_udc: add workaround for ENGcm09152 for i.MX35") on 2010-10-15, but its prefix was misspelled as **FLS_** instead of the usual **FSL_**. Its last in-tree user disappeared with commit a390bef7db1f ("usb: gadget: fsl_mxc_udc: Remove the driver") on 2020-12-10, so the macro has been completely unused since then. Remove the dead and wrongly named definition. Signed-off-by: RubenKelevra Link: https://lore.kernel.org/r/20250618164743.1916838-1-rubenkelevra@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl_devices.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 5d231ce8709b..49f20c2f99bf 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -118,7 +118,6 @@ struct fsl_usb2_platform_data { #define FSL_USB2_PORT0_ENABLED 0x00000001 #define FSL_USB2_PORT1_ENABLED 0x00000002 -#define FLS_USB2_WORKAROUND_ENGCM09152 (1 << 0) struct spi_device; -- cgit v1.2.3 From 3b18405763c1ebb1efc15feef5563c9cdb2cc3a7 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 11 Jun 2025 14:14:15 +0300 Subject: usb: acpi: fix device link removal The device link to the USB4 host interface has to be removed manually since it's no longer auto removed. Fixes: 623dae3e7084 ("usb: acpi: fix boot hang due to early incorrect 'tunneled' USB3 device links") Cc: stable Signed-off-by: Heikki Krogerus Reviewed-by: Mika Westerberg Link: https://lore.kernel.org/r/20250611111415.2707865-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index 1b2545b4363b..92c752f5446f 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -614,6 +614,7 @@ struct usb3_lpm_parameters { * FIXME -- complete doc * @authenticated: Crypto authentication passed * @tunnel_mode: Connection native or tunneled over USB4 + * @usb4_link: device link to the USB4 host interface * @lpm_capable: device supports LPM * @lpm_devinit_allow: Allow USB3 device initiated LPM, exit latency is in range * @usb2_hw_lpm_capable: device can perform USB2 hardware LPM @@ -724,6 +725,7 @@ struct usb_device { unsigned reset_resume:1; unsigned port_is_suspended:1; enum usb_link_tunnel_mode tunnel_mode; + struct device_link *usb4_link; int slot_id; struct usb2_lpm_parameters l1_params; -- cgit v1.2.3 From d574c5dc8cfe1fd1ddda6edb435f3b3f39155c52 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 8 Jun 2025 16:46:54 +0100 Subject: serial: Remove unused uart_get_console uart_get_console() has been unused since 2019's commit bd0d9d159988 ("serial: remove ks8695 driver") Remove it, and it's associated docs. Signed-off-by: "Dr. David Alan Gilbert" Acked-by: Randy Dunlap Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250608154654.73994-1-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index d65b15449cfe..84b4648ead7e 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -1101,8 +1101,6 @@ static inline bool uart_console_registered(struct uart_port *port) return uart_console(port) && console_is_registered(port->cons); } -struct uart_port *uart_get_console(struct uart_port *ports, int nr, - struct console *c); int uart_parse_earlycon(char *p, enum uart_iotype *iotype, resource_size_t *addr, char **options); void uart_parse_options(const char *options, int *baud, int *parity, int *bits, -- cgit v1.2.3 From ace9b3daf2b4778358573d3698e34cb1c0fa7e14 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 19 Jun 2025 13:56:23 +0300 Subject: ASoC: SOF: ipc4/Intel: Add support for library restore firmware functionality The firmware will be able to only save and restore the context related to library management. This means that even without a full context save, the libraries do not need to be re-loaded to the firmware after second or consecutive boots. This is reported via the FW_READY notification, where BIT(15) indicates: 0 - the library restore is not done 1 - library restore is done This bit is only valid if full context save is not enabled, full context save is by definition saves and restores the library related book-keeping as well. Add a new flag to tell the platform code if the libraries have been restored, no need to reload them after boot. Signed-off-by: Peter Ujfalusi Reviewed-by: Guennadi Liakhovetski Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao Reviewed-by: Kai Vehmanen Link: https://patch.msgid.link/20250619105623.4546-3-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- include/sound/sof/ipc4/header.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/sof/ipc4/header.h b/include/sound/sof/ipc4/header.h index f71d04736d17..e85c7afd85a4 100644 --- a/include/sound/sof/ipc4/header.h +++ b/include/sound/sof/ipc4/header.h @@ -498,6 +498,8 @@ struct sof_ipc4_intel_mic_privacy_cap { #define SOF_IPC4_LOG_CORE_GET(x) (((x) & SOF_IPC4_LOG_CORE_MASK) >> \ SOF_IPC4_LOG_CORE_SHIFT) +#define SOF_IPC4_FW_READY_LIB_RESTORED BIT(15) + /* Value of notification type field - must fit into 8 bits */ enum sof_ipc4_notification_type { /* Phrase detected (notification from WoV module) */ -- cgit v1.2.3 From f12b45862c4dcb9c2937b83ed730e473b9a76cbf Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 19 May 2025 10:33:19 +0200 Subject: timekeeping: Introduce timekeeper ID As long as there is only a single timekeeper, there is no need to clarify which timekeeper is used. But with the upcoming reusage of the timekeeper infrastructure for auxiliary clock timekeepers, an ID is required to differentiate. Introduce an enum for timekeeper IDs, introduce a field in struct tk_data to store this timekeeper id and add also initialization. The id struct field is added at the end of the second cachline, as there is a 4 byte hole anyway. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250519083025.842476378@linutronix.de --- include/linux/timekeeper_internal.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 785048a3b3e6..bfcecad0e279 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -11,6 +11,16 @@ #include #include +/** + * timekeeper_ids - IDs for various time keepers in the kernel + * @TIMEKEEPER_CORE: The central core timekeeper managing system time + * @TIMEKEEPERS_MAX: The maximum number of timekeepers managed + */ +enum timekeeper_ids { + TIMEKEEPER_CORE, + TIMEKEEPERS_MAX, +}; + /** * struct tk_read_base - base structure for timekeeping readout * @clock: Current clocksource used for timekeeping. @@ -52,6 +62,7 @@ struct tk_read_base { * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai * @coarse_nsec: The nanoseconds part for coarse time getters + * @id: The timekeeper ID * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds * @clock_was_set_seq: The sequence number of clock was set events @@ -101,7 +112,7 @@ struct tk_read_base { * which results in the following cacheline layout: * * 0: seqcount, tkr_mono - * 1: xtime_sec ... coarse_nsec + * 1: xtime_sec ... id * 2: tkr_raw, raw_sec * 3,4: Internal variables * @@ -123,6 +134,7 @@ struct timekeeper { ktime_t offs_boot; ktime_t offs_tai; u32 coarse_nsec; + enum timekeeper_ids id; /* Cacheline 2: */ struct tk_read_base tkr_raw; -- cgit v1.2.3 From 9094c72c3d81bf2416b7c79d12c8494ab8fbac20 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 19 May 2025 10:33:20 +0200 Subject: time: Introduce auxiliary POSIX clocks To support auxiliary timekeeping and the related user space interfaces, it's required to define a clock ID range for them. Reserve 8 auxiliary clock IDs after the regular timekeeping clock ID space. This is the maximum number of auxiliary clocks the kernel can support. The actual number of supported clocks depends obviously on the presence of related devices and might be constraint by the available VDSO space. Add the corresponding timekeeper IDs as well. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250519083025.905800695@linutronix.de --- include/linux/timekeeper_internal.h | 10 ++++++++-- include/uapi/linux/time.h | 11 +++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index bfcecad0e279..4201ae818f57 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -13,11 +13,17 @@ /** * timekeeper_ids - IDs for various time keepers in the kernel - * @TIMEKEEPER_CORE: The central core timekeeper managing system time - * @TIMEKEEPERS_MAX: The maximum number of timekeepers managed + * @TIMEKEEPER_CORE: The central core timekeeper managing system time + * @TIMEKEEPER_AUX_FIRST: The first AUX timekeeper + * @TIMEKEEPER_AUX_LAST: The last AUX timekeeper + * @TIMEKEEPERS_MAX: The maximum number of timekeepers managed */ enum timekeeper_ids { TIMEKEEPER_CORE, +#ifdef CONFIG_POSIX_AUX_CLOCKS + TIMEKEEPER_AUX_FIRST, + TIMEKEEPER_AUX_LAST = TIMEKEEPER_AUX_FIRST + MAX_AUX_CLOCKS - 1, +#endif TIMEKEEPERS_MAX, }; diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index 4f4b6e48e01c..16ca1ac206fd 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -64,6 +64,17 @@ struct timezone { #define CLOCK_TAI 11 #define MAX_CLOCKS 16 + +/* + * AUX clock support. AUXiliary clocks are dynamically configured by + * enabling a clock ID. These clock can be steered independently of the + * core timekeeper. The kernel can support up to 8 auxiliary clocks, but + * the actual limit depends on eventual architecture constraints vs. VDSO. + */ +#define CLOCK_AUX MAX_CLOCKS +#define MAX_AUX_CLOCKS 8 +#define CLOCK_AUX_LAST (CLOCK_AUX + MAX_AUX_CLOCKS - 1) + #define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) #define CLOCKS_MONO CLOCK_MONOTONIC -- cgit v1.2.3 From 6168024604236cb2bb1004ea8459c8ece2c4ef5f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 19 May 2025 10:33:27 +0200 Subject: timekeeping: Add clock_valid flag to timekeeper In preparation for supporting independent auxiliary timekeepers, add a clock valid field and set it to true for the system timekeeper. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250519083026.287145536@linutronix.de --- include/linux/timekeeper_internal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 4201ae818f57..1690eda1c7c3 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -73,6 +73,7 @@ struct tk_read_base { * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds * @clock_was_set_seq: The sequence number of clock was set events * @cs_was_changed_seq: The sequence number of clocksource change events + * @clock_valid: Indicator for valid clock * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP @@ -149,6 +150,7 @@ struct timekeeper { /* Cachline 3 and 4 (timekeeping internal variables): */ unsigned int clock_was_set_seq; u8 cs_was_changed_seq; + u8 clock_valid; struct timespec64 monotonic_to_boot; -- cgit v1.2.3 From 75215c972581d3934e76a57690cf838d7ceab399 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 18 Jun 2025 22:53:38 +0200 Subject: pidfs: move to anonymous struct Move the pidfs entries to an anonymous struct. Link: https://lore.kernel.org/20250618-work-pidfs-persistent-v2-4-98f3456fd552@kernel.org Reviewed-by: Alexander Mikhalitsyn Signed-off-by: Christian Brauner --- include/linux/pid.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/pid.h b/include/linux/pid.h index 453ae6d8a68d..00646a692dd4 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -52,14 +52,15 @@ struct upid { struct pid_namespace *ns; }; -struct pid -{ +struct pid { refcount_t count; unsigned int level; spinlock_t lock; - struct dentry *stashed; - u64 ino; - struct rb_node pidfs_node; + struct { + u64 ino; + struct rb_node pidfs_node; + struct dentry *stashed; + }; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct hlist_head inodes; -- cgit v1.2.3 From 8ec7c826d97b390879df2a03dfb035c70af86779 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 18 Jun 2025 22:53:39 +0200 Subject: pidfs: persist information Persist exit and coredump information independent of whether anyone currently holds a pidfd for the struct pid. The current scheme allocated pidfs dentries on-demand repeatedly. This scheme is reaching it's limits as it makes it impossible to pin information that needs to be available after the task has exited or coredumped and that should not be lost simply because the pidfd got closed temporarily. The next opener should still see the stashed information. This is also a prerequisite for supporting extended attributes on pidfds to allow attaching meta information to them. If someone opens a pidfd for a struct pid a pidfs dentry is allocated and stashed in pid->stashed. Once the last pidfd for the struct pid is closed the pidfs dentry is released and removed from pid->stashed. So if 10 callers create a pidfs dentry for the same struct pid sequentially, i.e., each closing the pidfd before the other creates a new one then a new pidfs dentry is allocated every time. Because multiple tasks acquiring and releasing a pidfd for the same struct pid can race with each another a task may still find a valid pidfs entry from the previous task in pid->stashed and reuse it. Or it might find a dead dentry in there and fail to reuse it and so stashes a new pidfs dentry. Multiple tasks may race to stash a new pidfs dentry but only one will succeed, the other ones will put their dentry. The current scheme aims to ensure that a pidfs dentry for a struct pid can only be created if the task is still alive or if a pidfs dentry already existed before the task was reaped and so exit information has been was stashed in the pidfs inode. That's great except that it's buggy. If a pidfs dentry is stashed in pid->stashed after pidfs_exit() but before __unhash_process() is called we will return a pidfd for a reaped task without exit information being available. The pidfds_pid_valid() check does not guard against this race as it doens't sync at all with pidfs_exit(). The pid_has_task() check might be successful simply because we're before __unhash_process() but after pidfs_exit(). Introduce a new scheme where the lifetime of information associated with a pidfs entry (coredump and exit information) isn't bound to the lifetime of the pidfs inode but the struct pid itself. The first time a pidfs dentry is allocated for a struct pid a struct pidfs_attr will be allocated which will be used to store exit and coredump information. If all pidfs for the pidfs dentry are closed the dentry and inode can be cleaned up but the struct pidfs_attr will stick until the struct pid itself is freed. This will ensure minimal memory usage while persisting relevant information. The new scheme has various advantages. First, it allows to close the race where we end up handing out a pidfd for a reaped task for which no exit information is available. Second, it minimizes memory usage. Third, it allows to remove complex lifetime tracking via dentries when registering a struct pid with pidfs. There's no need to get or put a reference. Instead, the lifetime of exit and coredump information associated with a struct pid is bound to the lifetime of struct pid itself. Link: https://lore.kernel.org/20250618-work-pidfs-persistent-v2-5-98f3456fd552@kernel.org Reviewed-by: Alexander Mikhalitsyn Signed-off-by: Christian Brauner --- include/linux/pid.h | 3 +++ include/linux/pidfs.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/pid.h b/include/linux/pid.h index 00646a692dd4..003a1027d219 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -47,6 +47,8 @@ #define RESERVED_PIDS 300 +struct pidfs_attr; + struct upid { int nr; struct pid_namespace *ns; @@ -60,6 +62,7 @@ struct pid { u64 ino; struct rb_node pidfs_node; struct dentry *stashed; + struct pidfs_attr *attr; }; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h index 77e7db194914..8f6ed59bb3fb 100644 --- a/include/linux/pidfs.h +++ b/include/linux/pidfs.h @@ -16,5 +16,6 @@ extern const struct dentry_operations pidfs_dentry_operations; int pidfs_register_pid(struct pid *pid); void pidfs_get_pid(struct pid *pid); void pidfs_put_pid(struct pid *pid); +void pidfs_free_pid(struct pid *pid); #endif /* _LINUX_PID_FS_H */ -- cgit v1.2.3 From 804d6794497e6f3992d156e07d01e22b037ce09e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 18 Jun 2025 22:53:42 +0200 Subject: pidfs: remove pidfs_{get,put}_pid() Now that we stash persistent information in struct pid there's no need to play volatile games with pinning struct pid via dentries in pidfs. Link: https://lore.kernel.org/20250618-work-pidfs-persistent-v2-8-98f3456fd552@kernel.org Reviewed-by: Alexander Mikhalitsyn Signed-off-by: Christian Brauner --- include/linux/pidfs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h index 8f6ed59bb3fb..3e08c33da2df 100644 --- a/include/linux/pidfs.h +++ b/include/linux/pidfs.h @@ -14,8 +14,6 @@ void pidfs_coredump(const struct coredump_params *cprm); #endif extern const struct dentry_operations pidfs_dentry_operations; int pidfs_register_pid(struct pid *pid); -void pidfs_get_pid(struct pid *pid); -void pidfs_put_pid(struct pid *pid); void pidfs_free_pid(struct pid *pid); #endif /* _LINUX_PID_FS_H */ -- cgit v1.2.3 From 180d8b4ce91fe0cf7a9cb236bb01f14587ba4bf0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 19 May 2025 10:33:32 +0200 Subject: timekeeping: Add AUX offset to struct timekeeper This offset will be used in the time getters of auxiliary clocks. It is added to the "monotonic" clock readout. As auxiliary clocks do not utilize the offset fields of the core time keeper, this is just an alias for offs_tai, so that the cache line layout stays the same. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250519083026.533486349@linutronix.de --- include/linux/timekeeper_internal.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 1690eda1c7c3..ca79938b62f3 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -67,6 +67,7 @@ struct tk_read_base { * @offs_real: Offset clock monotonic -> clock realtime * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai + * @offs_aux: Offset clock monotonic -> clock AUX * @coarse_nsec: The nanoseconds part for coarse time getters * @id: The timekeeper ID * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW @@ -113,6 +114,9 @@ struct tk_read_base { * @monotonic_to_boottime is a timespec64 representation of @offs_boot to * accelerate the VDSO update for CLOCK_BOOTTIME. * + * @offs_aux is used by the auxiliary timekeepers which do not utilize any + * of the regular timekeeper offset fields. + * * The cacheline ordering of the structure is optimized for in kernel usage of * the ktime_get() and ktime_get_ts64() family of time accessors. Struct * timekeeper is prepended in the core timekeeping code with a sequence count, @@ -139,7 +143,10 @@ struct timekeeper { struct timespec64 wall_to_monotonic; ktime_t offs_real; ktime_t offs_boot; - ktime_t offs_tai; + union { + ktime_t offs_tai; + ktime_t offs_aux; + }; u32 coarse_nsec; enum timekeeper_ids id; -- cgit v1.2.3 From 75cabb46935b6de8e2bdfde563e460ac41cfff12 Mon Sep 17 00:00:00 2001 From: Erni Sri Satya Vennela Date: Tue, 17 Jun 2025 00:17:34 -0700 Subject: net: mana: Add support for net_shaper_ops Introduce support for net_shaper_ops in the MANA driver, enabling configuration of rate limiting on the MANA NIC. To apply rate limiting, the driver issues a HWC command via mana_set_bw_clamp() and updates the corresponding shaper object in the net_shaper cache. If an error occurs during this process, the driver restores the previous speed by querying the current link configuration using mana_query_link_cfg(). The minimum supported bandwidth is 100 Mbps, and only values that are exact multiples of 100 Mbps are allowed. Any other values are rejected. To remove a shaper, the driver resets the bandwidth to the maximum supported by the SKU using mana_set_bw_clamp() and clears the associated cache entry. If an error occurs during this process, the shaper details are retained. On the hardware that does not support these APIs, the net-shaper calls to set speed would fail. Set the speed: ./tools/net/ynl/pyynl/cli.py \ --spec Documentation/netlink/specs/net_shaper.yaml \ --do set --json '{"ifindex":'$IFINDEX', "handle":{"scope": "netdev", "id":'$ID' }, "bw-max": 200000000 }' Get the shaper details: ./tools/net/ynl/pyynl/cli.py \ --spec Documentation/netlink/specs/net_shaper.yaml \ --do get --json '{"ifindex":'$IFINDEX', "handle":{"scope": "netdev", "id":'$ID' }}' > {'bw-max': 200000000, > 'handle': {'scope': 'netdev'}, > 'ifindex': $IFINDEX, > 'metric': 'bps'} Delete the shaper object: ./tools/net/ynl/pyynl/cli.py \ --spec Documentation/netlink/specs/net_shaper.yaml \ --do delete --json '{"ifindex":'$IFINDEX', "handle":{"scope": "netdev","id":'$ID' }}' Signed-off-by: Erni Sri Satya Vennela Reviewed-by: Haiyang Zhang Reviewed-by: Shradha Gupta Reviewed-by: Saurabh Singh Sengar Reviewed-by: Long Li Link: https://patch.msgid.link/1750144656-2021-3-git-send-email-ernis@linux.microsoft.com Signed-off-by: Paolo Abeni --- include/net/mana/mana.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 4176edf1be71..038b18340e51 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -5,6 +5,7 @@ #define _MANA_H #include +#include #include "gdma.h" #include "hw_channel.h" @@ -526,7 +527,12 @@ struct mana_port_context { struct mutex vport_mutex; int vport_use_count; + /* Net shaper handle*/ + struct net_shaper_handle handle; + u16 port_idx; + /* Currently configured speed (mbps) */ + u32 speed; bool port_is_up; bool port_st_save; /* Saved port state */ @@ -562,6 +568,9 @@ struct bpf_prog *mana_xdp_get(struct mana_port_context *apc); void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog); int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf); void mana_query_gf_stats(struct mana_port_context *apc); +int mana_query_link_cfg(struct mana_port_context *apc); +int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed, + int enable_clamping); void mana_query_phy_stats(struct mana_port_context *apc); int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues); void mana_pre_dealloc_rxbufs(struct mana_port_context *apc); @@ -589,6 +598,8 @@ enum mana_command_code { MANA_FENCE_RQ = 0x20006, MANA_CONFIG_VPORT_RX = 0x20007, MANA_QUERY_VPORT_CONFIG = 0x20008, + MANA_QUERY_LINK_CONFIG = 0x2000A, + MANA_SET_BW_CLAMP = 0x2000B, MANA_QUERY_PHY_STAT = 0x2000c, /* Privileged commands for the PF mode */ @@ -598,6 +609,35 @@ enum mana_command_code { MANA_DEREGISTER_HW_PORT = 0x28004, }; +/* Query Link Configuration*/ +struct mana_query_link_config_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; +}; /* HW DATA */ + +struct mana_query_link_config_resp { + struct gdma_resp_hdr hdr; + u32 qos_speed_mbps; + u8 qos_unconfigured; + u8 reserved1[3]; + u32 link_speed_mbps; + u8 reserved2[4]; +}; /* HW DATA */ + +/* Set Bandwidth Clamp*/ +struct mana_set_bw_clamp_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + enum TRI_STATE enable_clamping; + u32 link_speed_mbps; +}; /* HW DATA */ + +struct mana_set_bw_clamp_resp { + struct gdma_resp_hdr hdr; + u8 qos_unconfigured; + u8 reserved[7]; +}; /* HW DATA */ + /* Query Device Configuration */ struct mana_query_device_cfg_req { struct gdma_req_hdr hdr; -- cgit v1.2.3 From a6d5edf11e0cf5a4650f1d353d20ec29de093813 Mon Sep 17 00:00:00 2001 From: Erni Sri Satya Vennela Date: Tue, 17 Jun 2025 00:17:35 -0700 Subject: net: mana: Add speed support in mana_get_link_ksettings Allow mana ethtool get_link_ksettings operation to report the maximum speed supported by the SKU in mbps. The driver retrieves this information by issuing a HWC command to the hardware via mana_query_link_cfg(), which retrieves the SKU's maximum supported speed. These APIs when invoked on hardware that are older/do not support these APIs, the speed would be reported as UNKNOWN. Before: $ethtool enP30832s1 > Settings for enP30832s1: Supported ports: [ ] Supported link modes: Not reported Supported pause frame use: No Supports auto-negotiation: No Supported FEC modes: Not reported Advertised link modes: Not reported Advertised pause frame use: No Advertised auto-negotiation: No Advertised FEC modes: Not reported Speed: Unknown! Duplex: Full Auto-negotiation: off Port: Other PHYAD: 0 Transceiver: internal Link detected: yes After: $ethtool enP30832s1 > Settings for enP30832s1: Supported ports: [ ] Supported link modes: Not reported Supported pause frame use: No Supports auto-negotiation: No Supported FEC modes: Not reported Advertised link modes: Not reported Advertised pause frame use: No Advertised auto-negotiation: No Advertised FEC modes: Not reported Speed: 16000Mb/s Duplex: Full Auto-negotiation: off Port: Other PHYAD: 0 Transceiver: internal Link detected: yes Signed-off-by: Erni Sri Satya Vennela Reviewed-by: Haiyang Zhang Reviewed-by: Shradha Gupta Reviewed-by: Saurabh Singh Sengar Reviewed-by: Long Li Link: https://patch.msgid.link/1750144656-2021-4-git-send-email-ernis@linux.microsoft.com Signed-off-by: Paolo Abeni --- include/net/mana/mana.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 038b18340e51..e1030a7d2daa 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -533,6 +533,8 @@ struct mana_port_context { u16 port_idx; /* Currently configured speed (mbps) */ u32 speed; + /* Maximum speed supported by the SKU (mbps) */ + u32 max_speed; bool port_is_up; bool port_st_save; /* Saved port state */ -- cgit v1.2.3 From ca8ac489ca33c986ff02ee14c3e1c10b86355428 Mon Sep 17 00:00:00 2001 From: Erni Sri Satya Vennela Date: Tue, 17 Jun 2025 00:17:36 -0700 Subject: net: mana: Handle unsupported HWC commands If any of the HWC commands are not recognized by the underlying hardware, the hardware returns the response header status of -1. Log the information using netdev_info_once to avoid multiple error logs in dmesg. Signed-off-by: Erni Sri Satya Vennela Reviewed-by: Haiyang Zhang Reviewed-by: Shradha Gupta Reviewed-by: Saurabh Singh Sengar Reviewed-by: Dipayaan Roy Link: https://patch.msgid.link/1750144656-2021-5-git-send-email-ernis@linux.microsoft.com Signed-off-by: Paolo Abeni --- include/net/mana/gdma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 6fe6cbcd512d..92ab85061df0 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -10,6 +10,7 @@ #include "shm_channel.h" #define GDMA_STATUS_MORE_ENTRIES 0x00000105 +#define GDMA_STATUS_CMD_UNSUPPORTED 0xffffffff /* Structures labeled with "HW DATA" are exchanged with the hardware. All of * them are naturally aligned and hence don't need __packed. -- cgit v1.2.3 From 62ab7ac5be90392a9ac0955febab778ebf51bc0a Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Tue, 17 Jun 2025 16:57:55 +0100 Subject: dt-bindings: clock: renesas,r9a09g077: Add PCLKL core clock ID Add the Peripheral Module Clock L (PCLKL) core clock ID for the RZ/T2H (R9A09G077) SoC. This clock is used by peripherals such as IIC, WDT, and others. Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20250617155757.149597-3-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h b/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h index 1b22fe88dec7..f6e5f62b07c4 100644 --- a/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h +++ b/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h @@ -23,5 +23,6 @@ #define R9A09G077_CLK_PCLKGPTL 11 #define R9A09G077_CLK_PCLKH 12 #define R9A09G077_CLK_PCLKM 13 +#define R9A09G077_CLK_PCLKL 14 #endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G077_CPG_H__ */ -- cgit v1.2.3 From 292bf6c5b8100ba4e16cd194bdc89785f6fb6f7a Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 9 Jun 2025 21:36:51 +0100 Subject: dt-bindings: clock: renesas,cpg-mssr: Document RZ/N2H support Document support for Module Standby and Software Reset found on the Renesas RZ/N2H (R9A09G087) SoC. The Module Standby and Software Reset IP is similar to that found on the RZ/T2H SoC. Signed-off-by: Lad Prabhakar Acked-by: Conor Dooley Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20250609203656.333138-4-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- .../dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h | 28 ++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h (limited to 'include') diff --git a/include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h b/include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h new file mode 100644 index 000000000000..f28166d6015f --- /dev/null +++ b/include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + * + * Copyright (C) 2025 Renesas Electronics Corp. + */ + +#ifndef __DT_BINDINGS_CLOCK_RENESAS_R9A09G087_CPG_H__ +#define __DT_BINDINGS_CLOCK_RENESAS_R9A09G087_CPG_H__ + +#include + +/* R9A09G087 CPG Core Clocks */ +#define R9A09G087_CLK_CA55C0 0 +#define R9A09G087_CLK_CA55C1 1 +#define R9A09G087_CLK_CA55C2 2 +#define R9A09G087_CLK_CA55C3 3 +#define R9A09G087_CLK_CA55S 4 +#define R9A09G087_CLK_CR52_CPU0 5 +#define R9A09G087_CLK_CR52_CPU1 6 +#define R9A09G087_CLK_CKIO 7 +#define R9A09G087_CLK_PCLKAH 8 +#define R9A09G087_CLK_PCLKAM 9 +#define R9A09G087_CLK_PCLKAL 10 +#define R9A09G087_CLK_PCLKGPTL 11 +#define R9A09G087_CLK_PCLKH 12 +#define R9A09G087_CLK_PCLKM 13 +#define R9A09G087_CLK_PCLKL 14 + +#endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G087_CPG_H__ */ -- cgit v1.2.3 From ea92128fe7f6eef6ee5fcaaed521b1b2b5ab7c9a Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 13 Jun 2025 23:35:13 -0700 Subject: iommufd: Apply obvious cosmetic fixes Run clang-format but exclude those not so obvious ones, which leaves us: - Align indentations - Add missing spaces - Remove unnecessary spaces - Remove unnecessary line wrappings Link: https://patch.msgid.link/r/9132e1ab45690ab1959c66bbb51ac5536a635388.1749882255.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 34b6e6ca4bfa..498c9a768506 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -171,8 +171,9 @@ static inline void iommufd_access_unpin_pages(struct iommufd_access *access, { } -static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, - void *data, size_t len, unsigned int flags) +static inline int iommufd_access_rw(struct iommufd_access *access, + unsigned long iova, void *data, size_t len, + unsigned int flags) { return -EOPNOTSUPP; } -- cgit v1.2.3 From fc9c40e3a4faa09dbd643ae1bdaf8ad006c3bc28 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 13 Jun 2025 23:35:15 -0700 Subject: iommufd: Use enum iommu_viommu_type for type in struct iommufd_viommu Replace unsigned int, to make it clear. No functional changes. The viommu_alloc iommu op will be deprecated, so don't change that. Link: https://patch.msgid.link/r/6c6ba5c0cd381594f17ae74355872d78d7a022c0.1749882255.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Reviewed-by: Pranjal Shrivastava Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 498c9a768506..ac98e49e44fe 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -101,7 +101,7 @@ struct iommufd_viommu { struct list_head veventqs; struct rw_semaphore veventqs_rwsem; - unsigned int type; + enum iommu_viommu_type type; }; /** -- cgit v1.2.3 From 187f146d5de65d50d90a4f49157d381d8ae32939 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 13 Jun 2025 23:35:18 -0700 Subject: iommu: Introduce get_viommu_size and viommu_init ops So far, a vIOMMU object has been allocated by IOMMU driver and initialized with the driver-level structure, before it returns to the iommufd core for core-level structure initialization. It has been requiring iommufd core to expose some core structure/helpers in its driver.c file, which result in a size increase of this driver module. Meanwhile, IOMMU drivers are now requiring more vIOMMU-base structures for some advanced feature, such as the existing vDEVICE and a future HW_QUEUE. Initializing a core-structure later than driver-structure gives for-driver helpers some trouble, when they are used by IOMMU driver assuming that the new structure (including core) are fully initialized, for example: core: viommu = ops->viommu_alloc(); driver: // my_viommu is successfully allocated driver: my_viommu = iommufd_viommu_alloc(...); driver: // This may crash if it reads viommu->ictx driver: new = iommufd_new_viommu_helper(my_viommu->core ...); core: viommu->ictx = ucmd->ictx; core: ... To ease such a condition, allow the IOMMU driver to report the size of its vIOMMU structure, let the core allocate a vIOMMU object and initialize the core-level structure first, and then hand it over the driver to initialize its driver-level structure. Thus, this requires two new iommu ops, get_viommu_size and viommu_init, so iommufd core can communicate with drivers to replace the viommu_alloc op: core: viommu = ops->get_viommu_size(); driver: return VIOMMU_STRUCT_SIZE(); core: viommu->ictx = ucmd->ictx; // and others core: rc = ops->viommu_init(); driver: // This is safe now as viommu->ictx is inited driver: new = iommufd_new_viommu_helper(my_viommu->core ...); core: ... This also adds a VIOMMU_STRUCT_SIZE macro, for drivers to use, which would statically sanitize the driver structure. Link: https://patch.msgid.link/r/3ab52c5b622dad476c43b1b1f1636c8b902f1692.1749882255.git.nicolinc@nvidia.com Suggested-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Pranjal Shrivastava Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 15 +++++++++++++++ include/linux/iommufd.h | 6 ++++++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 156732807994..9be4ff370f1e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -14,6 +14,7 @@ #include #include #include +#include #define IOMMU_READ (1 << 0) #define IOMMU_WRITE (1 << 1) @@ -596,6 +597,16 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, * - IOMMU_DOMAIN_DMA: must use a dma domain * - 0: use the default setting * @default_domain_ops: the default ops for domains + * @get_viommu_size: Get the size of a driver-level vIOMMU structure for a given + * @dev corresponding to @viommu_type. Driver should return 0 + * if vIOMMU isn't supported accordingly. It is required for + * driver to use the VIOMMU_STRUCT_SIZE macro to sanitize the + * driver-level vIOMMU structure related to the core one + * @viommu_init: Init the driver-level struct of an iommufd_viommu on a physical + * IOMMU instance @viommu->iommu_dev, as the set of virtualization + * resources shared/passed to user space IOMMU instance. Associate + * it with a nesting @parent_domain. It is required for driver to + * set @viommu->ops pointing to its own viommu_ops * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind * the @dev, as the set of virtualization resources shared/passed * to user space IOMMU instance. And associate it with a nesting @@ -654,6 +665,10 @@ struct iommu_ops { int (*def_domain_type)(struct device *dev); + size_t (*get_viommu_size)(struct device *dev, + enum iommu_viommu_type viommu_type); + int (*viommu_init)(struct iommufd_viommu *viommu, + struct iommu_domain *parent_domain); struct iommufd_viommu *(*viommu_alloc)( struct device *dev, struct iommu_domain *parent_domain, struct iommufd_ctx *ictx, unsigned int viommu_type); diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index ac98e49e44fe..423e08963d90 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -229,6 +229,12 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, } #endif /* CONFIG_IOMMUFD_DRIVER_CORE */ +#define VIOMMU_STRUCT_SIZE(drv_struct, member) \ + (sizeof(drv_struct) + \ + BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \ + BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_viommu, \ + ((drv_struct *)NULL)->member))) + /* * Helpers for IOMMU driver to allocate driver structures that will be freed by * the iommufd core. The free op will be called prior to freeing the memory. -- cgit v1.2.3 From f842ea208e43066c43e5e91e20fe8ce600df7055 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 13 Jun 2025 23:35:23 -0700 Subject: iommu: Deprecate viommu_alloc op To ease the for-driver iommufd APIs, get_viommu_size and viommu_init ops are introduced. Now, those existing vIOMMU supported drivers implemented these two ops, replacing the viommu_alloc one. So, there is no use of it. Remove it from the headers and the viommu core. Link: https://patch.msgid.link/r/5b32d4499d7ed02a63e57a293c11b642d226ef8d.1749882255.git.nicolinc@nvidia.com Suggested-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Reviewed-by: Pranjal Shrivastava Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 11 ----------- include/linux/iommufd.h | 18 ------------------ 2 files changed, 29 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9be4ff370f1e..04548b18df28 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -607,14 +607,6 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, * resources shared/passed to user space IOMMU instance. Associate * it with a nesting @parent_domain. It is required for driver to * set @viommu->ops pointing to its own viommu_ops - * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind - * the @dev, as the set of virtualization resources shared/passed - * to user space IOMMU instance. And associate it with a nesting - * @parent_domain. The @viommu_type must be defined in the header - * include/uapi/linux/iommufd.h - * It is required to call iommufd_viommu_alloc() helper for - * a bundled allocation of the core and the driver structures, - * using the given @ictx pointer. * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops * @identity_domain: An always available, always attachable identity @@ -669,9 +661,6 @@ struct iommu_ops { enum iommu_viommu_type viommu_type); int (*viommu_init)(struct iommufd_viommu *viommu, struct iommu_domain *parent_domain); - struct iommufd_viommu *(*viommu_alloc)( - struct device *dev, struct iommu_domain *parent_domain, - struct iommufd_ctx *ictx, unsigned int viommu_type); const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 423e08963d90..bf41b242b9f6 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -234,22 +234,4 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \ BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_viommu, \ ((drv_struct *)NULL)->member))) - -/* - * Helpers for IOMMU driver to allocate driver structures that will be freed by - * the iommufd core. The free op will be called prior to freeing the memory. - */ -#define iommufd_viommu_alloc(ictx, drv_struct, member, viommu_ops) \ - ({ \ - drv_struct *ret; \ - \ - static_assert(__same_type(struct iommufd_viommu, \ - ((drv_struct *)NULL)->member)); \ - static_assert(offsetof(drv_struct, member.obj) == 0); \ - ret = (drv_struct *)_iommufd_object_alloc( \ - ictx, sizeof(drv_struct), IOMMUFD_OBJ_VIOMMU); \ - if (!IS_ERR(ret)) \ - ret->member.ops = viommu_ops; \ - ret; \ - }) #endif -- cgit v1.2.3 From 17a93473a552fc0ffdfb04e69a26946afd4a046a Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 13 Jun 2025 23:35:24 -0700 Subject: iommufd: Move _iommufd_object_alloc out of driver.c Now, all driver structures will be allocated by the core, i.e. no longer a need of driver calling _iommufd_object_alloc. Thus, move it back. Before: text data bss dec hex filename 3024 180 0 3204 c84 drivers/iommu/iommufd/driver.o 9074 610 64 9748 2614 drivers/iommu/iommufd/main.o After: text data bss dec hex filename 2665 164 0 2829 b0d drivers/iommu/iommufd/driver.o 9410 618 64 10092 276c drivers/iommu/iommufd/main.o Link: https://patch.msgid.link/r/79e630c7b911930cf36e3c8a775a04e66c528d65.1749882255.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index bf41b242b9f6..2d1bf2f97ee3 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -190,9 +190,6 @@ static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) #endif /* CONFIG_IOMMUFD */ #if IS_ENABLED(CONFIG_IOMMUFD_DRIVER_CORE) -struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, - size_t size, - enum iommufd_object_type type); struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id); int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, @@ -201,13 +198,6 @@ int iommufd_viommu_report_event(struct iommufd_viommu *viommu, enum iommu_veventq_type type, void *event_data, size_t data_len); #else /* !CONFIG_IOMMUFD_DRIVER_CORE */ -static inline struct iommufd_object * -_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, - enum iommufd_object_type type) -{ - return ERR_PTR(-EOPNOTSUPP); -} - static inline struct device * iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) { -- cgit v1.2.3 From 2c04d279e857e6c441593c282f978cebc5583fd9 Mon Sep 17 00:00:00 2001 From: Jun Miao Date: Wed, 18 Jun 2025 13:39:23 -0400 Subject: net: usb: Convert tasklet API to new bottom half workqueue mechanism Migrate tasklet APIs to the new bottom half workqueue mechanism. It replaces all occurrences of tasklet usage with the appropriate workqueue APIs throughout the usbnet driver. This transition ensures compatibility with the latest design and enhances performance. Signed-off-by: Jun Miao Link: https://patch.msgid.link/20250618173923.950510-1-jun.miao@intel.com Signed-off-by: Jakub Kicinski --- include/linux/usb/usbnet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 0b9f1e598e3a..208682f77179 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -58,7 +58,7 @@ struct usbnet { unsigned interrupt_count; struct mutex interrupt_mutex; struct usb_anchor deferred; - struct tasklet_struct bh; + struct work_struct bh_work; struct work_struct kevent; unsigned long flags; -- cgit v1.2.3 From c7d78566bbd30544a0618a6ffbc97bc0ddac7035 Mon Sep 17 00:00:00 2001 From: Nicolas Escande Date: Tue, 17 Jun 2025 16:13:34 +0200 Subject: neighbour: add support for NUD_PERMANENT proxy entries As discussesd before in [0] proxy entries (which are more configuration than runtime data) should stay when the link (carrier) goes does down. This is what happens for regular neighbour entries. So lets fix this by: - storing in proxy entries the fact that it was added as NUD_PERMANENT - not removing NUD_PERMANENT proxy entries when the carrier goes down (same as how it's done in neigh_flush_dev() for regular neigh entries) [0]: https://lore.kernel.org/netdev/c584ef7e-6897-01f3-5b80-12b53f7b4bf4@kernel.org/ Signed-off-by: Nicolas Escande Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250617141334.3724863-1-nico.escande@gmail.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 9a832cab5b1d..c7ce5ec7be23 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -182,6 +182,7 @@ struct pneigh_entry { netdevice_tracker dev_tracker; u32 flags; u8 protocol; + bool permanent; u32 key[]; }; -- cgit v1.2.3 From 27480a7c8f0274f8f2fc6c40e4522f38e52bd05f Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 18 Jun 2025 01:32:44 -0700 Subject: net: add dev_dstats_rx_dropped_add() helper Introduce the dev_dstats_rx_dropped_add() helper to allow incrementing the rx_drops per-CPU statistic by an arbitrary value, rather than just one. This is useful for drivers or code paths that need to account for multiple dropped packets at once, such as when dropping entire queues. Reviewed-by: Joe Damato Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250618-netdevsim_stat-v4-3-19fe0d35e28e@debian.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9cbc4e54b7e4..03c26bb0fbbe 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3016,6 +3016,16 @@ static inline void dev_dstats_rx_dropped(struct net_device *dev) u64_stats_update_end(&dstats->syncp); } +static inline void dev_dstats_rx_dropped_add(struct net_device *dev, + unsigned int packets) +{ + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_add(&dstats->rx_drops, packets); + u64_stats_update_end(&dstats->syncp); +} + static inline void dev_dstats_tx_add(struct net_device *dev, unsigned int len) { -- cgit v1.2.3 From 49c94af071fc6c9f5e1db52b3031dec28daa90c3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 18 Jun 2025 10:24:16 -0400 Subject: ref_tracker: have callers pass output function to pr_ostream() In a later patch, we'll be adding a 3rd mechanism for outputting ref_tracker info via seq_file. Instead of a conditional, have the caller set a pointer to an output function in struct ostream. As part of this, the log prefix must be explicitly passed in, as it's too late for the pr_fmt macro. Reviewed-by: Andrew Lunn Signed-off-by: Jeff Layton Link: https://patch.msgid.link/20250618-reftrack-dbgfs-v15-3-24fc37ead144@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ref_tracker.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h index 8eac4f3d5254..a0a1ee43724f 100644 --- a/include/linux/ref_tracker.h +++ b/include/linux/ref_tracker.h @@ -6,6 +6,8 @@ #include #include +#define __ostream_printf __printf(2, 3) + struct ref_tracker; struct ref_tracker_dir { -- cgit v1.2.3 From aa7d26c3c3497258b712fb97221e775733a710b7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 18 Jun 2025 10:24:17 -0400 Subject: ref_tracker: add a static classname string to each ref_tracker_dir A later patch in the series will be adding debugfs files for each ref_tracker that get created in ref_tracker_dir_init(). The format will be "class@%px". The current "name" string can vary between ref_tracker_dir objects of the same type, so it's not suitable for this purpose. Add a new "class" string to the ref_tracker dir that describes the the type of object (sans any individual info for that object). Also, in the i915 driver, gate the creation of debugfs files on whether the dentry pointer is still set to NULL. CI has shown that the ref_tracker_dir can be initialized more than once. Signed-off-by: Jeff Layton Link: https://patch.msgid.link/20250618-reftrack-dbgfs-v15-4-24fc37ead144@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ref_tracker.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h index a0a1ee43724f..3968f993db81 100644 --- a/include/linux/ref_tracker.h +++ b/include/linux/ref_tracker.h @@ -19,6 +19,7 @@ struct ref_tracker_dir { bool dead; struct list_head list; /* List of active trackers */ struct list_head quarantine; /* List of dead trackers */ + const char *class; /* object classname */ char name[32]; #endif }; @@ -27,6 +28,7 @@ struct ref_tracker_dir { static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, unsigned int quarantine_count, + const char *class, const char *name) { INIT_LIST_HEAD(&dir->list); @@ -36,6 +38,7 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, dir->dead = false; refcount_set(&dir->untracked, 1); refcount_set(&dir->no_tracker, 1); + dir->class = class; strscpy(dir->name, name, sizeof(dir->name)); stack_depot_init(); } @@ -60,6 +63,7 @@ int ref_tracker_free(struct ref_tracker_dir *dir, static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, unsigned int quarantine_count, + const char *class, const char *name) { } -- cgit v1.2.3 From 65b584f5361163ba539d2c7122ca792c3cc87997 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 18 Jun 2025 10:24:19 -0400 Subject: ref_tracker: automatically register a file in debugfs for a ref_tracker_dir Currently, there is no convenient way to see the info that the ref_tracking infrastructure collects. Attempt to create a file in debugfs when called from ref_tracker_dir_init(). The file is given the name "class@%px", as having the unmodified address is helpful for debugging. This should be safe since this directory is only accessible by root While ref_tracker_dir_init() is generally called from a context where sleeping is OK, ref_tracker_dir_exit() can be called from anywhere. Thus, dentry cleanup must be handled asynchronously. Add a new global xarray that has entries with the ref_tracker_dir pointer as the index and the corresponding debugfs dentry pointer as the value. Instead of removing the debugfs dentry, have ref_tracker_dir_exit() set a mark on the xarray entry and schedule a workqueue job. The workqueue job then walks the xarray looking for marked entries, and removes their xarray entries and the debugfs dentries. Because of this, the debugfs dentry can outlive the corresponding ref_tracker_dir. Have ref_tracker_debugfs_show() take extra care to ensure that it's safe to dereference the dir pointer before using it. Signed-off-by: Jeff Layton Link: https://patch.msgid.link/20250618-reftrack-dbgfs-v15-6-24fc37ead144@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ref_tracker.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h index 3968f993db81..28bbf436a8f4 100644 --- a/include/linux/ref_tracker.h +++ b/include/linux/ref_tracker.h @@ -26,6 +26,18 @@ struct ref_tracker_dir { #ifdef CONFIG_REF_TRACKER +#ifdef CONFIG_DEBUG_FS + +void ref_tracker_dir_debugfs(struct ref_tracker_dir *dir); + +#else /* CONFIG_DEBUG_FS */ + +static inline void ref_tracker_dir_debugfs(struct ref_tracker_dir *dir) +{ +} + +#endif /* CONFIG_DEBUG_FS */ + static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, unsigned int quarantine_count, const char *class, @@ -40,6 +52,7 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, refcount_set(&dir->no_tracker, 1); dir->class = class; strscpy(dir->name, name, sizeof(dir->name)); + ref_tracker_dir_debugfs(dir); stack_depot_init(); } @@ -68,6 +81,10 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, { } +static inline void ref_tracker_dir_debugfs(struct ref_tracker_dir *dir) +{ +} + static inline void ref_tracker_dir_exit(struct ref_tracker_dir *dir) { } -- cgit v1.2.3 From d04992dc86a6c77b7d39a1ee10013aed7111e855 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 18 Jun 2025 10:24:20 -0400 Subject: ref_tracker: add a way to create a symlink to the ref_tracker_dir debugfs file Add the ability for a subsystem to add a user-friendly symlink that points to a ref_tracker_dir's debugfs file. Add a separate debugfs_symlinks xarray and use that to track symlinks. The reaper workqueue job will remove symlinks before their corresponding dentries. Signed-off-by: Jeff Layton Link: https://patch.msgid.link/20250618-reftrack-dbgfs-v15-7-24fc37ead144@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ref_tracker.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h index 28bbf436a8f4..e1323de93bf6 100644 --- a/include/linux/ref_tracker.h +++ b/include/linux/ref_tracker.h @@ -29,6 +29,7 @@ struct ref_tracker_dir { #ifdef CONFIG_DEBUG_FS void ref_tracker_dir_debugfs(struct ref_tracker_dir *dir); +void ref_tracker_dir_symlink(struct ref_tracker_dir *dir, const char *fmt, ...); #else /* CONFIG_DEBUG_FS */ @@ -36,6 +37,11 @@ static inline void ref_tracker_dir_debugfs(struct ref_tracker_dir *dir) { } +static inline __ostream_printf +void ref_tracker_dir_symlink(struct ref_tracker_dir *dir, const char *fmt, ...) +{ +} + #endif /* CONFIG_DEBUG_FS */ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, @@ -85,6 +91,11 @@ static inline void ref_tracker_dir_debugfs(struct ref_tracker_dir *dir) { } +static inline __ostream_printf +void ref_tracker_dir_symlink(struct ref_tracker_dir *dir, const char *fmt, ...) +{ +} + static inline void ref_tracker_dir_exit(struct ref_tracker_dir *dir) { } -- cgit v1.2.3 From 707bd05be75f65749c3f1695f4e362a89b3fcc7b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 18 Jun 2025 10:24:22 -0400 Subject: ref_tracker: eliminate the ref_tracker_dir name field Now that we have dentries and the ability to create meaningful symlinks to them, don't keep a name string in each tracker. Switch the output format to print "class@address", and drop the name field. Also, add a kerneldoc header for ref_tracker_dir_init(). Signed-off-by: Jeff Layton Link: https://patch.msgid.link/20250618-reftrack-dbgfs-v15-9-24fc37ead144@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ref_tracker.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h index e1323de93bf6..d10563afd91c 100644 --- a/include/linux/ref_tracker.h +++ b/include/linux/ref_tracker.h @@ -20,7 +20,6 @@ struct ref_tracker_dir { struct list_head list; /* List of active trackers */ struct list_head quarantine; /* List of dead trackers */ const char *class; /* object classname */ - char name[32]; #endif }; @@ -44,10 +43,21 @@ void ref_tracker_dir_symlink(struct ref_tracker_dir *dir, const char *fmt, ...) #endif /* CONFIG_DEBUG_FS */ +/** + * ref_tracker_dir_init - initialize a ref_tracker dir + * @dir: ref_tracker_dir to be initialized + * @quarantine_count: max number of entries to be tracked + * @class: pointer to static string that describes object type + * + * Initialize a ref_tracker_dir. If debugfs is configured, then a file + * will also be created for it under the top-level ref_tracker debugfs + * directory. + * + * Note that @class must point to a static string. + */ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, unsigned int quarantine_count, - const char *class, - const char *name) + const char *class) { INIT_LIST_HEAD(&dir->list); INIT_LIST_HEAD(&dir->quarantine); @@ -57,7 +67,6 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, refcount_set(&dir->untracked, 1); refcount_set(&dir->no_tracker, 1); dir->class = class; - strscpy(dir->name, name, sizeof(dir->name)); ref_tracker_dir_debugfs(dir); stack_depot_init(); } @@ -82,8 +91,7 @@ int ref_tracker_free(struct ref_tracker_dir *dir, static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, unsigned int quarantine_count, - const char *class, - const char *name) + const char *class) { } -- cgit v1.2.3 From 5f4081d6fafec6c9dca9e7990e783b70db854a5c Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 17 Apr 2025 15:44:22 +0200 Subject: clk: add a clk_hw helpers to get the clock device or device_node Add helpers to get the device or device_node associated with clk_hw. This can be used by clock drivers to access various device related functionality such as devres, dev_ prints, etc ... Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20250417-clk-hw-get-helpers-v1-1-7743e509612a@baylibre.com Reviewed-by: Brian Masney Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 2e6e603b7493..630705a47129 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1360,6 +1360,32 @@ void clk_hw_unregister(struct clk_hw *hw); /* helper functions */ const char *__clk_get_name(const struct clk *clk); const char *clk_hw_get_name(const struct clk_hw *hw); + +/** + * clk_hw_get_dev() - get device from an hardware clock. + * @hw: the clk_hw pointer to get the struct device from + * + * This is a helper to get the struct device associated with a hardware + * clock. Some clock controllers, such as the one registered with + * CLK_OF_DECLARE(), may have not provided a device pointer while + * registering the clock. + * + * Return: the struct device associated with the clock, or NULL if there + * is none. + */ +struct device *clk_hw_get_dev(const struct clk_hw *hw); + +/** + * clk_hw_get_of_node() - get device_node from a hardware clock. + * @hw: the clk_hw pointer to get the struct device_node from + * + * This is a helper to get the struct device_node associated with a + * hardware clock. + * + * Return: the struct device_node associated with the clock, or NULL + * if there is none. + */ +struct device_node *clk_hw_get_of_node(const struct clk_hw *hw); #ifdef CONFIG_COMMON_CLK struct clk_hw *__clk_get_hw(struct clk *clk); #else -- cgit v1.2.3 From aa34ecc42a2138af76642b68b53a5a07cb12fe43 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Wed, 28 May 2025 09:09:47 +0530 Subject: wifi: ieee80211: add Radio Measurement action fields Drivers that support Tx power insertion could examine the outgoing Radio measurement packet and depending on the packet type, the driver can insert specific data fields in it. These action field values will help drivers classify the action code within the Radio Measurement action packet. These action fields are defined in IEEE 802.11-2024 - Table 9-470, Radio Measurement Action field values. Signed-off-by: Aditya Kumar Singh Link: https://patch.msgid.link/20250528-add_rrm_action_code-v1-1-6b7c78b5bbaf@oss.qualcomm.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 22f39e5e2ff1..120de474a8bf 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4007,6 +4007,16 @@ enum ieee80211_s1g_actioncode { WLAN_S1G_TWT_INFORMATION = 11, }; +/* Radio measurement action codes as defined in IEEE 802.11-2024 - Table 9-470 */ +enum ieee80211_radio_measurement_actioncode { + WLAN_RM_ACTION_RADIO_MEASUREMENT_REQUEST = 0, + WLAN_RM_ACTION_RADIO_MEASUREMENT_REPORT = 1, + WLAN_RM_ACTION_LINK_MEASUREMENT_REQUEST = 2, + WLAN_RM_ACTION_LINK_MEASUREMENT_REPORT = 3, + WLAN_RM_ACTION_NEIGHBOR_REPORT_REQUEST = 4, + WLAN_RM_ACTION_NEIGHBOR_REPORT_RESPONSE = 5, +}; + #define IEEE80211_WEP_IV_LEN 4 #define IEEE80211_WEP_ICV_LEN 4 #define IEEE80211_CCMP_HDR_LEN 8 -- cgit v1.2.3 From df42bfc96e0ad90d243c0ee6b783a33bdb72a184 Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Tue, 27 May 2025 14:11:43 +0530 Subject: wifi: cfg80211: Add utility API to get radio index from channel Add utility API cfg80211_get_radio_idx_by_chan() to retrieve the radio index corresponding to a given channel in a multi-radio wiphy. This utility function can be used when we want to check the radio-specific data for a channel in a multi-radio wiphy. For example, it can help determine the radio index required to handle a scan request. This index can then be used to decide whether the scan can proceed without interfering with ongoing DFS operations on another radio. Signed-off-by: Vasanthakumar Thiagarajan Co-developed-by: Raj Kumar Bhagat Signed-off-by: Raj Kumar Bhagat Link: https://patch.msgid.link/20250527-mlo-dfs-acs-v2-1-92c2f37c81d9@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d1848dc8ec99..7719a90ab4d7 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -9372,6 +9372,17 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, void (*iter)(const struct ieee80211_iface_combination *c, void *data), void *data); +/** + * cfg80211_get_radio_idx_by_chan - get the radio index by the channel + * + * @wiphy: the wiphy + * @chan: channel for which the supported radio index is required + * + * Return: radio index on success or a negative error code + */ +int cfg80211_get_radio_idx_by_chan(struct wiphy *wiphy, + const struct ieee80211_channel *chan); + /** * cfg80211_stop_iface - trigger interface disconnection -- cgit v1.2.3 From 5ae1fc4069578f50798f3372f36a3c13ee565b66 Mon Sep 17 00:00:00 2001 From: Kavita Kavita Date: Wed, 4 Jun 2025 16:27:56 +0530 Subject: wifi: cfg80211: Improve the documentation for NL80211_CMD_ASSOC_MLO_RECONF The existing documentation for the NL80211_CMD_ASSOC_MLO_RECONF does not clearly explain handling of link reconfiguration request results from the driver. Add documentation to explain that the command is used as an event to notify userspace about added links information, and that the existing NL80211_CMD_LINKS_REMOVED command is used to notify userspace about removed links information. Signed-off-by: Kavita Kavita Link: https://patch.msgid.link/20250604105757.2542-2-quic_kkavita@quicinc.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e9ccf43fe3c6..e53840d009d1 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1330,7 +1330,11 @@ * TID to Link mapping for downlink/uplink traffic. * * @NL80211_CMD_ASSOC_MLO_RECONF: For a non-AP MLD station, request to - * add/remove links to/from the association. + * add/remove links to/from the association. To indicate link + * reconfiguration request results from the driver, this command is also + * used as an event to notify userspace about the added links information. + * For notifying the removed links information, the existing + * %NL80211_CMD_LINKS_REMOVED command is used. * * @NL80211_CMD_EPCS_CFG: EPCS configuration for a station. Used by userland to * control EPCS configuration. Used to notify userland on the current state -- cgit v1.2.3 From 7c598c653ad465138ecc2fe64492633c541effef Mon Sep 17 00:00:00 2001 From: Kavita Kavita Date: Wed, 4 Jun 2025 16:27:57 +0530 Subject: wifi: cfg80211: Add support for link reconfiguration negotiation offload to driver In the case of SME-in-driver, the driver can internally choose to update the links based on the AP MLD recommendation and do link reconfiguration negotiation with AP MLD. (e.g., After the driver processing the BSS Transition Management request frame received from the AP MLD with Neighbor Report containing Multi-Link element with recommended links information chooses to do link reconfiguration negotiation with AP MLD). To support this, extend cfg80211_mlo_reconf_add_done() and NL80211_CMD_ASSOC_MLO_RECONF to indicate added links information for driver-initiated link reconfiguration requests. For removed links, the driver indicates links information using the NL80211_CMD_LINKS_REMOVED event for driver-initiated cases, the same as supplicant initiated cases. For the driver-initiated case, cfg80211 will receive link reconfiguration result asynchronously from driver so holding BSSes of the accepted add links is needed in the event path. Also, no need of unhold call for the rejected add link BSSes since there was no hold call happened previously. Once the supplicant receives the NL80211_CMD_ASSOC_MLO_RECONF event, it needs to process the information about newly added links and install per-link group keys (e.g., GTK/IGTK/BIGTK etc.). In case of the SME-in-driver, using a vendor interface etc. to notify the supplicant to initiate a link reconfiguration request and then supplicant sending command to the cfg80211 can lead to race conditions. The correct design to avoid this is that the driver indicates the cfg80211 directly with the results of the link reconfiguration negotiation. Signed-off-by: Kavita Kavita Link: https://patch.msgid.link/20250604105757.2542-3-quic_kkavita@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ++++++ include/uapi/linux/nl80211.h | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7719a90ab4d7..47b4235eea59 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -9747,6 +9747,11 @@ void cfg80211_links_removed(struct net_device *dev, u16 link_mask); * struct cfg80211_mlo_reconf_done_data - MLO reconfiguration data * @buf: MLO Reconfiguration Response frame (header + body) * @len: length of the frame data + * @driver_initiated: Indicates whether the add links request is initiated by + * driver. This is set to true when the link reconfiguration request + * initiated by driver due to AP link recommendation requests + * (Ex: BTM (BSS Transition Management) request) handling offloaded to + * driver. * @added_links: BIT mask of links successfully added to the association * @links: per-link information indexed by link ID * @links.bss: the BSS that MLO reconfiguration was requested for, ownership of @@ -9759,6 +9764,7 @@ void cfg80211_links_removed(struct net_device *dev, u16 link_mask); struct cfg80211_mlo_reconf_done_data { const u8 *buf; size_t len; + bool driver_initiated; u16 added_links; struct { struct cfg80211_bss *bss; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e53840d009d1..a289014abe37 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1334,7 +1334,11 @@ * reconfiguration request results from the driver, this command is also * used as an event to notify userspace about the added links information. * For notifying the removed links information, the existing - * %NL80211_CMD_LINKS_REMOVED command is used. + * %NL80211_CMD_LINKS_REMOVED command is used. This command is also used to + * notify userspace about newly added links for the current connection in + * case of AP-initiated link recommendation requests, received via + * a BTM (BSS Transition Management) request or a link reconfig notify + * frame, where the driver handles the link recommendation offload. * * @NL80211_CMD_EPCS_CFG: EPCS configuration for a station. Used by userland to * control EPCS configuration. Used to notify userland on the current state -- cgit v1.2.3 From 13ef21dffe7691a32c83a83d697d119c045536eb Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Mon, 16 Jun 2025 19:48:20 +0800 Subject: ASoC: SDCA: add support for HIDE entity properties and HID descriptor/report Add support for parsing the HIDE entity descriptor and HID descriptor/report Signed-off-by: Shuming Fan Reviewed-by: Charles Keepax Link: https://patch.msgid.link/20250616114820.855401-1-shumingf@realtek.com Signed-off-by: Mark Brown --- include/sound/sdca_function.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h index eaedb54a8322..856b0f40ce5e 100644 --- a/include/sound/sdca_function.h +++ b/include/sound/sdca_function.h @@ -11,6 +11,7 @@ #include #include +#include struct device; struct sdca_entity; @@ -1040,6 +1041,32 @@ struct sdca_entity_ge { int num_modes; }; +/** + * struct sdca_entity_hide - information specific to HIDE Entities + * @hid: HID device structure + * @hidtx_ids: HIDTx Report ID + * @num_hidtx_ids: number of HIDTx Report ID + * @hidrx_ids: HIDRx Report ID + * @num_hidrx_ids: number of HIDRx Report ID + * @hide_reside_function_num: indicating which Audio Function Numbers within this Device + * @max_delay: the maximum time in microseconds allowed for the Device to change the ownership from Device to Host + * @af_number_list: which Audio Function Numbers within this Device are sending/receiving the messages in this HIDE + * @hid_desc: HID descriptor for the HIDE Entity + * @hid_report_desc: HID Report Descriptor for the HIDE Entity + */ +struct sdca_entity_hide { + struct hid_device *hid; + unsigned int *hidtx_ids; + int num_hidtx_ids; + unsigned int *hidrx_ids; + int num_hidrx_ids; + unsigned int hide_reside_function_num; + unsigned int max_delay; + unsigned int af_number_list[SDCA_MAX_FUNCTION_COUNT]; + struct hid_descriptor hid_desc; + unsigned char *hid_report_desc; +}; + /** * struct sdca_entity - information for one SDCA Entity * @label: String such as "OT 12". @@ -1055,6 +1082,7 @@ struct sdca_entity_ge { * @cs: Clock Source specific Entity properties. * @pde: Power Domain Entity specific Entity properties. * @ge: Group Entity specific Entity properties. + * @hide: HIDE Entity specific Entity properties. */ struct sdca_entity { const char *label; @@ -1071,6 +1099,7 @@ struct sdca_entity { struct sdca_entity_cs cs; struct sdca_entity_pde pde; struct sdca_entity_ge ge; + struct sdca_entity_hide hide; }; }; -- cgit v1.2.3 From 3421d46440ebe0865bec71dbd2330b4e17a425ab Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 16 Jun 2025 19:49:07 +0800 Subject: HID: core: Add bus define for SoundWire bus SDCA (SoundWire Device Class for Audio) uses HID to convey input events from peripheral devices. Add a bus define for the SoundWire bus to prepare support for this. Signed-off-by: Charles Keepax Signed-off-by: Shuming Fan Acked-by: Jiri Kosina Link: https://patch.msgid.link/20250616114907.855452-1-shumingf@realtek.com Signed-off-by: Mark Brown --- include/uapi/linux/input.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index 2557eb7b0561..127119c287cf 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -275,6 +275,7 @@ struct input_mask { #define BUS_CEC 0x1E #define BUS_INTEL_ISHTP 0x1F #define BUS_AMD_SFH 0x20 +#define BUS_SDW 0x21 /* * MT_TOOL types -- cgit v1.2.3 From 4051ead99888f101be92c7ce90d2de09aac6fd1c Mon Sep 17 00:00:00 2001 From: Li Chen Date: Fri, 20 Jun 2025 20:02:31 +0800 Subject: HID: rate-limit hid_warn to prevent log flooding Syzkaller can create many uhid devices that trigger repeated warnings like: "hid-generic xxxx: unknown main item tag 0x0" These messages can flood the system log, especially if a crash occurs (e.g., with a slow UART console, leading to soft lockups). To mitigate this, convert `hid_warn()` to use `dev_warn_ratelimited()`. This helps reduce log noise and improves system stability under fuzzing or faulty device scenarios. Signed-off-by: Li Chen Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 568a9d8c749b..7f260e0e2049 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1239,6 +1239,8 @@ void hid_quirks_exit(__u16 bus); dev_notice(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_warn(hid, fmt, ...) \ dev_warn(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_warn_ratelimited(hid, fmt, ...) \ + dev_warn_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_info(hid, fmt, ...) \ dev_info(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_dbg(hid, fmt, ...) \ -- cgit v1.2.3 From cf207eac06f661fb692f405d5ab8230df884ee52 Mon Sep 17 00:00:00 2001 From: Binbin Wu Date: Tue, 10 Jun 2025 10:14:20 +0800 Subject: KVM: TDX: Handle TDG.VP.VMCALL Handle TDVMCALL for GetQuote to generate a TD-Quote. GetQuote is a doorbell-like interface used by TDX guests to request VMM to generate a TD-Quote signed by a service hosting TD-Quoting Enclave operating on the host. A TDX guest passes a TD Report (TDREPORT_STRUCT) in a shared-memory area as parameter. Host VMM can access it and queue the operation for a service hosting TD-Quoting enclave. When completed, the Quote is returned via the same shared-memory area. KVM only checks the GPA from the TDX guest has the shared-bit set and drops the shared-bit before exiting to userspace to avoid bleeding the shared-bit into KVM's exit ABI. KVM forwards the request to userspace VMM (e.g. QEMU) and userspace VMM queues the operation asynchronously. KVM sets the return code according to the 'ret' field set by userspace to notify the TDX guest whether the request has been queued successfully or not. When the request has been queued successfully, the TDX guest can poll the status field in the shared-memory area to check whether the Quote generation is completed or not. When completed, the generated Quote is returned via the same buffer. Add KVM_EXIT_TDX as a new exit reason to userspace. Userspace is required to handle the KVM exit reason as the initial support for TDX, by reentering KVM to ensure that the TDVMCALL is complete. While at it, add a note that KVM_EXIT_HYPERCALL also requires reentry with KVM_RUN. Signed-off-by: Binbin Wu Tested-by: Mikko Ylinen Acked-by: Kai Huang [Adjust userspace API. - Paolo] Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d00b85cb168c..e23e7286ad1a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -178,6 +178,7 @@ struct kvm_xen_exit { #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 +#define KVM_EXIT_TDX 40 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -447,6 +448,22 @@ struct kvm_run { __u64 gpa; __u64 size; } memory_fault; + /* KVM_EXIT_TDX */ + struct { + __u64 flags; + __u64 nr; + union { + struct { + __u64 ret; + __u64 data[5]; + } unknown; + struct { + __u64 ret; + __u64 gpa; + __u64 size; + } get_quote; + }; + } tdx; /* Fix the size of the union. */ char padding[256]; }; -- cgit v1.2.3 From 25e8b1dd4883e6c251c3db5b347f3c8ae4ade921 Mon Sep 17 00:00:00 2001 From: Binbin Wu Date: Tue, 10 Jun 2025 10:14:21 +0800 Subject: KVM: TDX: Exit to userspace for GetTdVmCallInfo Exit to userspace for TDG.VP.VMCALL via KVM_EXIT_TDX, to allow userspace to provide information about the support of TDVMCALLs when r12 is 1 for the TDVMCALLs beyond the GHCI base API. GHCI spec defines the GHCI base TDVMCALLs: , , , , <#VE.RequestMMIO>, , , and . They must be supported by VMM to support TDX guests. For GetTdVmCallInfo - When leaf (r12) to enumerate TDVMCALL functionality is set to 0, successful execution indicates all GHCI base TDVMCALLs listed above are supported. Update the KVM TDX document with the set of the GHCI base APIs. - When leaf (r12) to enumerate TDVMCALL functionality is set to 1, it indicates the TDX guest is querying the supported TDVMCALLs beyond the GHCI base TDVMCALLs. Exit to userspace to let userspace set the TDVMCALL sub-function bit(s) accordingly to the leaf outputs. KVM could set the TDVMCALL bit(s) supported by itself when the TDVMCALLs don't need support from userspace after returning from userspace and before entering guest. Currently, no such TDVMCALLs implemented, KVM just sets the values returned from userspace. Suggested-by: Paolo Bonzini Signed-off-by: Binbin Wu [Adjust userspace API. - Paolo] Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e23e7286ad1a..37891580d05d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -462,6 +462,11 @@ struct kvm_run { __u64 gpa; __u64 size; } get_quote; + struct { + __u64 ret; + __u64 leaf; + __u64 r11, r12, r13, r14; + } get_tdvmcall_info; }; } tdx; /* Fix the size of the union. */ -- cgit v1.2.3 From 4580dbef5ce0f95a4bd8ac2d007bc4fbf1539332 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 20 Jun 2025 13:28:08 -0400 Subject: KVM: TDX: Exit to userspace for SetupEventNotifyInterrupt Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 37891580d05d..7a4c35ff03fe 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -467,6 +467,10 @@ struct kvm_run { __u64 leaf; __u64 r11, r12, r13, r14; } get_tdvmcall_info; + struct { + __u64 ret; + __u64 vector; + } setup_event_notify; }; } tdx; /* Fix the size of the union. */ -- cgit v1.2.3 From 64f7548aad63d2fbca2eeb6eb33361c218ebd5a5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 20 Jun 2025 21:19:40 +0200 Subject: lib/crypto: sha256: Mark sha256_choose_blocks as __always_inline When the compiler chooses to not inline sha256_choose_blocks() in the purgatory code, it fails to link against the missing CPU specific version: x86_64-linux-ld: arch/x86/purgatory/purgatory.ro: in function `sha256_choose_blocks.part.0': sha256.c:(.text+0x6a6): undefined reference to `irq_fpu_usable' sha256.c:(.text+0x6c7): undefined reference to `sha256_blocks_arch' sha256.c:(.text+0x6cc): undefined reference to `sha256_blocks_simd' Mark this function as __always_inline to prevent this, same as sha256_finup(). Fixes: 5b90a779bc54 ("crypto: lib/sha256 - Add helpers for block-based shash") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250620191952.1867578-1-arnd@kernel.org Signed-off-by: Eric Biggers --- include/crypto/internal/sha2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h index b9bccd3ff57f..21a27fd5e198 100644 --- a/include/crypto/internal/sha2.h +++ b/include/crypto/internal/sha2.h @@ -25,7 +25,7 @@ void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks); -static inline void sha256_choose_blocks( +static __always_inline void sha256_choose_blocks( u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks, bool force_generic, bool force_simd) { -- cgit v1.2.3 From 530a8ba71b4c3b7fcee323dd997f4bab1be1a6ba Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 16 May 2025 14:35:35 -0700 Subject: KVM: Bound the number of dirty ring entries in a single reset at INT_MAX Cap the number of ring entries that are reset in a single ioctl to INT_MAX to ensure userspace isn't confused by a wrap into negative space, and so that, in a truly pathological scenario, KVM doesn't miss a TLB flush due to the count wrapping to zero. While the size of the ring is fixed at 0x10000 entries and KVM (currently) supports at most 4096, userspace is allowed to harvest entries from the ring while the reset is in-progress, i.e. it's possible for the ring to always have harvested entries. Opportunistically return an actual error code from the helper so that a future fix to handle pending signals can gracefully return -EINTR. Drop the function comment now that the return code is a stanard 0/-errno (and because a future commit will add a proper lockdep assertion). Opportunistically drop a similarly stale comment for kvm_dirty_ring_push(). Cc: Peter Xu Cc: Yan Zhao Cc: Maxim Levitsky Cc: Binbin Wu Fixes: fb04a1eddb1a ("KVM: X86: Implement ring-based dirty memory tracking") Reviewed-by: James Houghton Reviewed-by: Binbin Wu Reviewed-by: Yan Zhao Reviewed-by: Peter Xu Link: https://lore.kernel.org/r/20250516213540.2546077-2-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_dirty_ring.h | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h index da4d9b5f58f1..eb10d87adf7d 100644 --- a/include/linux/kvm_dirty_ring.h +++ b/include/linux/kvm_dirty_ring.h @@ -49,9 +49,10 @@ static inline int kvm_dirty_ring_alloc(struct kvm *kvm, struct kvm_dirty_ring *r } static inline int kvm_dirty_ring_reset(struct kvm *kvm, - struct kvm_dirty_ring *ring) + struct kvm_dirty_ring *ring, + int *nr_entries_reset) { - return 0; + return -ENOENT; } static inline void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, @@ -77,17 +78,8 @@ bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm); u32 kvm_dirty_ring_get_rsvd_entries(struct kvm *kvm); int kvm_dirty_ring_alloc(struct kvm *kvm, struct kvm_dirty_ring *ring, int index, u32 size); - -/* - * called with kvm->slots_lock held, returns the number of - * processed pages. - */ -int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring); - -/* - * returns =0: successfully pushed - * <0: unable to push, need to wait - */ +int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring, + int *nr_entries_reset); void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset); bool kvm_dirty_ring_check_request(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From cd4178d19420359554e3da6fd77ecfd0f58067ce Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 12 Jun 2025 16:51:47 -0700 Subject: KVM: arm64: WARN if unmapping a vLPI fails in any path When unmapping a vLPI, WARN if nullifying vCPU affinity fails, not just if failure occurs when freeing an ITE. If undoing vCPU affinity fails, then odds are very good that vLPI state tracking has has gotten out of whack, i.e. that KVM and the GIC disagree on the state of an IRQ/vLPI. At best, inconsistent state means there is a lurking bug/flaw somewhere. At worst, the inconsistency could eventually be fatal to the host, e.g. if an ITS command fails because KVM's view of things doesn't match reality/hardware. Note, only the call from kvm_arch_irq_bypass_del_producer() by way of kvm_vgic_v4_unset_forwarding() doesn't already WARN. Common KVM's kvm_irq_routing_update() WARNs if kvm_arch_update_irqfd_routing() fails. For that path, if its_unmap_vlpi() fails in kvm_vgic_v4_unset_forwarding(), the only possible causes are that the GIC doesn't have a v4 ITS (from its_irq_set_vcpu_affinity()): /* Need a v4 ITS */ if (!is_v4(its_dev->its)) return -EINVAL; guard(raw_spinlock)(&its_dev->event_map.vlpi_lock); /* Unmap request? */ if (!info) return its_vlpi_unmap(d); or that KVM has gotten out of sync with the GIC/ITS (from its_vlpi_unmap()): if (!its_dev->event_map.vm || !irqd_is_forwarded_to_vcpu(d)) return -EINVAL; All of the above failure scenarios are warnable offences, as they should never occur absent a kernel/KVM bug. Acked-by: Oliver Upton Link: https://lore.kernel.org/all/aFWY2LTVIxz5rfhh@google.com Signed-off-by: Sean Christopherson --- include/linux/irqchip/arm-gic-v4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 7f1f11a5e4e4..0b0887099fd7 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -146,7 +146,7 @@ int its_commit_vpe(struct its_vpe *vpe); int its_invall_vpe(struct its_vpe *vpe); int its_map_vlpi(int irq, struct its_vlpi_map *map); int its_get_vlpi(int irq, struct its_vlpi_map *map); -int its_unmap_vlpi(int irq); +void its_unmap_vlpi(int irq); int its_prop_update_vlpi(int irq, u8 config, bool inv); int its_prop_update_vsgi(int irq, u8 priority, bool group); -- cgit v1.2.3 From 2b521d86ee80a436a92445b8206d38d75aeb39ea Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 16 May 2025 16:07:29 -0700 Subject: irqbypass: Take ownership of producer/consumer token tracking Move ownership of IRQ bypass token tracking into irqbypass.ko, and explicitly require callers to pass an eventfd_ctx structure instead of a completely opaque token. Relying on producers and consumers to set the token appropriately is error prone, and hiding the fact that the token must be an eventfd_ctx pointer (for all intents and purposes) unnecessarily obfuscates the code and makes it more brittle. Reviewed-by: Kevin Tian Acked-by: Michael S. Tsirkin Reviewed-by: Alex Williamson Link: https://lore.kernel.org/r/20250516230734.2564775-4-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/irqbypass.h | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h index 9bdb2a781841..1b57d15ac4cf 100644 --- a/include/linux/irqbypass.h +++ b/include/linux/irqbypass.h @@ -10,6 +10,7 @@ #include +struct eventfd_ctx; struct irq_bypass_consumer; /* @@ -18,20 +19,20 @@ struct irq_bypass_consumer; * The IRQ bypass manager is a simple set of lists and callbacks that allows * IRQ producers (ex. physical interrupt sources) to be matched to IRQ * consumers (ex. virtualization hardware that allows IRQ bypass or offload) - * via a shared token (ex. eventfd_ctx). Producers and consumers register - * independently. When a token match is found, the optional @stop callback - * will be called for each participant. The pair will then be connected via - * the @add_* callbacks, and finally the optional @start callback will allow - * any final coordination. When either participant is unregistered, the - * process is repeated using the @del_* callbacks in place of the @add_* - * callbacks. Match tokens must be unique per producer/consumer, 1:N pairings - * are not supported. + * via a shared eventfd_ctx. Producers and consumers register independently. + * When a producer and consumer are paired, i.e. an eventfd match is found, the + * optional @stop callback will be called for each participant. The pair will + * then be connected via the @add_* callbacks, and finally the optional @start + * callback will allow any final coordination. When either participant is + * unregistered, the process is repeated using the @del_* callbacks in place of + * the @add_* callbacks. eventfds must be unique per producer/consumer, 1:N + * pairings are not supported. */ /** * struct irq_bypass_producer - IRQ bypass producer definition * @node: IRQ bypass manager private list management - * @token: opaque token to match between producer and consumer (non-NULL) + * @eventfd: eventfd context used to match producers and consumers * @irq: Linux IRQ number for the producer device * @add_consumer: Connect the IRQ producer to an IRQ consumer (optional) * @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional) @@ -44,7 +45,7 @@ struct irq_bypass_consumer; */ struct irq_bypass_producer { struct list_head node; - void *token; + struct eventfd_ctx *eventfd; int irq; int (*add_consumer)(struct irq_bypass_producer *, struct irq_bypass_consumer *); @@ -57,7 +58,7 @@ struct irq_bypass_producer { /** * struct irq_bypass_consumer - IRQ bypass consumer definition * @node: IRQ bypass manager private list management - * @token: opaque token to match between producer and consumer (non-NULL) + * @eventfd: eventfd context used to match producers and consumers * @add_producer: Connect the IRQ consumer to an IRQ producer * @del_producer: Disconnect the IRQ consumer from an IRQ producer * @stop: Perform any quiesce operations necessary prior to add/del (optional) @@ -70,7 +71,7 @@ struct irq_bypass_producer { */ struct irq_bypass_consumer { struct list_head node; - void *token; + struct eventfd_ctx *eventfd; int (*add_producer)(struct irq_bypass_consumer *, struct irq_bypass_producer *); void (*del_producer)(struct irq_bypass_consumer *, @@ -79,9 +80,11 @@ struct irq_bypass_consumer { void (*start)(struct irq_bypass_consumer *); }; -int irq_bypass_register_producer(struct irq_bypass_producer *); -void irq_bypass_unregister_producer(struct irq_bypass_producer *); -int irq_bypass_register_consumer(struct irq_bypass_consumer *); -void irq_bypass_unregister_consumer(struct irq_bypass_consumer *); +int irq_bypass_register_producer(struct irq_bypass_producer *producer, + struct eventfd_ctx *eventfd); +void irq_bypass_unregister_producer(struct irq_bypass_producer *producer); +int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer, + struct eventfd_ctx *eventfd); +void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer); #endif /* IRQBYPASS_H */ -- cgit v1.2.3 From add57f493e0893ac0fb4acbdc441918d3e800f10 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 16 May 2025 16:07:30 -0700 Subject: irqbypass: Explicitly track producer and consumer bindings Explicitly track IRQ bypass producer:consumer bindings. This will allow making removal an O(1) operation; searching through the list to find information that is trivially tracked (and useful for debug) is wasteful. Reviewed-by: Kevin Tian Acked-by: Michael S. Tsirkin Reviewed-by: Alex Williamson Link: https://lore.kernel.org/r/20250516230734.2564775-5-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/irqbypass.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h index 1b57d15ac4cf..b28197c87483 100644 --- a/include/linux/irqbypass.h +++ b/include/linux/irqbypass.h @@ -29,10 +29,13 @@ struct irq_bypass_consumer; * pairings are not supported. */ +struct irq_bypass_consumer; + /** * struct irq_bypass_producer - IRQ bypass producer definition * @node: IRQ bypass manager private list management * @eventfd: eventfd context used to match producers and consumers + * @consumer: The connected consumer (NULL if no connection) * @irq: Linux IRQ number for the producer device * @add_consumer: Connect the IRQ producer to an IRQ consumer (optional) * @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional) @@ -46,6 +49,7 @@ struct irq_bypass_consumer; struct irq_bypass_producer { struct list_head node; struct eventfd_ctx *eventfd; + struct irq_bypass_consumer *consumer; int irq; int (*add_consumer)(struct irq_bypass_producer *, struct irq_bypass_consumer *); @@ -59,6 +63,7 @@ struct irq_bypass_producer { * struct irq_bypass_consumer - IRQ bypass consumer definition * @node: IRQ bypass manager private list management * @eventfd: eventfd context used to match producers and consumers + * @producer: The connected producer (NULL if no connection) * @add_producer: Connect the IRQ consumer to an IRQ producer * @del_producer: Disconnect the IRQ consumer from an IRQ producer * @stop: Perform any quiesce operations necessary prior to add/del (optional) @@ -72,6 +77,8 @@ struct irq_bypass_producer { struct irq_bypass_consumer { struct list_head node; struct eventfd_ctx *eventfd; + struct irq_bypass_producer *producer; + int (*add_producer)(struct irq_bypass_consumer *, struct irq_bypass_producer *); void (*del_producer)(struct irq_bypass_consumer *, -- cgit v1.2.3 From 8394b32faecd9c63b3c436e78e62519e9548e530 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 16 May 2025 16:07:33 -0700 Subject: irqbypass: Use xarray to track producers and consumers Track IRQ bypass producers and consumers using an xarray to avoid the O(2n) insertion time associated with walking a list to check for duplicate entries, and to search for an partner. At low (tens or few hundreds) total producer/consumer counts, using a list is faster due to the need to allocate backing storage for xarray. But as count creeps into the thousands, xarray wins easily, and can provide several orders of magnitude better latency at high counts. E.g. hundreds of nanoseconds vs. hundreds of milliseconds. Cc: Oliver Upton Cc: David Matlack Cc: Like Xu Cc: Binbin Wu Reported-by: Yong He Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217379 Link: https://lore.kernel.org/all/20230801115646.33990-1-likexu@tencent.com Reviewed-by: Kevin Tian Acked-by: Michael S. Tsirkin Reviewed-by: Alex Williamson Link: https://lore.kernel.org/r/20250516230734.2564775-8-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/irqbypass.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h index b28197c87483..cd64fcaa88fe 100644 --- a/include/linux/irqbypass.h +++ b/include/linux/irqbypass.h @@ -33,7 +33,6 @@ struct irq_bypass_consumer; /** * struct irq_bypass_producer - IRQ bypass producer definition - * @node: IRQ bypass manager private list management * @eventfd: eventfd context used to match producers and consumers * @consumer: The connected consumer (NULL if no connection) * @irq: Linux IRQ number for the producer device @@ -47,7 +46,6 @@ struct irq_bypass_consumer; * for a physical device assigned to a VM. */ struct irq_bypass_producer { - struct list_head node; struct eventfd_ctx *eventfd; struct irq_bypass_consumer *consumer; int irq; @@ -61,7 +59,6 @@ struct irq_bypass_producer { /** * struct irq_bypass_consumer - IRQ bypass consumer definition - * @node: IRQ bypass manager private list management * @eventfd: eventfd context used to match producers and consumers * @producer: The connected producer (NULL if no connection) * @add_producer: Connect the IRQ consumer to an IRQ producer @@ -75,7 +72,6 @@ struct irq_bypass_producer { * portions of the interrupt handling to the VM. */ struct irq_bypass_consumer { - struct list_head node; struct eventfd_ctx *eventfd; struct irq_bypass_producer *producer; -- cgit v1.2.3 From 23b54381cee2928e8b5622e654ca4516f30d2f1a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 16 May 2025 16:07:34 -0700 Subject: irqbypass: Require producers to pass in Linux IRQ number during registration Pass in the Linux IRQ associated with an IRQ bypass producer instead of relying on the caller to set the field prior to registration, as there's no benefit to relying on callers to do the right thing. Take care to set producer->irq before __connect(), as KVM expects the IRQ to be valid as soon as a connection is possible. Reviewed-by: Kevin Tian Reviewed-by: Alex Williamson Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20250516230734.2564775-9-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/irqbypass.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h index cd64fcaa88fe..ede1fa938152 100644 --- a/include/linux/irqbypass.h +++ b/include/linux/irqbypass.h @@ -84,7 +84,7 @@ struct irq_bypass_consumer { }; int irq_bypass_register_producer(struct irq_bypass_producer *producer, - struct eventfd_ctx *eventfd); + struct eventfd_ctx *eventfd, int irq); void irq_bypass_unregister_producer(struct irq_bypass_producer *producer); int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer, struct eventfd_ctx *eventfd); -- cgit v1.2.3 From e295d2e7fbe69ddec772c951c466dfbfc1c96818 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 14:35:40 -0700 Subject: KVM: x86: Trigger I/O APIC route rescan in kvm_arch_irq_routing_update() Trigger the I/O APIC route rescan that's performed for a split IRQ chip after userspace updates IRQ routes in kvm_arch_irq_routing_update(), i.e. before dropping kvm->irq_lock. Calling kvm_make_all_cpus_request() under a mutex is perfectly safe, and the smp_wmb()+smp_mb__after_atomic() pair in __kvm_make_request()+kvm_check_request() ensures the new routing is visible to vCPUs prior to the request being visible to vCPUs. In all likelihood, commit b053b2aef25d ("KVM: x86: Add EOI exit bitmap inference") somewhat arbitrarily made the request outside of irq_lock to avoid holding irq_lock any longer than is strictly necessary. And then commit abdb080f7ac8 ("kvm/irqchip: kvm_arch_irq_routing_update renaming split") took the easy route of adding another arch hook instead of risking a functional change. Note, the call to synchronize_srcu_expedited() does NOT provide ordering guarantees with respect to vCPUs scanning the new routing; as above, the request infrastructure provides the necessary ordering. I.e. there's no need to wait for kvm_scan_ioapic_routes() to complete if it's actively running, because regardless of whether it grabs the old or new table, the vCPU will have another KVM_REQ_SCAN_IOAPIC pending, i.e. will rescan again and see the new mappings. Acked-by: Kai Huang Link: https://lore.kernel.org/r/20250611213557.294358-2-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3bde4fb5c6aa..9461517b4e62 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1024,14 +1024,10 @@ void vcpu_put(struct kvm_vcpu *vcpu); #ifdef __KVM_HAVE_IOAPIC void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm); -void kvm_arch_post_irq_routing_update(struct kvm *kvm); #else static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm) { } -static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm) -{ -} #endif #ifdef CONFIG_HAVE_KVM_IRQCHIP -- cgit v1.2.3 From 77a74b8ff41ae620f5a5d727d596b670b7b9994e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 14:35:48 -0700 Subject: KVM: x86: Move kvm_{request,free}_irq_source_id() to i8254.c (PIT) Move kvm_{request,free}_irq_source_id() to i8254.c, i.e. the dedicated PIT emulation file, in anticipation of removing them entirely in favor of hardcoding the PIT's "requested" source ID (the source ID can only ever be '2', and the request can never fail). No functional change intended. Acked-by: Kai Huang Link: https://lore.kernel.org/r/20250611213557.294358-10-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9461517b4e62..cba8fc4529e8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1784,8 +1784,6 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); void kvm_unregister_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); -int kvm_request_irq_source_id(struct kvm *kvm); -void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args); /* -- cgit v1.2.3 From 61423c413a746fd5fe5b0d865ea722e11b01105e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 14:35:49 -0700 Subject: KVM: x86: Hardcode the PIT IRQ source ID to '2' Hardcode the PIT's source IRQ ID to '2' instead of "finding" that bit 2 is always the first available bit in irq_sources_bitmap. Bits 0 and 1 are set/reserved by kvm_arch_init_vm(), i.e. long before kvm_create_pit() can be invoked, and KVM allows at most one in-kernel PIT instance, i.e. it's impossible for the PIT to find a different free bit (there are no other users of kvm_request_irq_source_id(). Delete the now-defunct irq_sources_bitmap and all its associated code. Acked-by: Kai Huang Link: https://lore.kernel.org/r/20250611213557.294358-11-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cba8fc4529e8..4ff5ea29e343 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -190,6 +190,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 +#define KVM_PIT_IRQ_SOURCE_ID 2 extern struct mutex kvm_lock; extern struct list_head vm_list; -- cgit v1.2.3 From 2c938850d9d18cbd6484a66588fac95d74d951fd Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 14:35:52 -0700 Subject: KVM: Move x86-only tracepoints to x86's trace.h Move the I/O APIC tracepoints and trace_kvm_msi_set_irq() to x86, as __KVM_HAVE_IOAPIC is just code for "x86", and trace_kvm_msi_set_irq() isn't unique to I/O APIC emulation. Opportunistically clean up the absurdly messy #includes in ioapic.c. No functional change intended. Acked-by: Kai Huang Link: https://lore.kernel.org/r/20250611213557.294358-14-seanjc@google.com Signed-off-by: Sean Christopherson --- include/trace/events/kvm.h | 77 ---------------------------------------------- 1 file changed, 77 deletions(-) (limited to 'include') diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index fc7d0f8ff078..96e581900c8e 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -85,83 +85,6 @@ TRACE_EVENT(kvm_set_irq, #endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */ #if defined(__KVM_HAVE_IOAPIC) -#define kvm_deliver_mode \ - {0x0, "Fixed"}, \ - {0x1, "LowPrio"}, \ - {0x2, "SMI"}, \ - {0x3, "Res3"}, \ - {0x4, "NMI"}, \ - {0x5, "INIT"}, \ - {0x6, "SIPI"}, \ - {0x7, "ExtINT"} - -TRACE_EVENT(kvm_ioapic_set_irq, - TP_PROTO(__u64 e, int pin, bool coalesced), - TP_ARGS(e, pin, coalesced), - - TP_STRUCT__entry( - __field( __u64, e ) - __field( int, pin ) - __field( bool, coalesced ) - ), - - TP_fast_assign( - __entry->e = e; - __entry->pin = pin; - __entry->coalesced = coalesced; - ), - - TP_printk("pin %u dst %x vec %u (%s|%s|%s%s)%s", - __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e, - __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), - (__entry->e & (1<<11)) ? "logical" : "physical", - (__entry->e & (1<<15)) ? "level" : "edge", - (__entry->e & (1<<16)) ? "|masked" : "", - __entry->coalesced ? " (coalesced)" : "") -); - -TRACE_EVENT(kvm_ioapic_delayed_eoi_inj, - TP_PROTO(__u64 e), - TP_ARGS(e), - - TP_STRUCT__entry( - __field( __u64, e ) - ), - - TP_fast_assign( - __entry->e = e; - ), - - TP_printk("dst %x vec %u (%s|%s|%s%s)", - (u8)(__entry->e >> 56), (u8)__entry->e, - __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), - (__entry->e & (1<<11)) ? "logical" : "physical", - (__entry->e & (1<<15)) ? "level" : "edge", - (__entry->e & (1<<16)) ? "|masked" : "") -); - -TRACE_EVENT(kvm_msi_set_irq, - TP_PROTO(__u64 address, __u64 data), - TP_ARGS(address, data), - - TP_STRUCT__entry( - __field( __u64, address ) - __field( __u64, data ) - ), - - TP_fast_assign( - __entry->address = address; - __entry->data = data; - ), - - TP_printk("dst %llx vec %u (%s|%s|%s%s)", - (u8)(__entry->address >> 12) | ((__entry->address >> 32) & 0xffffff00), - (u8)__entry->data, - __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode), - (__entry->address & (1<<2)) ? "logical" : "physical", - (__entry->data & (1<<15)) ? "level" : "edge", - (__entry->address & (1<<3)) ? "|rh" : "") -); #define kvm_irqchips \ {KVM_IRQCHIP_PIC_MASTER, "PIC master"}, \ -- cgit v1.2.3 From 628a27731e3f36de7ddce226f7e09ee70e40ed66 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 14:35:53 -0700 Subject: KVM: x86: Add CONFIG_KVM_IOAPIC to allow disabling in-kernel I/O APIC Add a Kconfig to allow building KVM without support for emulating a I/O APIC, PIC, and PIT, which is desirable for deployments that effectively don't support a fully in-kernel IRQ chip, i.e. never expect any VMM to create an in-kernel I/O APIC. E.g. compiling out support eliminates a few thousand lines of guest-facing code and gives security folks warm fuzzies. As a bonus, wrapping relevant paths with CONFIG_KVM_IOAPIC #ifdefs makes it much easier for readers to understand which bits and pieces exist specifically for fully in-kernel IRQ chips. Opportunistically convert all two in-kernel uses of __KVM_HAVE_IOAPIC to CONFIG_KVM_IOAPIC, e.g. rather than add a second #ifdef to generate a stub for kvm_arch_post_irq_routing_update(). Acked-by: Kai Huang Link: https://lore.kernel.org/r/20250611213557.294358-15-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 2 +- include/trace/events/kvm.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4ff5ea29e343..3b5575d0b574 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1023,7 +1023,7 @@ void kvm_unlock_all_vcpus(struct kvm *kvm); void vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_KVM_IOAPIC void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm); #else static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm) diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 96e581900c8e..1065a81ca57f 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -84,14 +84,14 @@ TRACE_EVENT(kvm_set_irq, ); #endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */ -#if defined(__KVM_HAVE_IOAPIC) +#ifdef CONFIG_KVM_IOAPIC #define kvm_irqchips \ {KVM_IRQCHIP_PIC_MASTER, "PIC master"}, \ {KVM_IRQCHIP_PIC_SLAVE, "PIC slave"}, \ {KVM_IRQCHIP_IOAPIC, "IOAPIC"} -#endif /* defined(__KVM_HAVE_IOAPIC) */ +#endif /* CONFIG_KVM_IOAPIC */ #if defined(CONFIG_HAVE_KVM_IRQCHIP) -- cgit v1.2.3 From 141db6cd79e2d71fce3049347177f98a233d8eb2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 14:35:54 -0700 Subject: KVM: Squash two CONFIG_HAVE_KVM_IRQCHIP #ifdefs into one Squash two #idef CONFIG_HAVE_KVM_IRQCHIP regions in KVM's trace events, as the only code outside of the #idefs depends on CONFIG_KVM_IOAPIC, and that Kconfig only exists for x86, which unconditionally selects HAVE_KVM_IRQCHIP. No functional change intended. Acked-by: Kai Huang Link: https://lore.kernel.org/r/20250611213557.294358-16-seanjc@google.com Signed-off-by: Sean Christopherson --- include/trace/events/kvm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 1065a81ca57f..0b6b79b1a1bc 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -82,7 +82,6 @@ TRACE_EVENT(kvm_set_irq, TP_printk("gsi %u level %d source %d", __entry->gsi, __entry->level, __entry->irq_source_id) ); -#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */ #ifdef CONFIG_KVM_IOAPIC @@ -93,8 +92,6 @@ TRACE_EVENT(kvm_set_irq, #endif /* CONFIG_KVM_IOAPIC */ -#if defined(CONFIG_HAVE_KVM_IRQCHIP) - #ifdef kvm_irqchips #define kvm_ack_irq_string "irqchip %s pin %u" #define kvm_ack_irq_parm __print_symbolic(__entry->irqchip, kvm_irqchips), __entry->pin -- cgit v1.2.3 From cb210737675ef4c1ad88721e84558eeb2f199312 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 15:45:06 -0700 Subject: KVM: Pass new routing entries and irqfd when updating IRTEs When updating IRTEs in response to a GSI routing or IRQ bypass change, pass the new/current routing information along with the associated irqfd. This will allow KVM x86 to harden, simplify, and deduplicate its code. Since adding/removing a bypass producer is now conveniently protected with irqfds.lock, i.e. can't run concurrently with kvm_irq_routing_update(), use the routing information cached in the irqfd instead of looking up the information in the current GSI routing tables. Opportunistically convert an existing printk() to pr_info() and put its string onto a single line (old code that strictly adhered to 80 chars). Link: https://lore.kernel.org/r/20250611224604.313496-5-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3b5575d0b574..a4160c1c0c6b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2401,6 +2401,8 @@ struct kvm_vcpu *kvm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); #if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) +struct kvm_kernel_irqfd; + bool kvm_arch_has_irq_bypass(void); int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); @@ -2408,8 +2410,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); -int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set); +int kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd, + struct kvm_kernel_irq_routing_entry *old, + struct kvm_kernel_irq_routing_entry *new); bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *, struct kvm_kernel_irq_routing_entry *); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ -- cgit v1.2.3 From 05c5e23657e1d61c271c2f4a3a21d4d630b18a9b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 15:45:07 -0700 Subject: KVM: SVM: Track per-vCPU IRTEs using kvm_kernel_irqfd structure Track the IRTEs that are posting to an SVM vCPU via the associated irqfd structure and GSI routing instead of dynamically allocating a separate data structure. In addition to eliminating an atomic allocation, this will allow hoisting much of the IRTE update logic to common x86. Cc: Sairaj Kodilkar Link: https://lore.kernel.org/r/20250611224604.313496-6-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_irqfd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h index 8ad43692e3bb..6510a48e62aa 100644 --- a/include/linux/kvm_irqfd.h +++ b/include/linux/kvm_irqfd.h @@ -59,6 +59,9 @@ struct kvm_kernel_irqfd { struct work_struct shutdown; struct irq_bypass_consumer consumer; struct irq_bypass_producer *producer; + + struct list_head vcpu_list; + void *irq_bypass_data; }; #endif /* __LINUX_KVM_IRQFD_H */ -- cgit v1.2.3 From 0a917e9d4b7070fafcbf7a8ec32d2aa444b4e757 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 15:45:08 -0700 Subject: KVM: SVM: Delete IRTE link from previous vCPU before setting new IRTE Delete the previous per-vCPU IRTE link prior to modifying the IRTE. If forcing the IRTE back to remapped mode fails, the IRQ is already broken; keeping stale metadata won't change that, and the IOMMU should be sufficiently paranoid to sanitize the IRTE when the IRQ is freed and reallocated. This will allow hoisting the vCPU tracking to common x86, which in turn will allow most of the IRTE update code to be deduplicated. Tested-by: Sairaj Kodilkar Link: https://lore.kernel.org/r/20250611224604.313496-7-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_irqfd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h index 6510a48e62aa..361c07f4466d 100644 --- a/include/linux/kvm_irqfd.h +++ b/include/linux/kvm_irqfd.h @@ -60,6 +60,7 @@ struct kvm_kernel_irqfd { struct irq_bypass_consumer consumer; struct irq_bypass_producer *producer; + struct kvm_vcpu *irq_bypass_vcpu; struct list_head vcpu_list; void *irq_bypass_data; }; -- cgit v1.2.3 From 1da19c5ce0533796179d9e1b55e64bf78478c4c1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 15:45:09 -0700 Subject: iommu/amd: KVM: SVM: Delete now-unused cached/previous GA tag fields Delete the amd_ir_data.prev_ga_tag field now that all usage is superfluous. Reviewed-by: Vasant Hegde Tested-by: Sairaj Kodilkar Link: https://lore.kernel.org/r/20250611224604.313496-8-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/amd-iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 062fbd4c9b77..1f9b13d803c5 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -19,8 +19,8 @@ struct amd_iommu; */ struct amd_iommu_pi_data { u32 ga_tag; - u32 prev_ga_tag; u64 base; + bool is_guest_mode; struct vcpu_data *vcpu_data; void *ir_data; -- cgit v1.2.3 From 741e595f02fe4386914b7ef656a06f7209480ca9 Mon Sep 17 00:00:00 2001 From: Liam Merwick Date: Mon, 9 Jun 2025 09:11:20 +0000 Subject: KVM: Add trace_kvm_vm_set_mem_attributes() Add a tracing function that, for a guest memory range, displays the start and end addresses plus the per-page attributes being set. Signed-off-by: Liam Merwick Reviewed-by: Pankaj Gupta Link: https://lore.kernel.org/r/20250609091121.2497429-3-liam.merwick@oracle.com Signed-off-by: Sean Christopherson --- include/trace/events/kvm.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index fc7d0f8ff078..40b20d682d47 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -473,6 +473,33 @@ TRACE_EVENT(kvm_dirty_ring_exit, TP_printk("vcpu %d", __entry->vcpu_id) ); +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES +/* + * @start: Starting address of guest memory range + * @end: End address of guest memory range + * @attr: The value of the attribute being set. + */ +TRACE_EVENT(kvm_vm_set_mem_attributes, + TP_PROTO(gfn_t start, gfn_t end, unsigned long attr), + TP_ARGS(start, end, attr), + + TP_STRUCT__entry( + __field(gfn_t, start) + __field(gfn_t, end) + __field(unsigned long, attr) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + __entry->attr = attr; + ), + + TP_printk("%#016llx -- %#016llx [0x%lx]", + __entry->start, __entry->end, __entry->attr) +); +#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */ + TRACE_EVENT(kvm_unmap_hva_range, TP_PROTO(unsigned long start, unsigned long end), TP_ARGS(start, end), -- cgit v1.2.3 From 6e6558a6bc418f1478c5dc8609d03805364e0cb9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Jun 2025 15:33:10 -1000 Subject: sched_ext, sched/core: Factor out struct scx_task_group More sched_ext fields will be added to struct task_group. In preparation, factor out sched_ext fields into struct scx_task_group to reduce clutter in the common header. No functional changes. Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index f7545430a548..eda89acdb7ab 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -214,4 +214,12 @@ static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {} static inline void scx_softlockup(u32 dur_s) {} #endif /* CONFIG_SCHED_CLASS_EXT */ + +struct scx_task_group { +#ifdef CONFIG_EXT_GROUP_SCHED + u32 flags; /* SCX_TG_* */ + u32 weight; +#endif +}; + #endif /* _LINUX_SCHED_EXT_H */ -- cgit v1.2.3 From ddceadce63d9cb752c2472e220ded05cabaf7971 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Jun 2025 15:34:22 -1000 Subject: sched_ext: Add support for cgroup bandwidth control interface From 077814f57f8acce13f91dc34bbd2b7e4911fbf25 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Jun 2025 15:06:47 -1000 - Add CONFIG_GROUP_SCHED_BANDWIDTH which is selected by both CONFIG_CFS_BANDWIDTH and EXT_GROUP_SCHED. - Put bandwidth control interface files for both cgroup v1 and v2 under CONFIG_GROUP_SCHED_BANDWIDTH. - Update tg_bandwidth() to fetch configuration parameters from fair if CONFIG_CFS_BANDWIDTH, SCX otherwise. - Update tg_set_bandwidth() to update the parameters for both fair and SCX. - Add bandwidth control parameters to struct scx_cgroup_init_args. - Add sched_ext_ops.cgroup_set_bandwidth() which is invoked on bandwidth control parameter updates. - Update scx_qmap and maximal selftest to test the new feature. Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index eda89acdb7ab..8b92842776cb 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -219,6 +219,9 @@ struct scx_task_group { #ifdef CONFIG_EXT_GROUP_SCHED u32 flags; /* SCX_TG_* */ u32 weight; + u64 bw_period_us; + u64 bw_quota_us; + u64 bw_burst_us; #endif }; -- cgit v1.2.3 From 4672aec56d2e8edabcb74c3e2320301d106a377e Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Thu, 19 Jun 2025 17:52:38 +0000 Subject: netmem: fix skb_frag_address_safe with unreadable skbs skb_frag_address_safe() needs a check that the skb_frag_page exists check similar to skb_frag_address(). Cc: ap420073@gmail.com Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250619175239.3039329-1-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9508968cb300..4f6dcb37bae8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3665,7 +3665,13 @@ static inline void *skb_frag_address(const skb_frag_t *frag) */ static inline void *skb_frag_address_safe(const skb_frag_t *frag) { - void *ptr = page_address(skb_frag_page(frag)); + struct page *page = skb_frag_page(frag); + void *ptr; + + if (!page) + return NULL; + + ptr = page_address(page); if (unlikely(!ptr)) return NULL; -- cgit v1.2.3 From 99aa0bbb082e7c0660751832acca897493c3082c Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Fri, 20 Jun 2025 11:16:41 +0200 Subject: net: pse-pd: Fix ethnl_pse_send_ntf() stub parameter type The ethnl_pse_send_ntf() stub function has incorrect parameter type when CONFIG_ETHTOOL_NETLINK is disabled. The function should take a net_device pointer instead of phy_device pointer to match the actual implementation. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202506200355.TqFiYUbN-lkp@intel.com/ Fixes: fc0e6db30941 ("net: pse-pd: Add support for reporting events") Signed-off-by: Kory Maincent Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250620091641.2098028-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool_netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h index 1dcc4059b5ab..39254b2726c0 100644 --- a/include/linux/ethtool_netlink.h +++ b/include/linux/ethtool_netlink.h @@ -122,7 +122,7 @@ static inline bool ethtool_dev_mm_supported(struct net_device *dev) return false; } -static inline void ethnl_pse_send_ntf(struct phy_device *phydev, +static inline void ethnl_pse_send_ntf(struct net_device *netdev, unsigned long notif) { } -- cgit v1.2.3 From 520c790c83e9e4c915a8e3fc9f2152ece39b6511 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Sun, 8 Jun 2025 22:40:02 +0200 Subject: power: supply: core: remove of_node from power_supply_config All drivers have been migrated from .of_node to .fwnode, so let's kill the former. Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20250430-psy-core-convert-to-fwnode-v2-2-f9643b958677@collabora.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 7803edaa8ff8..72012141656e 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -232,7 +232,6 @@ struct power_supply; /* Run-time specific power supply configuration */ struct power_supply_config { - struct device_node *of_node; struct fwnode_handle *fwnode; /* Driver private data */ -- cgit v1.2.3 From f368f87b22dab8e97c5f447b00a0cae79fefbdcb Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Sun, 8 Jun 2025 22:40:06 +0200 Subject: power: supply: core: convert to fwnnode Replace any DT specific code with fwnode in the power-supply core. Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250430-psy-core-convert-to-fwnode-v2-4-f9643b958677@collabora.com Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 72012141656e..d90ac7b73755 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -808,7 +808,7 @@ static inline struct power_supply *power_supply_get_by_name(const char *name) { return NULL; } #endif #ifdef CONFIG_OF -extern struct power_supply *power_supply_get_by_phandle(struct device_node *np, +extern struct power_supply *power_supply_get_by_phandle(struct fwnode_handle *fwnode, const char *property); extern struct power_supply *devm_power_supply_get_by_phandle( struct device *dev, const char *property); -- cgit v1.2.3 From 370643f45aad93476b6489238ccb45a77b94da3f Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Sun, 8 Jun 2025 22:40:07 +0200 Subject: power: supply: core: rename power_supply_get_by_phandle to power_supply_get_by_reference (devm_)power_supply_get_by_phandle now internally uses fwnode and are no longer DT specific. Thus drop the ifdef check for CONFIG_OF and rename to (devm_)power_supply_get_by_reference to avoid the DT terminology. Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250430-psy-core-convert-to-fwnode-v2-5-f9643b958677@collabora.com Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index d90ac7b73755..45468959dd98 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -807,19 +807,10 @@ static inline void power_supply_put(struct power_supply *psy) {} static inline struct power_supply *power_supply_get_by_name(const char *name) { return NULL; } #endif -#ifdef CONFIG_OF -extern struct power_supply *power_supply_get_by_phandle(struct fwnode_handle *fwnode, - const char *property); -extern struct power_supply *devm_power_supply_get_by_phandle( +extern struct power_supply *power_supply_get_by_reference(struct fwnode_handle *fwnode, + const char *property); +extern struct power_supply *devm_power_supply_get_by_reference( struct device *dev, const char *property); -#else /* !CONFIG_OF */ -static inline struct power_supply * -power_supply_get_by_phandle(struct device_node *np, const char *property) -{ return NULL; } -static inline struct power_supply * -devm_power_supply_get_by_phandle(struct device *dev, const char *property) -{ return NULL; } -#endif /* CONFIG_OF */ extern const enum power_supply_property power_supply_battery_info_properties[]; extern const size_t power_supply_battery_info_properties_size; -- cgit v1.2.3 From ac558015dfd803626622bd0ba9645d58a3ed16b1 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Mon, 16 Jun 2025 19:49:29 +0800 Subject: ASoC: SDCA: add a HID device for HIDE entity This patch supports to add a HID device for SDCA HIDE entity. The codec driver could call 'hid_input_report' to report events. Signed-off-by: Shuming Fan Reviewed-by: Charles Keepax Link: https://patch.msgid.link/20250616114929.855496-1-shumingf@realtek.com Signed-off-by: Mark Brown --- include/sound/sdca_hid.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/sound/sdca_hid.h (limited to 'include') diff --git a/include/sound/sdca_hid.h b/include/sound/sdca_hid.h new file mode 100644 index 000000000000..8ab3e498884e --- /dev/null +++ b/include/sound/sdca_hid.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ +/* + * The MIPI SDCA specification is available for public downloads at + * https://www.mipi.org/mipi-sdca-v1-0-download + * + */ + +#ifndef __SDCA_HID_H__ +#define __SDCA_HID_H__ + +#include +#include + +#if IS_ENABLED(CONFIG_SND_SOC_SDCA_HID) +int sdca_add_hid_device(struct device *dev, struct sdca_entity *entity); + +#else +static inline int sdca_add_hid_device(struct device *dev, struct sdca_entity *entity) +{ + return 0; +} + +#endif + +#endif /* __SDCA_HID_H__ */ -- cgit v1.2.3 From 1e2b7fcd3f075ff8c5b0e4474fe145d1c685f54f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 13 Jun 2025 16:51:38 +0800 Subject: crypto: ahash - Stop legacy tfms from using the set_virt fallback path Ensure that drivers that have not been converted to the ahash API do not use the ahash_request_set_virt fallback path as they cannot use the software fallback. Reported-by: Eric Biggers Fixes: 9d7a0ab1c753 ("crypto: ahash - Handle partial blocks in API") Signed-off-by: Herbert Xu --- include/crypto/internal/hash.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 0f85c543f80b..f052afa6e7b0 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -91,6 +91,12 @@ static inline bool crypto_hash_alg_needs_key(struct hash_alg_common *alg) !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY); } +static inline bool crypto_hash_no_export_core(struct crypto_ahash *tfm) +{ + return crypto_hash_alg_common(tfm)->base.cra_flags & + CRYPTO_AHASH_ALG_NO_EXPORT_CORE; +} + int crypto_grab_ahash(struct crypto_ahash_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask); -- cgit v1.2.3 From 2566de3e06a35b6517bcab11fd25b65ef90fd180 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Wed, 18 Jun 2025 18:00:26 +0800 Subject: crypto: hisilicon - Use fine grained DMA mapping direction The following splat was triggered when booting the kernel built with arm64's defconfig + CRYPTO_SELFTESTS + DMA_API_DEBUG. ------------[ cut here ]------------ DMA-API: hisi_sec2 0000:75:00.0: cacheline tracking EEXIST, overlapping mappings aren't supported WARNING: CPU: 24 PID: 1273 at kernel/dma/debug.c:596 add_dma_entry+0x248/0x308 Call trace: add_dma_entry+0x248/0x308 (P) debug_dma_map_sg+0x208/0x3e4 __dma_map_sg_attrs+0xbc/0x118 dma_map_sg_attrs+0x10/0x24 hisi_acc_sg_buf_map_to_hw_sgl+0x80/0x218 [hisi_qm] sec_cipher_map+0xc4/0x338 [hisi_sec2] sec_aead_sgl_map+0x18/0x24 [hisi_sec2] sec_process+0xb8/0x36c [hisi_sec2] sec_aead_crypto+0xe4/0x264 [hisi_sec2] sec_aead_encrypt+0x14/0x20 [hisi_sec2] crypto_aead_encrypt+0x24/0x38 test_aead_vec_cfg+0x480/0x7e4 test_aead_vec+0x84/0x1b8 alg_test_aead+0xc0/0x498 alg_test.part.0+0x518/0x524 alg_test+0x20/0x64 cryptomgr_test+0x24/0x44 kthread+0x130/0x1fc ret_from_fork+0x10/0x20 ---[ end trace 0000000000000000 ]--- DMA-API: Mapped at: debug_dma_map_sg+0x234/0x3e4 __dma_map_sg_attrs+0xbc/0x118 dma_map_sg_attrs+0x10/0x24 hisi_acc_sg_buf_map_to_hw_sgl+0x80/0x218 [hisi_qm] sec_cipher_map+0xc4/0x338 [hisi_sec2] This occurs in selftests where the input and the output scatterlist point to the same underlying memory (e.g., when tested with INPLACE_TWO_SGLISTS mode). The problem is that the hisi_sec2 driver maps these two different scatterlists using the DMA_BIDIRECTIONAL flag which leads to overlapped write mappings which are not supported by the DMA layer. Fix it by using the fine grained and correct DMA mapping directions. While at it, switch the DMA directions used by the hisi_zip driver too. Signed-off-by: Zenghui Yu Reviewed-by: Longfang Liu Signed-off-by: Herbert Xu --- include/linux/hisi_acc_qm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 99fcf65d575f..0c4c84b8c3be 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -556,9 +556,9 @@ int hisi_qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue, struct hisi_acc_sgl_pool; struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, struct scatterlist *sgl, struct hisi_acc_sgl_pool *pool, - u32 index, dma_addr_t *hw_sgl_dma); + u32 index, dma_addr_t *hw_sgl_dma, enum dma_data_direction dir); void hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl, - struct hisi_acc_hw_sgl *hw_sgl); + struct hisi_acc_hw_sgl *hw_sgl, enum dma_data_direction dir); struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev, u32 count, u32 sge_nr); void hisi_acc_free_sgl_pool(struct device *dev, -- cgit v1.2.3 From 2773d282cd56464f62e9b4703c41d2f733a67842 Mon Sep 17 00:00:00 2001 From: Junxuan Liao Date: Sun, 22 Jun 2025 23:01:32 -0500 Subject: docs/vfs: update references to i_mutex to i_rwsem VFS has switched to i_rwsem for ten years now (9902af79c01a: parallel lookups actual switch to rwsem), but the VFS documentation and comments still has references to i_mutex. Signed-off-by: Junxuan Liao Link: https://lore.kernel.org/72223729-5471-474a-af3c-f366691fba82@cs.wisc.edu Signed-off-by: Christian Brauner --- include/linux/exportfs.h | 4 ++-- include/linux/fs.h | 6 +++--- include/linux/fs_stack.h | 2 +- include/linux/quotaops.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 25c4a5afbd44..cfb0dd1ea49c 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -230,7 +230,7 @@ struct handle_to_path_ctx { * directory. The name should be stored in the @name (with the * understanding that it is already pointing to a %NAME_MAX+1 sized * buffer. get_name() should return %0 on success, a negative error code - * or error. @get_name will be called without @parent->i_mutex held. + * or error. @get_name will be called without @parent->i_rwsem held. * * get_parent: * @get_parent should find the parent directory for the given @child which @@ -247,7 +247,7 @@ struct handle_to_path_ctx { * @commit_metadata should commit metadata changes to stable storage. * * Locking rules: - * get_parent is called with child->d_inode->i_mutex down + * get_parent is called with child->d_inode->i_rwsem down * get_name is not (which is possibly inconsistent) */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 1d9586a78041..09e3e80b0528 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -837,7 +837,7 @@ static inline void inode_fake_hash(struct inode *inode) } /* - * inode->i_mutex nesting subclasses for the lock validator: + * inode->i_rwsem nesting subclasses for the lock validator: * * 0: the object of the current VFS operation * 1: parent @@ -989,7 +989,7 @@ static inline loff_t i_size_read(const struct inode *inode) /* * NOTE: unlike i_size_read(), i_size_write() does need locking around it - * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount + * (normally i_rwsem), otherwise on 32bit/SMP an update of i_size_seqcount * can be lost, resulting in subsequent i_size_read() calls spinning forever. */ static inline void i_size_write(struct inode *inode, loff_t i_size) @@ -1921,7 +1921,7 @@ static inline void sb_end_intwrite(struct super_block *sb) * freeze protection should be the outermost lock. In particular, we have: * * sb_start_write - * -> i_mutex (write path, truncate, directory ops, ...) + * -> i_rwsem (write path, truncate, directory ops, ...) * -> s_umount (freeze_super, thaw_super) */ static inline void sb_start_write(struct super_block *sb) diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h index 2b1f74b24070..0cc2fa283305 100644 --- a/include/linux/fs_stack.h +++ b/include/linux/fs_stack.h @@ -3,7 +3,7 @@ #define _LINUX_FS_STACK_H /* This file defines generic functions used primarily by stackable - * filesystems; none of these functions require i_mutex to be held. + * filesystems; none of these functions require i_rwsem to be held. */ #include diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 06cc8888199e..c334f82ed385 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -19,7 +19,7 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb) return &sb->s_dquot; } -/* i_mutex must being held */ +/* i_rwsem must being held */ static inline bool is_quota_modification(struct mnt_idmap *idmap, struct inode *inode, struct iattr *ia) { -- cgit v1.2.3 From cbe4134ea4bc493239786220bd69cb8a13493190 Mon Sep 17 00:00:00 2001 From: Shivank Garg Date: Fri, 20 Jun 2025 07:03:30 +0000 Subject: fs: export anon_inode_make_secure_inode() and fix secretmem LSM bypass Export anon_inode_make_secure_inode() to allow KVM guest_memfd to create anonymous inodes with proper security context. This replaces the current pattern of calling alloc_anon_inode() followed by inode_init_security_anon() for creating security context manually. This change also fixes a security regression in secretmem where the S_PRIVATE flag was not cleared after alloc_anon_inode(), causing LSM/SELinux checks to be bypassed for secretmem file descriptors. As guest_memfd currently resides in the KVM module, we need to export this symbol for use outside the core kernel. In the future, guest_memfd might be moved to core-mm, at which point the symbols no longer would have to be exported. When/if that happens is still unclear. Fixes: 2bfe15c52612 ("mm: create security context for memfd_secret inodes") Suggested-by: David Hildenbrand Suggested-by: Mike Rapoport Signed-off-by: Shivank Garg Link: https://lore.kernel.org/20250620070328.803704-3-shivankg@amd.com Acked-by: "Mike Rapoport (Microsoft)" Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4ec77da65f14..3a8e643c4279 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3606,6 +3606,8 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping, extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); +struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, + const struct inode *context_inode); extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); extern const struct dentry_operations simple_dentry_operations; -- cgit v1.2.3 From 0c40d7cb5ef3af260e8c7f88e0e5d7ae15d6ce57 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Thu, 19 Jun 2025 19:17:58 +0800 Subject: block: introduce max_{hw|user}_wzeroes_unmap_sectors to queue limits Currently, disks primarily implement the write zeroes command (aka REQ_OP_WRITE_ZEROES) through two mechanisms: the first involves physically writing zeros to the disk media (e.g., HDDs), while the second performs an unmap operation on the logical blocks, effectively putting them into a deallocated state (e.g., SSDs). The first method is generally slow, while the second method is typically very fast. For example, on certain NVMe SSDs that support NVME_NS_DEAC, submitting REQ_OP_WRITE_ZEROES requests with the NVME_WZ_DEAC bit can accelerate the write zeros operation by placing disk blocks into a deallocated state, which opportunistically avoids writing zeroes to media while still guaranteeing that subsequent reads from the specified block range will return zeroed data. This is a best-effort optimization, not a mandatory requirement, some devices may partially fall back to writing physical zeroes due to factors such as misalignment or being asked to clear a block range smaller than the device's internal allocation unit. Therefore, the speed of this operation is not guaranteed. It is difficult to determine whether the storage device supports unmap write zeroes operation. We cannot determine this by only querying bdev_limits(bdev)->max_write_zeroes_sectors. Therefore, first, add a new hardware queue limit parameters, max_hw_wzeroes_unmap_sectors, to indicate whether a device supports this unmap write zeroes operation. Then, add two new counterpart software queue limits, max_wzeroes_unmap_sectors and max_user_wzeroes_unmap_sectors, which allow users to disable this operation if the speed is very slow on some sepcial devices. Finally, for the stacked devices cases, initialize these two parameters to UINT_MAX. This operation should be enabled by both the stacking driver and all underlying devices. Thanks to Martin K. Petersen for optimizing the documentation of the write_zeroes_unmap sysfs interface. Signed-off-by: Zhang Yi Link: https://lore.kernel.org/20250619111806.3546162-2-yi.zhang@huaweicloud.com Reviewed-by: Christoph Hellwig Reviewed-by: "Martin K. Petersen" Signed-off-by: Christian Brauner --- include/linux/blkdev.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a59880c809c7..1a5725c1f93d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -383,6 +383,9 @@ struct queue_limits { unsigned int max_user_discard_sectors; unsigned int max_secure_erase_sectors; unsigned int max_write_zeroes_sectors; + unsigned int max_wzeroes_unmap_sectors; + unsigned int max_hw_wzeroes_unmap_sectors; + unsigned int max_user_wzeroes_unmap_sectors; unsigned int max_hw_zone_append_sectors; unsigned int max_zone_append_sectors; unsigned int discard_granularity; @@ -1042,6 +1045,7 @@ static inline void blk_queue_disable_secure_erase(struct request_queue *q) static inline void blk_queue_disable_write_zeroes(struct request_queue *q) { q->limits.max_write_zeroes_sectors = 0; + q->limits.max_wzeroes_unmap_sectors = 0; } /* @@ -1378,6 +1382,12 @@ static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev) return bdev_limits(bdev)->max_write_zeroes_sectors; } +static inline unsigned int +bdev_write_zeroes_unmap_sectors(struct block_device *bdev) +{ + return bdev_limits(bdev)->max_wzeroes_unmap_sectors; +} + static inline bool bdev_nonrot(struct block_device *bdev) { return blk_queue_nonrot(bdev_get_queue(bdev)); -- cgit v1.2.3 From 7bd43cc79cab3850f34da0a31d5b042b701590ef Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Thu, 19 Jun 2025 19:18:03 +0800 Subject: fs: introduce FALLOC_FL_WRITE_ZEROES to fallocate With the development of flash-based storage devices, we can quickly write zeros to SSDs using the WRITE_ZERO command if the devices do not actually write physical zeroes to the media. Therefore, we can use this command to quickly preallocate a real all-zero file with written extents. This approach should be beneficial for subsequent pure overwriting within this file, as it can save on block allocation and, consequently, significant metadata changes, which should greatly improve overwrite performance on certain filesystems. Therefore, introduce a new operation FALLOC_FL_WRITE_ZEROES to fallocate. This flag is used to convert a specified range of a file to zeros by issuing a zeroing operation. Blocks should be allocated for the regions that span holes in the file, and the entire range is converted to written extents. If the underlying device supports the actual offload write zeroes command, the process of zeroing out operation can be accelerated. If it does not, we currently don't prevent the file system from writing actual zeros to the device. This provides users with a new method to quickly generate a zeroed file, users no longer need to write zero data to create a file with written extents. Users can determine whether a disk supports the unmap write zeroes feature through querying this sysfs interface: /sys/block//queue/write_zeroes_unmap_max_hw_bytes Users can also enable or disable the unmap write zeroes operation through this sysfs interface: /sys/block//queue/write_zeroes_unmap_max_bytes Finally, this flag cannot be specified in conjunction with the FALLOC_FL_KEEP_SIZE since allocating written extents beyond file EOF is not permitted. In addition, filesystems that always require out-of-place writes should not support this flag since they still need to allocated new blocks during subsequent overwrites. Signed-off-by: Zhang Yi Link: https://lore.kernel.org/20250619111806.3546162-7-yi.zhang@huaweicloud.com Reviewed-by: Christoph Hellwig Reviewed-by: "Martin K. Petersen" Signed-off-by: Christian Brauner --- include/linux/falloc.h | 3 ++- include/uapi/linux/falloc.h | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/falloc.h b/include/linux/falloc.h index 3f49f3df6af5..7c38c6b76b60 100644 --- a/include/linux/falloc.h +++ b/include/linux/falloc.h @@ -36,7 +36,8 @@ struct space_resv { FALLOC_FL_COLLAPSE_RANGE | \ FALLOC_FL_ZERO_RANGE | \ FALLOC_FL_INSERT_RANGE | \ - FALLOC_FL_UNSHARE_RANGE) + FALLOC_FL_UNSHARE_RANGE | \ + FALLOC_FL_WRITE_ZEROES) /* on ia32 l_start is on a 32-bit boundary */ #if defined(CONFIG_X86_64) diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h index 5810371ed72b..1f9ca757d02d 100644 --- a/include/uapi/linux/falloc.h +++ b/include/uapi/linux/falloc.h @@ -78,4 +78,21 @@ */ #define FALLOC_FL_UNSHARE_RANGE 0x40 +/* + * FALLOC_FL_WRITE_ZEROES zeroes a specified file range in such a way that + * subsequent writes to that range do not require further changes to the file + * mapping metadata. This flag is beneficial for subsequent pure overwriting + * within this range, as it can save on block allocation and, consequently, + * significant metadata changes. Therefore, filesystems that always require + * out-of-place writes should not support this flag. + * + * Different filesystems may implement different limitations on the + * granularity of the zeroing operation. Most will preferably be accelerated + * by submitting write zeroes command if the backing storage supports, which + * may not physically write zeros to the media. + * + * This flag cannot be specified in conjunction with the FALLOC_FL_KEEP_SIZE. + */ +#define FALLOC_FL_WRITE_ZEROES 0x80 + #endif /* _UAPI_FALLOC_H_ */ -- cgit v1.2.3 From f4265b8d32c49ff95711e6fef7d05245a2905b30 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Thu, 19 Jun 2025 19:18:06 +0800 Subject: ext4: add FALLOC_FL_WRITE_ZEROES support Add support for FALLOC_FL_WRITE_ZEROES if the underlying device enable the unmap write zeroes operation. This first allocates blocks as unwritten, then issues a zero command outside of the running journal handle, and finally converts them to a written state. Signed-off-by: Zhang Yi Link: https://lore.kernel.org/20250619111806.3546162-10-yi.zhang@huaweicloud.com Reviewed-by: "Martin K. Petersen" Signed-off-by: Christian Brauner --- include/trace/events/ext4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 156908641e68..6f9cf2811733 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -92,7 +92,8 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B); { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \ { FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \ { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \ - { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) + { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}, \ + { FALLOC_FL_WRITE_ZEROES, "WRITE_ZEROES"}) TRACE_DEFINE_ENUM(EXT4_FC_REASON_XATTR); TRACE_DEFINE_ENUM(EXT4_FC_REASON_CROSS_RENAME); -- cgit v1.2.3 From b872f562c8cef59743993b48eb458c2d87c1651e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 23 Jun 2025 19:11:50 +0800 Subject: dm-crypt: Extend state buffer size in crypt_iv_lmk_one Add a macro CRYPTO_MD5_STATESIZE for the Crypto API export state size of md5 and use that in dm-crypt instead of relying on the size of struct md5_state (the latter is currently undergoing a transition and may shrink). This commit fixes a crash on 32-bit machines: Oops: Oops: 0000 [#1] SMP CPU: 1 UID: 0 PID: 12 Comm: kworker/u16:0 Not tainted 6.16.0-rc2+ #993 PREEMPT(full) Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 Workqueue: kcryptd-254:0-1 kcryptd_crypt [dm_crypt] EIP: __crypto_shash_export+0xf/0x90 Code: 4a c1 c7 40 20 a0 b4 4a c1 81 cf 0e 00 04 08 89 78 50 e9 2b ff ff ff 8d 74 26 00 55 89 e5 57 56 53 89 c3 89 d6 8b 00 8b 40 14 <8b> 50 fc f6 40 13 01 74 04 4a 2b 50 14 85 c9 74 10 89 f2 89 d8 ff EAX: 303a3435 EBX: c3007c90 ECX: 00000000 EDX: c3007c38 ESI: c3007c38 EDI: c3007c90 EBP: c3007bfc ESP: c3007bf0 DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00010216 CR0: 80050033 CR2: 303a3431 CR3: 04fbe000 CR4: 00350e90 Call Trace: crypto_shash_export+0x65/0xc0 crypt_iv_lmk_one+0x106/0x1a0 [dm_crypt] Fixes: efd62c85525e ("crypto: md5-generic - Use API partial block handling") Reported-by: Milan Broz Signed-off-by: Herbert Xu Tested-by: Milan Broz Closes: https://lore.kernel.org/linux-crypto/f1625ddc-e82e-4b77-80c2-dc8e45b54848@gmail.com/T/ Signed-off-by: Mikulas Patocka --- include/crypto/hash.h | 2 ++ include/crypto/md5.h | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 6f6b9de12cd3..db294d452e8c 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -202,6 +202,8 @@ struct shash_desc { #define HASH_REQUEST_CLONE(name, gfp) \ hash_request_clone(name, sizeof(__##name##_req), gfp) +#define CRYPTO_HASH_STATESIZE(coresize, blocksize) (coresize + blocksize + 1) + /** * struct shash_alg - synchronous message digest definition * @init: see struct ahash_alg diff --git a/include/crypto/md5.h b/include/crypto/md5.h index 198b5d69b92f..28ee533a0507 100644 --- a/include/crypto/md5.h +++ b/include/crypto/md5.h @@ -2,6 +2,7 @@ #ifndef _CRYPTO_MD5_H #define _CRYPTO_MD5_H +#include #include #define MD5_DIGEST_SIZE 16 @@ -15,6 +16,9 @@ #define MD5_H2 0x98badcfeUL #define MD5_H3 0x10325476UL +#define CRYPTO_MD5_STATESIZE \ + CRYPTO_HASH_STATESIZE(MD5_STATE_SIZE, MD5_HMAC_BLOCK_SIZE) + extern const u8 md5_zero_message_hash[MD5_DIGEST_SIZE]; struct md5_state { -- cgit v1.2.3 From a24cc6ce1933eade12aa2b9859de0fcd2dac2c06 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 23 Jun 2025 10:34:08 +0200 Subject: futex: Initialize futex_phash_new during fork(). During a hash resize operation the new private hash is stored in mm_struct::futex_phash_new if the current hash can not be immediately replaced. The new hash must not be copied during fork() into the new task. Doing so will lead to a double-free of the memory by the two tasks. Initialize the mm_struct::futex_phash_new during fork(). Closes: https://lore.kernel.org/all/aFBQ8CBKmRzEqIfS@mozart.vkv.me/ Fixes: bd54df5ea7cad ("futex: Allow to resize the private local hash") Reported-by: Calvin Owens Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Tested-by: Calvin Owens Link: https://lkml.kernel.org/r/20250623083408.jTiJiC6_@linutronix.de --- include/linux/futex.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index 005b040c4791..b37193653e6b 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -89,6 +89,7 @@ void futex_hash_free(struct mm_struct *mm); static inline void futex_mm_init(struct mm_struct *mm) { RCU_INIT_POINTER(mm->futex_phash, NULL); + mm->futex_phash_new = NULL; mutex_init(&mm->futex_hash_lock); } -- cgit v1.2.3 From 4d811e395bbe54ba2febb3940d4b6c4741f360a6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 5 Jun 2025 11:48:33 -0600 Subject: io_uring: add IO_URING_F_INLINE issue flag Set when the execution of the request is done inline from the system call itself. Any deferred issue will never have this flag set. Reviewed-by: Caleb Sander Mateos Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 2922635986f5..054c43c02c96 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -26,6 +26,8 @@ enum io_uring_cmd_flags { IO_URING_F_MULTISHOT = 4, /* executed by io-wq */ IO_URING_F_IOWQ = 8, + /* executed inline from syscall */ + IO_URING_F_INLINE = 16, /* int's last bit, sign checks are usually faster than a bit test */ IO_URING_F_NONBLOCK = INT_MIN, -- cgit v1.2.3 From af19388a973877b2349df46c4487a789cd3148ed Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 5 Jun 2025 11:33:52 -0600 Subject: io_uring: add struct io_cold_def->sqe_copy() method Will be called by the core of io_uring, if inline issue is not going to be tried for a request. Opcodes can define this handler to defer copying of SQE data that should remain stable. Only called if IO_URING_F_INLINE is set. If it isn't set, then there's a bug in the core handling of this, and -EFAULT will be returned instead to terminate the request. This will trigger a WARN_ON_ONCE(). Don't expect this to ever trigger, and down the line this can be removed. Reviewed-by: Caleb Sander Mateos Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 054c43c02c96..4ab3bdc103f2 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -504,6 +504,7 @@ enum { REQ_F_BUF_NODE_BIT, REQ_F_HAS_METADATA_BIT, REQ_F_IMPORT_BUFFER_BIT, + REQ_F_SQE_COPIED_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -593,6 +594,8 @@ enum { * For SEND_ZC, whether to import buffers (i.e. the first issue). */ REQ_F_IMPORT_BUFFER = IO_REQ_FLAG(REQ_F_IMPORT_BUFFER_BIT), + /* ->sqe_copy() has been called, if necessary */ + REQ_F_SQE_COPIED = IO_REQ_FLAG(REQ_F_SQE_COPIED_BIT), }; typedef void (*io_req_tw_func_t)(struct io_kiocb *req, io_tw_token_t tw); -- cgit v1.2.3 From cb9ccfb404e700dc0db59d68242d79fe386bb3f0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Jun 2025 17:05:19 -0600 Subject: io_uring/nop: add IORING_NOP_TW completion flag To test and profile the overhead of io_uring task_work and the various types of it, add IORING_NOP_TW which tells nop to signal completions through task_work rather than complete them inline. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index cfd17e382082..8c3d43caab02 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -449,6 +449,7 @@ enum io_uring_msg_ring_flags { #define IORING_NOP_FILE (1U << 1) #define IORING_NOP_FIXED_FILE (1U << 2) #define IORING_NOP_FIXED_BUFFER (1U << 3) +#define IORING_NOP_TW (1U << 4) /* * IO completion data structure (Completion Queue Entry) -- cgit v1.2.3 From 1d6123102e9fbedc8d25bf4731da6d513173e49e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 17 Jun 2025 09:58:13 -0700 Subject: Bluetooth: hci_core: Fix use-after-free in vhci_flush() syzbot reported use-after-free in vhci_flush() without repro. [0] From the splat, a thread close()d a vhci file descriptor while its device was being used by iotcl() on another thread. Once the last fd refcnt is released, vhci_release() calls hci_unregister_dev(), hci_free_dev(), and kfree() for struct vhci_data, which is set to hci_dev->dev->driver_data. The problem is that there is no synchronisation after unlinking hdev from hci_dev_list in hci_unregister_dev(). There might be another thread still accessing the hdev which was fetched before the unlink operation. We can use SRCU for such synchronisation. Let's run hci_dev_reset() under SRCU and wait for its completion in hci_unregister_dev(). Another option would be to restore hci_dev->destruct(), which was removed in commit 587ae086f6e4 ("Bluetooth: Remove unused hci-destruct cb"). However, this would not be a good solution, as we should not run hci_unregister_dev() while there are in-flight ioctl() requests, which could lead to another data-race KCSAN splat. Note that other drivers seem to have the same problem, for exmaple, virtbt_remove(). [0]: BUG: KASAN: slab-use-after-free in skb_queue_empty_lockless include/linux/skbuff.h:1891 [inline] BUG: KASAN: slab-use-after-free in skb_queue_purge_reason+0x99/0x360 net/core/skbuff.c:3937 Read of size 8 at addr ffff88807cb8d858 by task syz.1.219/6718 CPU: 1 UID: 0 PID: 6718 Comm: syz.1.219 Not tainted 6.16.0-rc1-syzkaller-00196-g08207f42d3ff #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/07/2025 Call Trace: dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0xd2/0x2b0 mm/kasan/report.c:521 kasan_report+0x118/0x150 mm/kasan/report.c:634 skb_queue_empty_lockless include/linux/skbuff.h:1891 [inline] skb_queue_purge_reason+0x99/0x360 net/core/skbuff.c:3937 skb_queue_purge include/linux/skbuff.h:3368 [inline] vhci_flush+0x44/0x50 drivers/bluetooth/hci_vhci.c:69 hci_dev_do_reset net/bluetooth/hci_core.c:552 [inline] hci_dev_reset+0x420/0x5c0 net/bluetooth/hci_core.c:592 sock_do_ioctl+0xd9/0x300 net/socket.c:1190 sock_ioctl+0x576/0x790 net/socket.c:1311 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:907 [inline] __se_sys_ioctl+0xf9/0x170 fs/ioctl.c:893 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fcf5b98e929 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fcf5c7b9038 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007fcf5bbb6160 RCX: 00007fcf5b98e929 RDX: 0000000000000000 RSI: 00000000400448cb RDI: 0000000000000009 RBP: 00007fcf5ba10b39 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 00007fcf5bbb6160 R15: 00007ffd6353d528 Allocated by task 6535: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0x93/0xb0 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:260 [inline] __kmalloc_cache_noprof+0x230/0x3d0 mm/slub.c:4359 kmalloc_noprof include/linux/slab.h:905 [inline] kzalloc_noprof include/linux/slab.h:1039 [inline] vhci_open+0x57/0x360 drivers/bluetooth/hci_vhci.c:635 misc_open+0x2bc/0x330 drivers/char/misc.c:161 chrdev_open+0x4c9/0x5e0 fs/char_dev.c:414 do_dentry_open+0xdf0/0x1970 fs/open.c:964 vfs_open+0x3b/0x340 fs/open.c:1094 do_open fs/namei.c:3887 [inline] path_openat+0x2ee5/0x3830 fs/namei.c:4046 do_filp_open+0x1fa/0x410 fs/namei.c:4073 do_sys_openat2+0x121/0x1c0 fs/open.c:1437 do_sys_open fs/open.c:1452 [inline] __do_sys_openat fs/open.c:1468 [inline] __se_sys_openat fs/open.c:1463 [inline] __x64_sys_openat+0x138/0x170 fs/open.c:1463 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 6535: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:576 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x62/0x70 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2381 [inline] slab_free mm/slub.c:4643 [inline] kfree+0x18e/0x440 mm/slub.c:4842 vhci_release+0xbc/0xd0 drivers/bluetooth/hci_vhci.c:671 __fput+0x44c/0xa70 fs/file_table.c:465 task_work_run+0x1d1/0x260 kernel/task_work.c:227 exit_task_work include/linux/task_work.h:40 [inline] do_exit+0x6ad/0x22e0 kernel/exit.c:955 do_group_exit+0x21c/0x2d0 kernel/exit.c:1104 __do_sys_exit_group kernel/exit.c:1115 [inline] __se_sys_exit_group kernel/exit.c:1113 [inline] __x64_sys_exit_group+0x3f/0x40 kernel/exit.c:1113 x64_sys_call+0x21ba/0x21c0 arch/x86/include/generated/asm/syscalls_64.h:232 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f The buggy address belongs to the object at ffff88807cb8d800 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 88 bytes inside of freed 1024-byte region [ffff88807cb8d800, ffff88807cb8dc00) Fixes: bf18c7118cf8 ("Bluetooth: vhci: Free driver_data on file release") Reported-by: syzbot+2faa4825e556199361f9@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=f62d64848fc4c7c30cd6 Signed-off-by: Kuniyuki Iwashima Acked-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a760f05fa3fb..9fc8f544e20e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -347,6 +348,7 @@ struct adv_monitor { struct hci_dev { struct list_head list; + struct srcu_struct srcu; struct mutex lock; struct ida unset_handle_ida; -- cgit v1.2.3 From 9e4ed359b8efad0e8ad4510d8ad22bf0b060526a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 16 Jun 2025 10:46:29 +0100 Subject: io_uring/netcmd: add tx timestamping cmd support Add a new socket command which returns tx time stamps to the user. It provide an alternative to the existing error queue recvmsg interface. The command works in a polled multishot mode, which means io_uring will poll the socket and keep posting timestamps until the request is cancelled or fails in any other way (e.g. with no space in the CQ). It reuses the net infra and grabs timestamps from the socket's error queue. The command requires IORING_SETUP_CQE32. All non-final CQEs (marked with IORING_CQE_F_MORE) have cqe->res set to the tskey, and the upper 16 bits of cqe->flags keep tstype (i.e. offset by IORING_CQE_BUFFER_SHIFT). The timevalue is store in the upper part of the extended CQE. The final completion won't have IORING_CQE_F_MORE and will have cqe->res storing 0/error. Suggested-by: Vadim Fedorenko Acked-by: Willem de Bruijn Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/92ee66e6b33b8de062a977843d825f58f21ecd37.1750065793.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 8c3d43caab02..85600ad0ac08 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -969,6 +969,22 @@ enum io_uring_socket_op { SOCKET_URING_OP_SIOCOUTQ, SOCKET_URING_OP_GETSOCKOPT, SOCKET_URING_OP_SETSOCKOPT, + SOCKET_URING_OP_TX_TIMESTAMP, +}; + +/* + * SOCKET_URING_OP_TX_TIMESTAMP definitions + */ + +#define IORING_TIMESTAMP_HW_SHIFT 16 +/* The cqe->flags bit from which the timestamp type is stored */ +#define IORING_TIMESTAMP_TYPE_SHIFT (IORING_TIMESTAMP_HW_SHIFT + 1) +/* The cqe->flags flag signifying whether it's a hardware timestamp */ +#define IORING_CQE_F_TSTAMP_HW ((__u32)1 << IORING_TIMESTAMP_HW_SHIFT); + +struct io_timespec { + __u64 tv_sec; + __u64 tv_nsec; }; /* Zero copy receive refill queue entry */ -- cgit v1.2.3 From c4cdbaf9d81c8387da8b9b91567d4b0eb1b8a549 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 15:45:24 -0700 Subject: iommu/amd: KVM: SVM: Use pi_desc_addr to derive ga_root_ptr Use vcpu_data.pi_desc_addr instead of amd_iommu_pi_data.base to get the GA root pointer. KVM is the only source of amd_iommu_pi_data.base, and KVM's one and only path for writing amd_iommu_pi_data.base computes the exact same value for vcpu_data.pi_desc_addr and amd_iommu_pi_data.base, and fills amd_iommu_pi_data.base if and only if vcpu_data.pi_desc_addr is valid, i.e. amd_iommu_pi_data.base is fully redundant. Cc: Maxim Levitsky Reviewed-by: Joao Martins Reviewed-by: Vasant Hegde Tested-by: Sairaj Kodilkar Link: https://lore.kernel.org/r/20250611224604.313496-23-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/amd-iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 1f9b13d803c5..deeefc92a5cf 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -19,8 +19,6 @@ struct amd_iommu; */ struct amd_iommu_pi_data { u32 ga_tag; - u64 base; - bool is_guest_mode; struct vcpu_data *vcpu_data; void *ir_data; -- cgit v1.2.3 From 53527ea1b70224d16d29edbd5c850456469f00ec Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 15:45:34 -0700 Subject: iommu: KVM: Split "struct vcpu_data" into separate AMD vs. Intel structs Split the vcpu_data structure that serves as a handoff from KVM to IOMMU drivers into vendor specific structures. Overloading a single structure makes the code hard to read and maintain, is *very* misleading as it suggests that mixing vendors is actually supported, and bastardizing Intel's posted interrupt descriptor address when AMD's IOMMU already has its own structure is quite unnecessary. Tested-by: Sairaj Kodilkar Link: https://lore.kernel.org/r/20250611224604.313496-33-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/amd-iommu.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index deeefc92a5cf..99b4fa9a0296 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -12,18 +12,6 @@ struct amd_iommu; -/* - * This is mainly used to communicate information back-and-forth - * between SVM and IOMMU for setting up and tearing down posted - * interrupt - */ -struct amd_iommu_pi_data { - u32 ga_tag; - bool is_guest_mode; - struct vcpu_data *vcpu_data; - void *ir_data; -}; - #ifdef CONFIG_AMD_IOMMU struct task_struct; -- cgit v1.2.3 From b33252b9d17238a4a9fa5a29af6e6a2922a6c2b0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 15:45:35 -0700 Subject: KVM: Don't WARN if updating IRQ bypass route fails Don't bother WARNing if updating an IRTE route fails now that vendor code provides much more precise WARNs. The generic WARN doesn't provide enough information to actually debug the problem, and has obviously done nothing to surface the myriad bugs in KVM x86's implementation. Drop all of the associated return code plumbing that existed just so that common KVM could WARN. Link: https://lore.kernel.org/r/20250611224604.313496-34-seanjc@google.com Signed-off-by: Sean Christopherson --- include/kvm/arm_vgic.h | 2 +- include/linux/kvm_host.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 4a34f7f0a864..b2a04481de1a 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -434,7 +434,7 @@ struct kvm_kernel_irq_routing_entry; int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq, struct kvm_kernel_irq_routing_entry *irq_entry); -int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq); +void kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq); int vgic_v4_load(struct kvm_vcpu *vcpu); void vgic_v4_commit(struct kvm_vcpu *vcpu); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a4160c1c0c6b..8e74ac0f90b1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2410,9 +2410,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); -int kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd, - struct kvm_kernel_irq_routing_entry *old, - struct kvm_kernel_irq_routing_entry *new); +void kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd, + struct kvm_kernel_irq_routing_entry *old, + struct kvm_kernel_irq_routing_entry *new); bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *, struct kvm_kernel_irq_routing_entry *); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ -- cgit v1.2.3 From 77bb184ab880171a1cedfbed9ab05977e6ae2258 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 15:45:36 -0700 Subject: KVM: Fold kvm_arch_irqfd_route_changed() into kvm_arch_update_irqfd_routing() Fold kvm_arch_irqfd_route_changed() into kvm_arch_update_irqfd_routing(). Calling arch code to know whether or not to call arch code is absurd. Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250611224604.313496-35-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8e74ac0f90b1..fb9ec06aa807 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2413,8 +2413,6 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); void kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd, struct kvm_kernel_irq_routing_entry *old, struct kvm_kernel_irq_routing_entry *new); -bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *, - struct kvm_kernel_irq_routing_entry *); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ #ifdef CONFIG_HAVE_KVM_INVALID_WAKEUPS -- cgit v1.2.3 From 08d9ccdd1a5c75d7aca7ac3af56f723d780dd6ac Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 15:45:43 -0700 Subject: iommu/amd: KVM: SVM: Infer IsRun from validity of pCPU destination Infer whether or not a vCPU should be marked running from the validity of the pCPU on which it is running. amd_iommu_update_ga() already skips the IRTE update if the pCPU is invalid, i.e. passing %true for is_run with an invalid pCPU would be a blatant and egregrious KVM bug. Tested-by: Sairaj Kodilkar Link: https://lore.kernel.org/r/20250611224604.313496-42-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/amd-iommu.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 99b4fa9a0296..fe0e16ffe0e5 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -30,8 +30,7 @@ static inline void amd_iommu_detect(void) { } /* IOMMU AVIC Function */ extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32)); -extern int -amd_iommu_update_ga(int cpu, bool is_run, void *data); +extern int amd_iommu_update_ga(int cpu, void *data); extern int amd_iommu_activate_guest_mode(void *data); extern int amd_iommu_deactivate_guest_mode(void *data); @@ -44,8 +43,7 @@ amd_iommu_register_ga_log_notifier(int (*notifier)(u32)) return 0; } -static inline int -amd_iommu_update_ga(int cpu, bool is_run, void *data) +static inline int amd_iommu_update_ga(int cpu, void *data) { return 0; } -- cgit v1.2.3 From f965255dc5033387ac7858787c6c792fd789ac34 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 15:45:45 -0700 Subject: iommu/amd: KVM: SVM: Set pCPU info in IRTE when setting vCPU affinity Now that setting vCPU affinity is guarded with ir_list_lock, i.e. now that avic_physical_id_entry can be safely accessed, set the pCPU info straight-away when setting vCPU affinity. Putting the IRTE into posted mode, and then immediately updating the IRTE a second time if the target vCPU is running is wasteful and confusing. This also fixes a flaw where a posted IRQ that arrives between putting the IRTE into guest_mode and setting the correct destination could cause the IOMMU to ring the doorbell on the wrong pCPU. Link: https://lore.kernel.org/r/20250611224604.313496-44-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/amd-iommu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index fe0e16ffe0e5..c9f2df0c4596 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -32,7 +32,7 @@ extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32)); extern int amd_iommu_update_ga(int cpu, void *data); -extern int amd_iommu_activate_guest_mode(void *data); +extern int amd_iommu_activate_guest_mode(void *data, int cpu); extern int amd_iommu_deactivate_guest_mode(void *data); #else /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */ @@ -48,7 +48,7 @@ static inline int amd_iommu_update_ga(int cpu, void *data) return 0; } -static inline int amd_iommu_activate_guest_mode(void *data) +static inline int amd_iommu_activate_guest_mode(void *data, int cpu) { return 0; } -- cgit v1.2.3 From b9e53f9ff4a88f01b22524878c9a381a6c5f65ff Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 15:46:03 -0700 Subject: iommu/amd: KVM: SVM: Allow KVM to control need for GA log interrupts Add plumbing to the AMD IOMMU driver to allow KVM to control whether or not an IRTE is configured to generate GA log interrupts. KVM only needs a notification if the target vCPU is blocking, so the vCPU can be awakened. If a vCPU is preempted or exits to userspace, KVM clears is_run, but will set the vCPU back to running when userspace does KVM_RUN and/or the vCPU task is scheduled back in, i.e. KVM doesn't need a notification. Unconditionally pass "true" in all KVM paths to isolate the IOMMU changes from the KVM changes insofar as possible. Opportunistically swap the ordering of parameters for amd_iommu_update_ga() so that the match amd_iommu_activate_guest_mode(). Note, as of this writing, the AMD IOMMU manual doesn't list GALogIntr as a non-cached field, but per AMD hardware architects, it's not cached and can be safely updated without an invalidation. Link: https://lore.kernel.org/all/b29b8c22-2fd4-4b5e-b755-9198874157c7@amd.com Cc: Vasant Hegde Cc: Joao Martins Link: https://lore.kernel.org/r/20250611224604.313496-62-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/amd-iommu.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index c9f2df0c4596..8cced632ecd0 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -30,9 +30,8 @@ static inline void amd_iommu_detect(void) { } /* IOMMU AVIC Function */ extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32)); -extern int amd_iommu_update_ga(int cpu, void *data); - -extern int amd_iommu_activate_guest_mode(void *data, int cpu); +extern int amd_iommu_update_ga(void *data, int cpu, bool ga_log_intr); +extern int amd_iommu_activate_guest_mode(void *data, int cpu, bool ga_log_intr); extern int amd_iommu_deactivate_guest_mode(void *data); #else /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */ @@ -43,12 +42,12 @@ amd_iommu_register_ga_log_notifier(int (*notifier)(u32)) return 0; } -static inline int amd_iommu_update_ga(int cpu, void *data) +static inline int amd_iommu_update_ga(void *data, int cpu, bool ga_log_intr) { return 0; } -static inline int amd_iommu_activate_guest_mode(void *data, int cpu) +static inline int amd_iommu_activate_guest_mode(void *data, int cpu, bool ga_log_intr) { return 0; } -- cgit v1.2.3 From 283ed5001d6852f85c09ed2522331b2b197ba937 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 22 May 2025 16:52:11 -0700 Subject: KVM: Use a local struct to do the initial vfs_poll() on an irqfd Use a function-local struct for the poll_table passed to vfs_poll(), as nothing in the vfs_poll() callchain grabs a long-term reference to the structure, i.e. its lifetime doesn't need to be tied to the irqfd. Using a local structure will also allow propagating failures out of the polling callback without further polluting kvm_kernel_irqfd. Opportunstically rename irqfd_ptable_queue_proc() to kvm_irqfd_register() to capture what it actually does. Tested-by: K Prateek Nayak Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250522235223.3178519-2-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_irqfd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h index 361c07f4466d..ef8c134ded8a 100644 --- a/include/linux/kvm_irqfd.h +++ b/include/linux/kvm_irqfd.h @@ -55,7 +55,6 @@ struct kvm_kernel_irqfd { /* Used for setup/shutdown */ struct eventfd_ctx *eventfd; struct list_head list; - poll_table pt; struct work_struct shutdown; struct irq_bypass_consumer consumer; struct irq_bypass_producer *producer; -- cgit v1.2.3 From 0d09582b3a607436fd91d6ce813048a048ecbf10 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 22 May 2025 16:52:18 -0700 Subject: sched/wait: Add a waitqueue helper for fully exclusive priority waiters Add a waitqueue helper to add a priority waiter that requires exclusive wakeups, i.e. that requires that it be the _only_ priority waiter. The API will be used by KVM to ensure that at most one of KVM's irqfds is bound to a single eventfd (across the entire kernel). Open code the helper instead of using __add_wait_queue() so that the common path doesn't need to "handle" impossible failures. Cc: K Prateek Nayak Reviewed-by: K Prateek Nayak Tested-by: K Prateek Nayak Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250522235223.3178519-9-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/wait.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index 965a19809c7e..09855d819418 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -164,6 +164,8 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head) extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); +extern int add_wait_queue_priority_exclusive(struct wait_queue_head *wq_head, + struct wait_queue_entry *wq_entry); extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) -- cgit v1.2.3 From 7484e15dbb016d9d40f8c6e0475810212ae181db Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Jun 2025 00:09:51 -0400 Subject: replace collect_mounts()/drop_collected_mounts() with a safer variant collect_mounts() has several problems - one can't iterate over the results directly, so it has to be done with callback passed to iterate_mounts(); it has an oopsable race with d_invalidate(); it creates temporary clones of mounts invisibly for sync umount (IOW, you can have non-lazy umount succeed leaving filesystem not mounted anywhere and yet still busy). A saner approach is to give caller an array of struct path that would pin every mount in a subtree, without cloning any mounts. * collect_mounts()/drop_collected_mounts()/iterate_mounts() is gone * collect_paths(where, preallocated, size) gives either ERR_PTR(-E...) or a pointer to array of struct path, one for each chunk of tree visible under 'where' (i.e. the first element is a copy of where, followed by (mount,root) for everything mounted under it - the same set collect_mounts() would give). Unlike collect_mounts(), the mounts are *not* cloned - we just get pinning references to the roots of subtrees in the caller's namespace. Array is terminated by {NULL, NULL} struct path. If it fits into preallocated array (on-stack, normally), that's where it goes; otherwise it's allocated by kmalloc_array(). Passing 0 as size means that 'preallocated' is ignored (and expected to be NULL). * drop_collected_paths(paths, preallocated) is given the array returned by an earlier call of collect_paths() and the preallocated array passed to that call. All mount/dentry references are dropped and array is kfree'd if it's not equal to 'preallocated'. * instead of iterate_mounts(), users should just iterate over array of struct path - nothing exotic is needed for that. Existing users (all in audit_tree.c) are converted. [folded a fix for braino reported by Venkat Rao Bagalkote ] Fixes: 80b5dce8c59b0 ("vfs: Add a function to lazily unmount all mounts from any dentry") Tested-by: Venkat Rao Bagalkote Signed-off-by: Al Viro --- include/linux/mount.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mount.h b/include/linux/mount.h index 4880f434c021..1a508beba446 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -116,10 +116,8 @@ extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); int do_mount(const char *, const char __user *, const char *, unsigned long, void *); -extern struct vfsmount *collect_mounts(const struct path *); -extern void drop_collected_mounts(struct vfsmount *); -extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, - struct vfsmount *); +extern struct path *collect_paths(const struct path *, struct path *, unsigned); +extern void drop_collected_paths(struct path *, struct path *); extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num); extern int cifs_root_data(char **dev, char **opts); -- cgit v1.2.3 From fc2898ea793a48bc4b74b61cde2d8656f20efdf4 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 23 Jun 2025 01:30:49 +0100 Subject: workqueue: Remove unused work_on_cpu_safe The last use of the work_on_cpu_safe() macro was removed recently by commit 9cda46babdfe ("crypto: n2 - remove Niagara2 SPU driver") Remove it, and the work_on_cpu_safe_key() function it calls. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 83d158bb2791..45d5dd470ff6 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -847,19 +847,6 @@ long work_on_cpu_key(int cpu, long (*fn)(void *), work_on_cpu_key(_cpu, _fn, _arg, &__key); \ }) -long work_on_cpu_safe_key(int cpu, long (*fn)(void *), - void *arg, struct lock_class_key *key); - -/* - * A new key is defined for each caller to make sure the work - * associated with the function doesn't share its locking class. - */ -#define work_on_cpu_safe(_cpu, _fn, _arg) \ -({ \ - static struct lock_class_key __key; \ - \ - work_on_cpu_safe_key(_cpu, _fn, _arg, &__key); \ -}) #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER -- cgit v1.2.3 From 3aa54d162490f14d1f1fdf3b3d1170b2ea50276b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 18 Jun 2025 11:11:29 +0200 Subject: PCI/pwrctrl: Fix the kerneldoc tag for private fields The correct tag for marking private fields in kerneldoc is "private:", not capitalized "Private:". Fix the pwrctl struct to silence the following warnings: Warning: include/linux/pci-pwrctrl.h:45 struct member 'nb' not described in 'pci_pwrctrl' Warning: include/linux/pci-pwrctrl.h:45 struct member 'link' not described in 'pci_pwrctrl' Warning: include/linux/pci-pwrctrl.h:45 struct member 'work' not described in 'pci_pwrctrl' Fixes: 4565d2652a37 ("PCI/pwrctl: Add PCI power control core code") Reported-by: Bjorn Helgaas Closes: https://lore.kernel.org/all/20250617233539.GA1177120@bhelgaas/ Signed-off-by: Bartosz Golaszewski Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250618091129.44810-1-brgl@bgdev.pl --- include/linux/pci-pwrctrl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci-pwrctrl.h b/include/linux/pci-pwrctrl.h index 7d439b0675e9..4aefc7901cd1 100644 --- a/include/linux/pci-pwrctrl.h +++ b/include/linux/pci-pwrctrl.h @@ -39,7 +39,7 @@ struct device_link; struct pci_pwrctrl { struct device *dev; - /* Private: don't use. */ + /* private: internal use only */ struct notifier_block nb; struct device_link *link; struct work_struct work; -- cgit v1.2.3 From 1174bf15bd601f17556f721798cd9183e169549a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 19 May 2025 14:29:00 +0300 Subject: drm/connector: move HDR sink metadata to display info Information parsed from the display EDID should be stored in display info. Move HDR sink metadata there. Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250519112900.1383997-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/drm_connector.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 73903c3c842f..9fdc4581dd90 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -799,6 +799,11 @@ struct drm_display_info { */ struct drm_hdmi_info hdmi; + /** + * @hdr_sink_metadata: HDR Metadata Information read from sink + */ + struct hdr_sink_metadata hdr_sink_metadata; + /** * @non_desktop: Non desktop display (HMD). */ @@ -2284,9 +2289,6 @@ struct drm_connector { */ struct llist_node free_node; - /** @hdr_sink_metadata: HDR Metadata Information read from sink */ - struct hdr_sink_metadata hdr_sink_metadata; - /** * @hdmi: HDMI-related variable and properties. */ -- cgit v1.2.3 From e84a4927a404f369c842c19de93b216627fcc690 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Jun 2025 13:30:00 +0000 Subject: net: annotate races around sk->sk_uid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sk->sk_uid can be read while another thread changes its value in sockfs_setattr(). Add sk_uid(const struct sock *sk) helper to factorize the needed READ_ONCE() annotations, and add corresponding WRITE_ONCE() where needed. Fixes: 86741ec25462 ("net: core: Add a UID field to struct sock.") Signed-off-by: Eric Dumazet Cc: Lorenzo Colitti Reviewed-by: Maciej Żenczykowski Link: https://patch.msgid.link/20250620133001.4090592-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/route.h | 4 ++-- include/net/sock.h | 12 ++++++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index 8e39aa822cf9..3d3d6048ffca 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -153,7 +153,7 @@ static inline void inet_sk_init_flowi4(const struct inet_sock *inet, ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), sk->sk_protocol, inet_sk_flowi_flags(sk), daddr, inet->inet_saddr, inet->inet_dport, - inet->inet_sport, sk->sk_uid); + inet->inet_sport, sk_uid(sk)); security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); } @@ -331,7 +331,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), protocol, flow_flags, dst, - src, dport, sport, sk->sk_uid); + src, dport, sport, sk_uid(sk)); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst, diff --git a/include/net/sock.h b/include/net/sock.h index ca532227cbfd..fc5e6f66b00a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2076,6 +2076,7 @@ static inline void sock_orphan(struct sock *sk) sock_set_flag(sk, SOCK_DEAD); sk_set_socket(sk, NULL); sk->sk_wq = NULL; + /* Note: sk_uid is unchanged. */ write_unlock_bh(&sk->sk_callback_lock); } @@ -2086,18 +2087,25 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) rcu_assign_pointer(sk->sk_wq, &parent->wq); parent->sk = sk; sk_set_socket(sk, parent); - sk->sk_uid = SOCK_INODE(parent)->i_uid; + WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid); security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } kuid_t sock_i_uid(struct sock *sk); + +static inline kuid_t sk_uid(const struct sock *sk) +{ + /* Paired with WRITE_ONCE() in sockfs_setattr() */ + return READ_ONCE(sk->sk_uid); +} + unsigned long __sock_i_ino(struct sock *sk); unsigned long sock_i_ino(struct sock *sk); static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) { - return sk ? sk->sk_uid : make_kuid(net->user_ns, 0); + return sk ? sk_uid(sk) : make_kuid(net->user_ns, 0); } static inline u32 net_tx_rndhash(void) -- cgit v1.2.3 From c51da3f7a161c6822232be832abdffe47eb55b4c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Jun 2025 13:30:01 +0000 Subject: net: remove sock_i_uid() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Difference between sock_i_uid() and sk_uid() is that after sock_orphan(), sock_i_uid() returns GLOBAL_ROOT_UID while sk_uid() returns the last cached sk->sk_uid value. None of sock_i_uid() callers care about this. Use sk_uid() which is much faster and inlined. Note that diag/dump users are calling sock_i_ino() and can not see the full benefit yet. Signed-off-by: Eric Dumazet Cc: Lorenzo Colitti Reviewed-by: Maciej Żenczykowski Link: https://patch.msgid.link/20250620133001.4090592-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index fc5e6f66b00a..bbd97fbc5935 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2092,8 +2092,6 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) write_unlock_bh(&sk->sk_callback_lock); } -kuid_t sock_i_uid(struct sock *sk); - static inline kuid_t sk_uid(const struct sock *sk) { /* Paired with WRITE_ONCE() in sockfs_setattr() */ -- cgit v1.2.3 From 3169e36ae14802b01abe4bfa7ec593b0a1af5cc7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Jun 2025 15:55:35 +0000 Subject: net: make sk->sk_sndtimeo lockless Followup of commit 285975dd6742 ("net: annotate data-races around sk->sk_{rcv|snd}timeo"). Remove lock_sock()/release_sock() from sock_set_sndtimeo(), and add READ_ONCE()/WRITE_ONCE() where it is needed. Also SO_SNDTIMEO_OLD and SO_SNDTIMEO_NEW can call sock_set_timeout() without holding the socket lock. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250620155536.335520-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index bbd97fbc5935..b08e36bf9669 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2601,7 +2601,7 @@ static inline long sock_rcvtimeo(const struct sock *sk, bool noblock) static inline long sock_sndtimeo(const struct sock *sk, bool noblock) { - return noblock ? 0 : sk->sk_sndtimeo; + return noblock ? 0 : READ_ONCE(sk->sk_sndtimeo); } static inline int sock_rcvlowat(const struct sock *sk, int waitall, int len) -- cgit v1.2.3 From 935b67675a9f233aa4ac4ae6452b2cc45418d839 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Jun 2025 15:55:36 +0000 Subject: net: make sk->sk_rcvtimeo lockless Followup of commit 285975dd6742 ("net: annotate data-races around sk->sk_{rcv|snd}timeo"). Remove lock_sock()/release_sock() from ksmbd_tcp_rcv_timeout() and add READ_ONCE()/WRITE_ONCE() where it is needed. Also SO_RCVTIMEO_OLD and SO_RCVTIMEO_NEW can call sock_set_timeout() without holding the socket lock. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250620155536.335520-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index b08e36bf9669..0f2443d4ec58 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2596,7 +2596,7 @@ static inline gfp_t gfp_memcg_charge(void) static inline long sock_rcvtimeo(const struct sock *sk, bool noblock) { - return noblock ? 0 : sk->sk_rcvtimeo; + return noblock ? 0 : READ_ONCE(sk->sk_rcvtimeo); } static inline long sock_sndtimeo(const struct sock *sk, bool noblock) -- cgit v1.2.3 From bbb7d478d91ac4d5c288e226cc8744daf3820798 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 18 Jun 2025 15:07:22 -0700 Subject: net: phy: Add interface types for 50G and 100G Add support for 802.3cd based interface types 50GBASE-R and 100GBASE-P. This choice in naming is based on section 135 of the 802.3-2022 IEEE Standard. In addition it is adding support for what I am referring to as LAUI which is based on annex 135C of the IEEE Standard, and shares many similarities with the 25/50G consortium. The main difference between the two is that IEEE spec refers to LAUI as the AUI before the RS(544/514) FEC, whereas the 25/50G use this lane and frequency combination after going through RS(528/514), Base-R or no FEC at all. Signed-off-by: Alexander Duyck Link: https://patch.msgid.link/175028444205.625704.4191700324472974116.stgit@ahduyck-xeon-server.home.arpa Signed-off-by: Paolo Abeni --- include/linux/phy.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index b037aab7b71d..74c1bcf64b3c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -103,6 +103,9 @@ extern const int phy_basic_ports_array[3]; * @PHY_INTERFACE_MODE_QUSGMII: Quad Universal SGMII * @PHY_INTERFACE_MODE_1000BASEKX: 1000Base-KX - with Clause 73 AN * @PHY_INTERFACE_MODE_10G_QXGMII: 10G-QXGMII - 4 ports over 10G USXGMII + * @PHY_INTERFACE_MODE_50GBASER: 50GBase-R - with Clause 134 FEC + * @PHY_INTERFACE_MODE_LAUI: 50 Gigabit Attachment Unit Interface + * @PHY_INTERFACE_MODE_100GBASEP: 100GBase-P - with Clause 134 FEC * @PHY_INTERFACE_MODE_MAX: Book keeping * * Describes the interface between the MAC and PHY. @@ -144,6 +147,9 @@ typedef enum { PHY_INTERFACE_MODE_QUSGMII, PHY_INTERFACE_MODE_1000BASEKX, PHY_INTERFACE_MODE_10G_QXGMII, + PHY_INTERFACE_MODE_50GBASER, + PHY_INTERFACE_MODE_LAUI, + PHY_INTERFACE_MODE_100GBASEP, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -260,6 +266,12 @@ static inline const char *phy_modes(phy_interface_t interface) return "qusgmii"; case PHY_INTERFACE_MODE_10G_QXGMII: return "10g-qxgmii"; + case PHY_INTERFACE_MODE_50GBASER: + return "50gbase-r"; + case PHY_INTERFACE_MODE_LAUI: + return "laui"; + case PHY_INTERFACE_MODE_100GBASEP: + return "100gbase-p"; default: return "unknown"; } -- cgit v1.2.3 From dad51ea09040f9ac2ea2a0f694e5e3ed5cf167b9 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Fri, 20 Jun 2025 12:02:40 +0200 Subject: net: pse-pd: tps23881: Clarify setup_pi_matrix callback documentation Improve the setup_pi_matrix callback documentation to clarify its purpose and usage. The enhanced description explains that PSE PI devicetree nodes are pre-parsed before this callback is invoked, and drivers should utilize pcdev->pi[x]->pairset[y].np to map PSE controller hardware ports to their corresponding Power Interfaces. This clarification helps driver implementers understand the callback's role in establishing the hardware-to-PI relationship mapping. Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20250620-poe_doc_improve-v1-2-96357bb95d52@bootlin.com Signed-off-by: Paolo Abeni --- include/linux/pse-pd/pse.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index e5f305cef82e..4e5696cfade7 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -159,7 +159,13 @@ struct ethtool_pse_control_status { /** * struct pse_controller_ops - PSE controller driver callbacks * - * @setup_pi_matrix: setup PI matrix of the PSE controller + * @setup_pi_matrix: Setup PI matrix of the PSE controller. + * The PSE PIs devicetree nodes have already been parsed by + * of_load_pse_pis() and the pcdev->pi[x]->pairset[y].np + * populated. This callback should establish the + * relationship between the PSE controller hardware ports + * and the PSE Power Interfaces, either through software + * mapping or hardware configuration. * @pi_get_admin_state: Get the operational state of the PSE PI. This ops * is mandatory. * @pi_get_pw_status: Get the power detection status of the PSE PI. This -- cgit v1.2.3 From 7934a8dd8692b56714ce9b36421e316445d94a77 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 6 Jun 2025 13:10:23 +0900 Subject: module: remove meaningless 'name' parameter from __MODULE_INFO() The symbol names in the .modinfo section are never used and already randomized by the __UNIQUE_ID() macro. Therefore, the second parameter of __MODULE_INFO() is meaningless and can be removed to simplify the code. With this change, the symbol names in the .modinfo section will be prefixed with __UNIQUE_ID_modinfo, making it clearer that they originate from MODULE_INFO(). [Before] $ objcopy -j .modinfo vmlinux.o modinfo.o $ nm -n modinfo.o | head -n10 0000000000000000 r __UNIQUE_ID_license560 0000000000000011 r __UNIQUE_ID_file559 0000000000000030 r __UNIQUE_ID_description558 0000000000000074 r __UNIQUE_ID_license580 000000000000008e r __UNIQUE_ID_file579 00000000000000bd r __UNIQUE_ID_description578 00000000000000e6 r __UNIQUE_ID_license581 00000000000000ff r __UNIQUE_ID_file580 0000000000000134 r __UNIQUE_ID_description579 0000000000000179 r __UNIQUE_ID_uncore_no_discover578 [After] $ objcopy -j .modinfo vmlinux.o modinfo.o $ nm -n modinfo.o | head -n10 0000000000000000 r __UNIQUE_ID_modinfo560 0000000000000011 r __UNIQUE_ID_modinfo559 0000000000000030 r __UNIQUE_ID_modinfo558 0000000000000074 r __UNIQUE_ID_modinfo580 000000000000008e r __UNIQUE_ID_modinfo579 00000000000000bd r __UNIQUE_ID_modinfo578 00000000000000e6 r __UNIQUE_ID_modinfo581 00000000000000ff r __UNIQUE_ID_modinfo580 0000000000000134 r __UNIQUE_ID_modinfo579 0000000000000179 r __UNIQUE_ID_modinfo578 Signed-off-by: Masahiro Yamada Reviewed-by: Petr Pavlu --- include/crypto/algapi.h | 4 ++-- include/linux/module.h | 3 --- include/linux/moduleparam.h | 9 +++++---- include/net/tcp.h | 4 ++-- 4 files changed, 9 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 188eface0a11..fc4574940636 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -43,8 +43,8 @@ * alias. */ #define MODULE_ALIAS_CRYPTO(name) \ - __MODULE_INFO(alias, alias_userspace, name); \ - __MODULE_INFO(alias, alias_crypto, "crypto-" name) + MODULE_INFO(alias, name); \ + MODULE_INFO(alias, "crypto-" name) struct crypto_aead; struct crypto_instance; diff --git a/include/linux/module.h b/include/linux/module.h index 92e1420fccdf..81b41cc6a19e 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -164,9 +164,6 @@ extern void cleanup_module(void); struct module_kobject *lookup_or_create_module_kobject(const char *name); -/* Generic info of form tag = "info" */ -#define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) - /* For userspace: you can also call me... */ #define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias) diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index bfb85fd13e1f..00166f747e27 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -20,18 +20,19 @@ /* Chosen so that structs with an unsigned long line up. */ #define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long)) -#define __MODULE_INFO(tag, name, info) \ - static const char __UNIQUE_ID(name)[] \ +/* Generic info of form tag = "info" */ +#define MODULE_INFO(tag, info) \ + static const char __UNIQUE_ID(modinfo)[] \ __used __section(".modinfo") __aligned(1) \ = __MODULE_INFO_PREFIX __stringify(tag) "=" info #define __MODULE_PARM_TYPE(name, _type) \ - __MODULE_INFO(parmtype, name##type, #name ":" _type) + MODULE_INFO(parmtype, #name ":" _type) /* One for each parameter, describing how to use it. Some files do multiple of these per line, so can't just use MODULE_INFO. */ #define MODULE_PARM_DESC(_parm, desc) \ - __MODULE_INFO(parm, _parm, #_parm ":" desc) + MODULE_INFO(parm, #_parm ":" desc) struct kernel_param; diff --git a/include/net/tcp.h b/include/net/tcp.h index 5078ad868fee..9b39ef630c92 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2662,8 +2662,8 @@ void tcp_update_ulp(struct sock *sk, struct proto *p, void (*write_space)(struct sock *sk)); #define MODULE_ALIAS_TCP_ULP(name) \ - __MODULE_INFO(alias, alias_userspace, name); \ - __MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name) + MODULE_INFO(alias, name); \ + MODULE_INFO(alias, "tcp-ulp-" name) #ifdef CONFIG_NET_SOCK_MSG struct sk_msg; -- cgit v1.2.3 From e581b7fe62218d390520287e0095bfd6fe0454f8 Mon Sep 17 00:00:00 2001 From: Sarika Sharma Date: Wed, 28 May 2025 11:14:11 +0530 Subject: wifi: mac80211: add support towards MLO handling of station statistics Currently, in supporting API's to fill sinfo structure from sta structure, is mapped to fill the fields from sta->deflink. However, for multi-link (ML) station, sinfo structure should be filled from corresponding link_id. Therefore, add link_id as an additional argument in supporting API's for filling sinfo structure correctly. Link_id is set to -1 for non-ML station and corresponding link_id for ML stations. In supporting API's for filling sinfo structure, check for link_id, if link_id < 0, fill the sinfo structure from sta->deflink, otherwise fill from sta->link[link_id]. Current, changes are done at the deflink level i.e, pass -1 as link_id. Actual link_id will be added in subsequent patches to support station statistics for MLO. Signed-off-by: Sarika Sharma Link: https://patch.msgid.link/20250528054420.3050133-2-quic_sarishar@quicinc.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 82617579d910..a305e7f9c6b2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7242,13 +7242,14 @@ void ieee80211_disable_rssi_reports(struct ieee80211_vif *vif); * ieee80211_ave_rssi - report the average RSSI for the specified interface * * @vif: the specified virtual interface + * @link_id: the link ID for MLO, or -1 for non-MLO * * Note: This function assumes that the given vif is valid. * * Return: The average RSSI value for the requested interface, or 0 if not * applicable. */ -int ieee80211_ave_rssi(struct ieee80211_vif *vif); +int ieee80211_ave_rssi(struct ieee80211_vif *vif, int link_id); /** * ieee80211_report_wowlan_wakeup - report WoWLAN wakeup -- cgit v1.2.3 From d2329fff7e527e8b350086be2e7cbf0d190177a3 Mon Sep 17 00:00:00 2001 From: Sarika Sharma Date: Wed, 28 May 2025 11:14:12 +0530 Subject: wifi: cfg80211: add link_station_info structure to support MLO statistics Current implementation of NL80211_GET_STATION does not work for multi-link operation(MLO) since in case of MLO only deflink (or one of the links) is considered and not all links. Therefore to support for MLO, add link_station_info structure to account link level statistics for station. Additionally, add valid_links in station_info structure to indicate bitmap of valid links for MLO. This will be helpful to check the link related statistics during MLO. Signed-off-by: Sarika Sharma Link: https://patch.msgid.link/20250528054420.3050133-3-quic_sarishar@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 101 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 47b4235eea59..b008357cac03 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2017,6 +2017,99 @@ struct cfg80211_tid_stats { #define IEEE80211_MAX_CHAINS 4 +/** + * struct link_station_info - link station information + * + * Link station information filled by driver for get_station() and + * dump_station(). + * @filled: bit flag of flags using the bits of &enum nl80211_sta_info to + * indicate the relevant values in this struct for them + * @connected_time: time(in secs) since a link of station is last connected + * @inactive_time: time since last activity for link station(tx/rx) + * in milliseconds + * @assoc_at: bootime (ns) of the last association of link of station + * @rx_bytes: bytes (size of MPDUs) received from this link of station + * @tx_bytes: bytes (size of MPDUs) transmitted to this link of station + * @signal: The signal strength, type depends on the wiphy's signal_type. + * For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_. + * @signal_avg: Average signal strength, type depends on the wiphy's + * signal_type. For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_ + * @chains: bitmask for filled values in @chain_signal, @chain_signal_avg + * @chain_signal: per-chain signal strength of last received packet in dBm + * @chain_signal_avg: per-chain signal strength average in dBm + * @txrate: current unicast bitrate from this link of station + * @rxrate: current unicast bitrate to this link of station + * @rx_packets: packets (MSDUs & MMPDUs) received from this link of station + * @tx_packets: packets (MSDUs & MMPDUs) transmitted to this link of station + * @tx_retries: cumulative retry counts (MPDUs) for this link of station + * @tx_failed: number of failed transmissions (MPDUs) (retries exceeded, no ACK) + * @rx_dropped_misc: Dropped for un-specified reason. + * @bss_param: current BSS parameters + * @beacon_loss_count: Number of times beacon loss event has triggered. + * @expected_throughput: expected throughput in kbps (including 802.11 headers) + * towards this station. + * @rx_beacon: number of beacons received from this peer + * @rx_beacon_signal_avg: signal strength average (in dBm) for beacons received + * from this peer + * @rx_duration: aggregate PPDU duration(usecs) for all the frames from a peer + * @tx_duration: aggregate PPDU duration(usecs) for all the frames to a peer + * @airtime_weight: current airtime scheduling weight + * @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last + * (IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs. + * Note that this doesn't use the @filled bit, but is used if non-NULL. + * @ack_signal: signal strength (in dBm) of the last ACK frame. + * @avg_ack_signal: average rssi value of ack packet for the no of msdu's has + * been sent. + * @rx_mpdu_count: number of MPDUs received from this station + * @fcs_err_count: number of packets (MPDUs) received from this station with + * an FCS error. This counter should be incremented only when TA of the + * received packet with an FCS error matches the peer MAC address. + * @addr: For MLO STA connection, filled with address of the link of station. + */ +struct link_station_info { + u64 filled; + u32 connected_time; + u32 inactive_time; + u64 assoc_at; + u64 rx_bytes; + u64 tx_bytes; + s8 signal; + s8 signal_avg; + + u8 chains; + s8 chain_signal[IEEE80211_MAX_CHAINS]; + s8 chain_signal_avg[IEEE80211_MAX_CHAINS]; + + struct rate_info txrate; + struct rate_info rxrate; + u32 rx_packets; + u32 tx_packets; + u32 tx_retries; + u32 tx_failed; + u32 rx_dropped_misc; + struct sta_bss_parameters bss_param; + + u32 beacon_loss_count; + + u32 expected_throughput; + + u64 tx_duration; + u64 rx_duration; + u64 rx_beacon; + u8 rx_beacon_signal_avg; + + u16 airtime_weight; + + s8 ack_signal; + s8 avg_ack_signal; + struct cfg80211_tid_stats *pertid; + + u32 rx_mpdu_count; + u32 fcs_err_count; + + u8 addr[ETH_ALEN] __aligned(2); +}; + /** * struct station_info - station information * @@ -2101,6 +2194,11 @@ struct cfg80211_tid_stats { * dump_station() callbacks. User space needs this information to determine * the accepted and rejected affiliated links of the connected station. * @assoc_resp_ies_len: Length of @assoc_resp_ies buffer in octets. + * @valid_links: bitmap of valid links, or 0 for non-MLO. Drivers fill this + * information in cfg80211_new_sta(), cfg80211_del_sta_sinfo(), + * get_station() and dump_station() callbacks. + * @links: reference to Link sta entries for MLO STA, all link specific + * information is accessed through links[link_id]. */ struct station_info { u64 filled; @@ -2165,6 +2263,9 @@ struct station_info { u8 mld_addr[ETH_ALEN] __aligned(2); const u8 *assoc_resp_ies; size_t assoc_resp_ies_len; + + u16 valid_links; + struct link_station_info *links[IEEE80211_MLD_MAX_NUM_LINKS]; }; /** -- cgit v1.2.3 From 49e47223ecc4af0bd15b5267184d46b3654d520b Mon Sep 17 00:00:00 2001 From: Sarika Sharma Date: Wed, 28 May 2025 11:14:15 +0530 Subject: wifi: cfg80211: allocate memory for link_station info structure Currently, station_info structure is passed to fill station statistics from mac80211/drivers. After NL message send to user space for requested station statistics, memory for station statistics is freed in cfg80211. Therefore, memory allocation/free for link station statistics should also happen in cfg80211 only. Hence, allocate the memory for link_station structure for all possible links and free in cfg80211_sinfo_release_content(). Signed-off-by: Sarika Sharma Link: https://patch.msgid.link/20250528054420.3050133-6-quic_sarishar@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index b008357cac03..7bf0c97d2ab1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8577,6 +8577,13 @@ int cfg80211_sinfo_alloc_tid_stats(struct station_info *sinfo, gfp_t gfp); static inline void cfg80211_sinfo_release_content(struct station_info *sinfo) { kfree(sinfo->pertid); + + for (int link_id = 0; link_id < ARRAY_SIZE(sinfo->links); link_id++) { + if (sinfo->links[link_id]) { + kfree(sinfo->links[link_id]->pertid); + kfree(sinfo->links[link_id]); + } + } } /** -- cgit v1.2.3 From 505991fba9ec112770c79a0fea56b4c49a5ad2fa Mon Sep 17 00:00:00 2001 From: Sarika Sharma Date: Wed, 28 May 2025 11:14:18 +0530 Subject: wifi: mac80211: extend support to fill link level sinfo structure Currently, sinfo structure is supported to fill information at deflink( or one of the links) level for station. This has problems when applied to fetch multi-link(ML) station information. Hence, if valid_links are present, support filling link_station structure for each link. This will be helpful to check the link related statistics during MLO. Additionally, TXQ stats for pertid are applicable at station level not at link level. Therefore check link_id is less then 0, before filling TXQ stats in pertid stats. Signed-off-by: Sarika Sharma Link: https://patch.msgid.link/20250528054420.3050133-9-quic_sarishar@quicinc.com [fix some indentation] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7bf0c97d2ab1..eec066f4738a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8566,6 +8566,17 @@ void cfg80211_tx_mgmt_expired(struct wireless_dev *wdev, u64 cookie, */ int cfg80211_sinfo_alloc_tid_stats(struct station_info *sinfo, gfp_t gfp); +/** + * cfg80211_link_sinfo_alloc_tid_stats - allocate per-tid statistics. + * + * @link_sinfo: the link station information + * @gfp: allocation flags + * + * Return: 0 on success. Non-zero on error. + */ +int cfg80211_link_sinfo_alloc_tid_stats(struct link_station_info *link_sinfo, + gfp_t gfp); + /** * cfg80211_sinfo_release_content - release contents of station info * @sinfo: the station information -- cgit v1.2.3 From 4cb1ce7e254adeeeec7ccbb45125307aec4d0f0b Mon Sep 17 00:00:00 2001 From: Sarika Sharma Date: Wed, 28 May 2025 11:14:20 +0530 Subject: wifi: mac80211: add link_sta_statistics ops to fill link station statistics Currently, link station statistics for MLO are filled by mac80211. But there are some statistics that kept by mac80211 might not be accurate, so let the driver pre-fill the link statistics. The driver can fill the values (indicating which field is filled, by setting the filled bitmapin in link_station structure). Statistics that driver don't fill are filled by mac80211. Hence, add link_sta_statistics callback to fill link station statistics for MLO in sta_set_link_sinfo() by drivers. Signed-off-by: Sarika Sharma Link: https://patch.msgid.link/20250528054420.3050133-11-quic_sarishar@quicinc.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a305e7f9c6b2..fa2325692abf 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4133,6 +4133,15 @@ struct ieee80211_prep_tx_info { * Statistics that the driver doesn't fill will be filled by mac80211. * The callback can sleep. * + * @link_sta_statistics: Get link statistics for this station. For example with + * beacon filtering, the statistics kept by mac80211 might not be + * accurate, so let the driver pre-fill the statistics. The driver can + * fill most of the values (indicating which by setting the filled + * bitmap), but not all of them make sense - see the source for which + * ones are possible. + * Statistics that the driver doesn't fill will be filled by mac80211. + * The callback can sleep. + * * @conf_tx: Configure TX queue parameters (EDCF (aifs, cw_min, cw_max), * bursting) for a hardware TX queue. * Returns a negative error code on failure. @@ -4627,6 +4636,10 @@ struct ieee80211_ops { s64 offset); void (*reset_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int (*tx_last_beacon)(struct ieee80211_hw *hw); + void (*link_sta_statistics)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_link_sta *link_sta, + struct link_station_info *link_sinfo); /** * @ampdu_action: -- cgit v1.2.3 From b74947b4f6ff7c122a1bb6eb38bb7ecfbb1d3820 Mon Sep 17 00:00:00 2001 From: Roopni Devanathan Date: Sun, 15 Jun 2025 13:53:09 +0530 Subject: wifi: cfg80211/mac80211: Add support to get radio index Currently, per-radio attributes are set on per-phy basis, i.e., all the radios present in a wiphy will take attributes values sent from user. But each radio in a wiphy can get different values from userspace based on its requirement. To extend support to set per-radio attributes, add support to get radio index from userspace. Add an NL attribute - NL80211_ATTR_WIPHY_RADIO_INDEX, to get user specified radio index for which attributes should be changed. Pass this to individual drivers, so that the drivers can use this radio index to change per-radio attributes when necessary. Currently, per-radio attributes identified are: NL80211_ATTR_WIPHY_TX_POWER_LEVEL NL80211_ATTR_WIPHY_ANTENNA_TX NL80211_ATTR_WIPHY_ANTENNA_RX NL80211_ATTR_WIPHY_RETRY_SHORT NL80211_ATTR_WIPHY_RETRY_LONG NL80211_ATTR_WIPHY_FRAG_THRESHOLD NL80211_ATTR_WIPHY_RTS_THRESHOLD NL80211_ATTR_WIPHY_COVERAGE_CLASS NL80211_ATTR_TXQ_LIMIT NL80211_ATTR_TXQ_MEMORY_LIMIT NL80211_ATTR_TXQ_QUANTUM By default, the radio index is set to -1. This means the attribute should be treated as a global configuration. If the user has not specified any index, then the radio index passed to individual drivers would be -1. This would indicate that the attribute applies to all radios in that wiphy. Signed-off-by: Roopni Devanathan Link: https://patch.msgid.link/20250615082312.619639-2-quic_rdevanat@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 ++++++++---- include/net/mac80211.h | 17 +++++++++++------ include/uapi/linux/nl80211.h | 10 ++++++++++ 3 files changed, 29 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index eec066f4738a..ffd9564fc840 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4853,12 +4853,14 @@ struct cfg80211_ops { int (*set_mcast_rate)(struct wiphy *wiphy, struct net_device *dev, int rate[NUM_NL80211_BANDS]); - int (*set_wiphy_params)(struct wiphy *wiphy, u32 changed); + int (*set_wiphy_params)(struct wiphy *wiphy, int radio_idx, + u32 changed); int (*set_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev, + int radio_idx, enum nl80211_tx_power_setting type, int mbm); int (*get_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev, - unsigned int link_id, int *dbm); + int radio_idx, unsigned int link_id, int *dbm); void (*rfkill_poll)(struct wiphy *wiphy); @@ -4920,8 +4922,10 @@ struct cfg80211_ops { struct wireless_dev *wdev, struct mgmt_frame_regs *upd); - int (*set_antenna)(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant); - int (*get_antenna)(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant); + int (*set_antenna)(struct wiphy *wiphy, int radio_idx, + u32 tx_ant, u32 rx_ant); + int (*get_antenna)(struct wiphy *wiphy, int radio_idx, + u32 *tx_ant, u32 *rx_ant); int (*sched_scan_start)(struct wiphy *wiphy, struct net_device *dev, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index fa2325692abf..a0de0da4d79b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4517,7 +4517,7 @@ struct ieee80211_ops { enum nl80211_iftype new_type, bool p2p); void (*remove_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); - int (*config)(struct ieee80211_hw *hw, u32 changed); + int (*config)(struct ieee80211_hw *hw, int radio_idx, u32 changed); void (*bss_info_changed)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *info, @@ -4580,8 +4580,10 @@ struct ieee80211_ops { void (*get_key_seq)(struct ieee80211_hw *hw, struct ieee80211_key_conf *key, struct ieee80211_key_seq *seq); - int (*set_frag_threshold)(struct ieee80211_hw *hw, u32 value); - int (*set_rts_threshold)(struct ieee80211_hw *hw, u32 value); + int (*set_frag_threshold)(struct ieee80211_hw *hw, int radio_idx, + u32 value); + int (*set_rts_threshold)(struct ieee80211_hw *hw, int radio_idx, + u32 value); int (*sta_add)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta); int (*sta_remove)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, @@ -4678,7 +4680,8 @@ struct ieee80211_ops { int (*get_survey)(struct ieee80211_hw *hw, int idx, struct survey_info *survey); void (*rfkill_poll)(struct ieee80211_hw *hw); - void (*set_coverage_class)(struct ieee80211_hw *hw, s16 coverage_class); + void (*set_coverage_class)(struct ieee80211_hw *hw, int radio_idx, + s16 coverage_class); #ifdef CONFIG_NL80211_TESTMODE int (*testmode_cmd)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, void *data, int len); @@ -4693,8 +4696,10 @@ struct ieee80211_ops { void (*channel_switch)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel_switch *ch_switch); - int (*set_antenna)(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant); - int (*get_antenna)(struct ieee80211_hw *hw, u32 *tx_ant, u32 *rx_ant); + int (*set_antenna)(struct ieee80211_hw *hw, int radio_idx, + u32 tx_ant, u32 rx_ant); + int (*get_antenna)(struct ieee80211_hw *hw, int radio_idx, + u32 *tx_ant, u32 *rx_ant); int (*remain_on_channel)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index a289014abe37..2a71149c3065 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2907,6 +2907,14 @@ enum nl80211_commands { * APs Support". Drivers may set additional flags that they support * in the kernel or device. * + * @NL80211_ATTR_WIPHY_RADIO_INDEX: (int) Integer attribute denoting the index + * of the radio in interest. Internally a value of -1 is used to + * indicate that the radio id is not given in user-space. This means + * that all the attributes are applicable to all the radios. If there is + * a radio index provided in user-space, the attributes will be + * applicable to that specific radio only. If the radio id is greater + * thank the number of radios, error denoting invalid value is returned. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3464,6 +3472,8 @@ enum nl80211_attrs { NL80211_ATTR_ASSOC_MLD_EXT_CAPA_OPS, + NL80211_ATTR_WIPHY_RADIO_INDEX, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 264637941cf45cd3ffe070e25853d7e1a29f2004 Mon Sep 17 00:00:00 2001 From: Roopni Devanathan Date: Sun, 15 Jun 2025 13:53:10 +0530 Subject: wifi: cfg80211: Add Support to Set RTS Threshold for each Radio Currently, setting RTS threshold is based on per-phy basis, i.e., all the radios present in a wiphy will take RTS threshold value to be the one sent from userspace. But each radio in a multi-radio wiphy can have different RTS threshold requirements. To extend support to set RTS threshold for each radio, get the radio for which RTS threshold needs to be changed from the user. Use the attribute in NL - NL80211_ATTR_WIPHY_RADIO_INDEX, to identify the radio of interest. Create a new structure - wiphy_radio_cfg and add rts_threshold in it as a u32 value to store RTS threshold of each radio in a wiphy and allocate memory for it during wiphy register based on the wiphy.n_radio updated by drivers. Pass radio id received from the user to mac80211 drivers along with its corresponding RTS threshold. Signed-off-by: Roopni Devanathan Link: https://patch.msgid.link/20250615082312.619639-3-quic_rdevanat@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ffd9564fc840..0003733b1e77 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5547,6 +5547,18 @@ struct wiphy_iftype_akm_suites { int n_akm_suites; }; +/** + * struct wiphy_radio_cfg - physical radio config of a wiphy + * This structure describes the configurations of a physical radio in a + * wiphy. It is used to denote per-radio attributes belonging to a wiphy. + * + * @rts_threshold: RTS threshold (dot11RTSThreshold); + * -1 (default) = RTS/CTS disabled + */ +struct wiphy_radio_cfg { + u32 rts_threshold; +}; + /** * struct wiphy_radio_freq_range - wiphy frequency range * @start_freq: start range edge frequency (kHz) @@ -5802,6 +5814,10 @@ struct wiphy_radio { * supports enabling HW timestamping for all peers (i.e. no need to * specify a mac address). * + * @radio_cfg: configuration of radios belonging to a muli-radio wiphy. This + * struct contains a list of all radio specific attributes and should be + * used only for multi-radio wiphy. + * * @radio: radios belonging to this wiphy * @n_radio: number of radios */ @@ -5891,6 +5907,8 @@ struct wiphy { void (*reg_notifier)(struct wiphy *wiphy, struct regulatory_request *request); + struct wiphy_radio_cfg *radio_cfg; + /* fields below are read-only, assigned by cfg80211 */ const struct ieee80211_regdomain __rcu *regd; -- cgit v1.2.3 From 89595190058c6e9ca4a8ca7d49be3fc8d2395e79 Mon Sep 17 00:00:00 2001 From: Roopni Devanathan Date: Sun, 15 Jun 2025 13:53:11 +0530 Subject: wifi: cfg80211: Report per-radio RTS threshold to userspace In case of multi-radio wiphys, with per-radio RTS threshold brought into use, RTS threshold for each radio in a wiphy can be recorded in wiphy parameter - wiphy_radio_cfg, as an array. Add a new attribute - NL80211_WIPHY_RADIO_ATTR_RTS_THRESHOLD in nested parameter - NL80211_ATTR_WIPHY_RADIOS. When a request for getting RTS threshold for a particular radio is received, parse the radio id and get the required data. Add this data to the newly added nested attribute NL80211_WIPHY_RADIO_ATTR_RTS_THRESHOLD. Add support to report this data to userspace. Signed-off-by: Roopni Devanathan Link: https://patch.msgid.link/20250615082312.619639-4-quic_rdevanat@quicinc.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 2a71149c3065..39460334dafb 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -8106,6 +8106,7 @@ enum nl80211_ap_settings_flags { * and contains attributes defined in &enum nl80211_if_combination_attrs. * @NL80211_WIPHY_RADIO_ATTR_ANTENNA_MASK: bitmask (u32) of antennas * connected to this radio. + * @NL80211_WIPHY_RADIO_ATTR_RTS_THRESHOLD: RTS threshold (u32) of this radio. * * @__NL80211_WIPHY_RADIO_ATTR_LAST: Internal * @NL80211_WIPHY_RADIO_ATTR_MAX: Highest attribute @@ -8117,6 +8118,7 @@ enum nl80211_wiphy_radio_attrs { NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE, NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION, NL80211_WIPHY_RADIO_ATTR_ANTENNA_MASK, + NL80211_WIPHY_RADIO_ATTR_RTS_THRESHOLD, /* keep last */ __NL80211_WIPHY_RADIO_ATTR_LAST, -- cgit v1.2.3 From 5ea255673cdb4a9bf99dd3e4fc9ca1089f5692a3 Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Tue, 17 Jun 2025 18:06:07 +1000 Subject: wifi: cfg80211: support configuration of S1G station capabilities Currently there is no support for initialising a peers S1G capabilities, this patch adds support for configuring an S1G station. Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20250617080610.756048-2-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0003733b1e77..4a092da3a9de 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -560,7 +560,7 @@ struct ieee80211_sta_s1g_cap { * @vht_cap: VHT capabilities in this band * @s1g_cap: S1G capabilities in this band * @edmg_cap: EDMG capabilities in this band - * @s1g_cap: S1G capabilities in this band (S1B band only, of course) + * @s1g_cap: S1G capabilities in this band (S1G band only, of course) * @n_iftype_data: number of iftype data entries * @iftype_data: interface type data entries. Note that the bits in * @types_mask inside this structure cannot overlap (i.e. only @@ -1653,6 +1653,7 @@ struct sta_txpwr { * @he_6ghz_capa: HE 6 GHz Band capabilities of station * @eht_capa: EHT capabilities of station * @eht_capa_len: the length of the EHT capabilities + * @s1g_capa: S1G capabilities of station */ struct link_station_parameters { const u8 *mld_mac; @@ -1671,6 +1672,7 @@ struct link_station_parameters { const struct ieee80211_he_6ghz_capa *he_6ghz_capa; const struct ieee80211_eht_cap_elem *eht_capa; u8 eht_capa_len; + const struct ieee80211_s1g_cap *s1g_capa; }; /** -- cgit v1.2.3 From 2a8a6b7c4cb03808a707ae19b2f0c5eb9b631e9e Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Tue, 17 Jun 2025 18:06:08 +1000 Subject: wifi: mac80211: handle station association response with S1G Add support for updating the stations S1G capabilities when an S1G association occurs. Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20250617080610.756048-3-lachlan.hodges@morsemicro.com [remove unused S1G_CAP3_MAX_MPDU_LEN_3895/_7791] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a0de0da4d79b..dcd5969bb559 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2428,6 +2428,7 @@ struct ieee80211_sta_aggregates { * @he_cap: HE capabilities of this STA * @he_6ghz_capa: on 6 GHz, holds the HE 6 GHz band capabilities * @eht_cap: EHT capabilities of this STA + * @s1g_cap: S1G capabilities of this STA * @agg: per-link data for multi-link aggregation * @bandwidth: current bandwidth the station can receive with * @rx_nss: in HT/VHT, the maximum number of spatial streams the @@ -2450,6 +2451,7 @@ struct ieee80211_link_sta { struct ieee80211_sta_he_cap he_cap; struct ieee80211_he_6ghz_capa he_6ghz_capa; struct ieee80211_sta_eht_cap eht_cap; + struct ieee80211_sta_s1g_cap s1g_cap; struct ieee80211_sta_aggregates agg; -- cgit v1.2.3 From a4c746f06853f91d3759ae8aca514d135b6aa56d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Jun 2025 15:48:42 +0200 Subject: uapi/fcntl: mark range as reserved Mark the range from -10000 to -40000 as a range reserved for special in-kernel values. Move the PIDFD_SELF_*/PIDFD_THREAD_* sentinels over so all the special values are in one place. Link: https://lore.kernel.org/20250624-work-pidfs-fhandle-v2-6-d02a04858fe3@kernel.org Reviewed-by: Jan Kara Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- include/uapi/linux/fcntl.h | 16 ++++++++++++++++ include/uapi/linux/pidfd.h | 15 --------------- 2 files changed, 16 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index a15ac2fa4b20..ed02d8ae0667 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -90,10 +90,26 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +/* Reserved kernel ranges [-100], [-10000, -40000]. */ #define AT_FDCWD -100 /* Special value for dirfd used to indicate openat should use the current working directory. */ +/* + * The concept of process and threads in userland and the kernel is a confusing + * one - within the kernel every thread is a 'task' with its own individual PID, + * however from userland's point of view threads are grouped by a single PID, + * which is that of the 'thread group leader', typically the first thread + * spawned. + * + * To cut the Gideon knot, for internal kernel usage, we refer to + * PIDFD_SELF_THREAD to refer to the current thread (or task from a kernel + * perspective), and PIDFD_SELF_THREAD_GROUP to refer to the current thread + * group leader... + */ +#define PIDFD_SELF_THREAD -10000 /* Current thread. */ +#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */ + /* Generic flags for the *at(2) family of syscalls. */ diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index c27a4e238e4b..957db425d459 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -42,21 +42,6 @@ #define PIDFD_COREDUMP_USER (1U << 2) /* coredump was done as the user. */ #define PIDFD_COREDUMP_ROOT (1U << 3) /* coredump was done as root. */ -/* - * The concept of process and threads in userland and the kernel is a confusing - * one - within the kernel every thread is a 'task' with its own individual PID, - * however from userland's point of view threads are grouped by a single PID, - * which is that of the 'thread group leader', typically the first thread - * spawned. - * - * To cut the Gideon knot, for internal kernel usage, we refer to - * PIDFD_SELF_THREAD to refer to the current thread (or task from a kernel - * perspective), and PIDFD_SELF_THREAD_GROUP to refer to the current thread - * group leader... - */ -#define PIDFD_SELF_THREAD -10000 /* Current thread. */ -#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */ - /* * ...and for userland we make life simpler - PIDFD_SELF refers to the current * thread, PIDFD_SELF_PROCESS refers to the process thread group leader. -- cgit v1.2.3 From 67fcec2919e4ed31ab845eb456ad7d6f1e85505c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Jun 2025 15:48:49 +0200 Subject: fcntl/pidfd: redefine PIDFD_SELF_THREAD_GROUP Don't jump somewhere into the middle of the reserved range. We're still able to change that value it won't be that widely used yet. If not, we can revert. Signed-off-by: Christian Brauner --- include/uapi/linux/fcntl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index ed02d8ae0667..ba4a698d2f33 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -108,7 +108,7 @@ * group leader... */ #define PIDFD_SELF_THREAD -10000 /* Current thread. */ -#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */ +#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ /* Generic flags for the *at(2) family of syscalls. */ -- cgit v1.2.3 From cd5d2006327b6d8488612cb8c03ad7304417c8f2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Jun 2025 10:29:10 +0200 Subject: uapi/fcntl: add FD_INVALID Add a marker for an invalid file descriptor. Link: https://lore.kernel.org/20250624-work-pidfs-fhandle-v2-7-d02a04858fe3@kernel.org Reviewed-by: Jan Kara Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- include/uapi/linux/fcntl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index ba4a698d2f33..a5bebe7c4400 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -110,6 +110,7 @@ #define PIDFD_SELF_THREAD -10000 /* Current thread. */ #define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ +#define FD_INVALID -10009 /* Invalid file descriptor: -10000 - EBADF = -10009 */ /* Generic flags for the *at(2) family of syscalls. */ -- cgit v1.2.3 From b39f7d75dc41b5f5d028192cd5d66cff71179f35 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 24 Jun 2025 14:21:27 +0100 Subject: fs: Remove three arguments from block_write_end() block_write_end() looks like it can be used as a ->write_end() implementation. However, it can't as it does not unlock nor put the folio. Since it does not use the 'file', 'mapping' nor 'fsdata' arguments, remove them. Signed-off-by: "Matthew Wilcox (Oracle)" Link: https://lore.kernel.org/20250624132130.1590285-1-willy@infradead.org Signed-off-by: Christian Brauner --- include/linux/buffer_head.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 0029ff880e27..178eb90e9cf3 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -262,9 +262,7 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, struct folio **foliop, get_block_t *get_block); int __block_write_begin(struct folio *folio, loff_t pos, unsigned len, get_block_t *get_block); -int block_write_end(struct file *, struct address_space *, - loff_t, unsigned len, unsigned copied, - struct folio *, void *); +int block_write_end(loff_t pos, unsigned len, unsigned copied, struct folio *); int generic_write_end(struct file *, struct address_space *, loff_t, unsigned len, unsigned copied, struct folio *, void *); -- cgit v1.2.3 From 6241b49540a65a6d5274fa938fd3eb4cbfe2e076 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Tue, 24 Jun 2025 10:06:41 +0200 Subject: tty: fix tty_port_tty_*hangup() kernel-doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit below added a new helper, but omitted to move (and add) the corressponding kernel-doc. Do it now. Signed-off-by: "Jiri Slaby (SUSE)" Fixes: 2b5eac0f8c6e ("tty: introduce and use tty_port_tty_vhangup() helper") Link: https://lore.kernel.org/all/b23d566c-09dc-7374-cc87-0ad4660e8b2e@linux.intel.com/ Reported-by: Ilpo Järvinen Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Link: https://lore.kernel.org/r/20250624080641.509959-6-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_port.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index 021f9a8415c0..332ddb93603e 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -251,11 +251,20 @@ static inline int tty_port_users(struct tty_port *port) return port->count + port->blocked_open; } +/** + * tty_port_tty_hangup - helper to hang up a tty asynchronously + * @port: tty port + * @check_clocal: hang only ttys with %CLOCAL unset? + */ static inline void tty_port_tty_hangup(struct tty_port *port, bool check_clocal) { __tty_port_tty_hangup(port, check_clocal, true); } +/** + * tty_port_tty_vhangup - helper to hang up a tty synchronously + * @port: tty port + */ static inline void tty_port_tty_vhangup(struct tty_port *port) { __tty_port_tty_hangup(port, false, false); -- cgit v1.2.3 From af4db5a35a4ef7a68046883bfd12468007db38f1 Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Wed, 18 Jun 2025 22:49:42 +0000 Subject: usb: typec: altmodes/displayport: do not index invalid pin_assignments A poorly implemented DisplayPort Alt Mode port partner can indicate that its pin assignment capabilities are greater than the maximum value, DP_PIN_ASSIGN_F. In this case, calls to pin_assignment_show will cause a BRK exception due to an out of bounds array access. Prevent for loop in pin_assignment_show from accessing invalid values in pin_assignments by adding DP_PIN_ASSIGN_MAX value in typec_dp.h and using i < DP_PIN_ASSIGN_MAX as a loop condition. Fixes: 0e3bb7d6894d ("usb: typec: Add driver for DisplayPort alternate mode") Cc: stable Signed-off-by: RD Babiera Reviewed-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250618224943.3263103-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_dp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h index f2da264d9c14..acb0ad03bdac 100644 --- a/include/linux/usb/typec_dp.h +++ b/include/linux/usb/typec_dp.h @@ -57,6 +57,7 @@ enum { DP_PIN_ASSIGN_D, DP_PIN_ASSIGN_E, DP_PIN_ASSIGN_F, /* Not supported after v1.0b */ + DP_PIN_ASSIGN_MAX, }; /* DisplayPort alt mode specific commands */ -- cgit v1.2.3 From 3941e37f62fe2c3c8b8675c12183185f20450539 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Jun 2025 16:57:51 +0200 Subject: uapi/fcntl: add FD_PIDFS_ROOT Add a special file descriptor indicating the root of the pidfs filesystem. Signed-off-by: Christian Brauner --- include/uapi/linux/fcntl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index a5bebe7c4400..f291ab4f94eb 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -110,6 +110,7 @@ #define PIDFD_SELF_THREAD -10000 /* Current thread. */ #define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ +#define FD_PIDFS_ROOT -10002 /* Root of the pidfs filesystem */ #define FD_INVALID -10009 /* Invalid file descriptor: -10000 - EBADF = -10009 */ /* Generic flags for the *at(2) family of syscalls. */ -- cgit v1.2.3 From a02fd05661d73a8507dd70dd820e9b984490c545 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 24 Jun 2025 14:29:27 +0800 Subject: PCI: Extend isolated function probing to LoongArch Like s390 and the jailhouse hypervisor, LoongArch's PCI architecture allows passing isolated PCI functions to a guest OS instance. So it is possible that there is a multi-function device without function 0 for the host or guest. Allow probing such functions by adding a IS_ENABLED(CONFIG_LOONGARCH) case in the hypervisor_isolated_pci_functions() helper. This is similar to commit 189c6c33ff42 ("PCI: Extend isolated function probing to s390"). Signed-off-by: Huacai Chen Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250624062927.4037734-1-chenhuacai@loongson.cn --- include/linux/hypervisor.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/hypervisor.h b/include/linux/hypervisor.h index 9efbc54e35e5..be5417303ecf 100644 --- a/include/linux/hypervisor.h +++ b/include/linux/hypervisor.h @@ -37,6 +37,9 @@ static inline bool hypervisor_isolated_pci_functions(void) if (IS_ENABLED(CONFIG_S390)) return true; + if (IS_ENABLED(CONFIG_LOONGARCH)) + return true; + return jailhouse_paravirt(); } -- cgit v1.2.3 From 2ce80bc11ff91913bc6eb31a5876621f090cf5cc Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 14 Jun 2025 02:03:43 +0100 Subject: misc: vmw_vmci: Remove unused vmci_doorbell_notify vmci_doorbell_notify() was added in 2013 by commit 83e2ec765be0 ("VMCI: doorbell implementation.") but has remained unused. Remove it. Signed-off-by: "Dr. David Alan Gilbert" Link: https://lore.kernel.org/r/20250614010344.636076-3-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- include/linux/vmw_vmci_api.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/vmw_vmci_api.h b/include/linux/vmw_vmci_api.h index f28907345c80..28a3b6a9e1ca 100644 --- a/include/linux/vmw_vmci_api.h +++ b/include/linux/vmw_vmci_api.h @@ -35,7 +35,6 @@ int vmci_doorbell_create(struct vmci_handle *handle, u32 flags, u32 priv_flags, vmci_callback notify_cb, void *client_data); int vmci_doorbell_destroy(struct vmci_handle handle); -int vmci_doorbell_notify(struct vmci_handle handle, u32 priv_flags); u32 vmci_get_context_id(void); bool vmci_is_context_owner(u32 context_id, kuid_t uid); int vmci_register_vsock_callback(vmci_vsock_cb callback); -- cgit v1.2.3 From ea6895f021c0716a280eff576d3f64f549187faa Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 14 Jun 2025 02:03:44 +0100 Subject: misc: vmw_vmci: Remove unused qpair functions vmci_qpair_dequeue(), vmci_qpair_enqueue() and vmci_qpair_peek() were added in 2013 by commit 06164d2b72aa ("VMCI: queue pairs implementation.") but have remained unused. Remove them. (The iov version of those functions is used) Signed-off-by: "Dr. David Alan Gilbert" Link: https://lore.kernel.org/r/20250614010344.636076-4-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- include/linux/vmw_vmci_api.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/vmw_vmci_api.h b/include/linux/vmw_vmci_api.h index 28a3b6a9e1ca..41764a684423 100644 --- a/include/linux/vmw_vmci_api.h +++ b/include/linux/vmw_vmci_api.h @@ -60,12 +60,6 @@ s64 vmci_qpair_produce_free_space(const struct vmci_qp *qpair); s64 vmci_qpair_produce_buf_ready(const struct vmci_qp *qpair); s64 vmci_qpair_consume_free_space(const struct vmci_qp *qpair); s64 vmci_qpair_consume_buf_ready(const struct vmci_qp *qpair); -ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair, - const void *buf, size_t buf_size, int mode); -ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair, - void *buf, size_t buf_size, int mode); -ssize_t vmci_qpair_peek(struct vmci_qp *qpair, void *buf, size_t buf_size, - int mode); ssize_t vmci_qpair_enquev(struct vmci_qp *qpair, struct msghdr *msg, size_t iov_size, int mode); ssize_t vmci_qpair_dequev(struct vmci_qp *qpair, -- cgit v1.2.3 From 7aecbbdf86d3288576a84df360f96b93ceec7ee2 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Fri, 20 Jun 2025 22:35:18 +0800 Subject: char: misc: Remove redundant forward declarations Header miscdevice.h includes linux/device.h which has definations for below two forward declarations directly or indirectly: struct device; struct attribute_group; Remove these redundant forward declarations from miscdevice.h Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20250620-fix_mischar-v1-1-6c2716bbf1fa@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- include/linux/miscdevice.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 69e110c2b86a..3e6deb00fc85 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -73,9 +73,6 @@ #define RFKILL_MINOR 242 #define MISC_DYNAMIC_MINOR 255 -struct device; -struct attribute_group; - struct miscdevice { int minor; const char *name; -- cgit v1.2.3 From cb444006a625c60e6d4dd3753863c3c74f96aac3 Mon Sep 17 00:00:00 2001 From: David Dai Date: Tue, 24 Jun 2025 15:49:06 -0700 Subject: sched_ext, rcu: Eject BPF scheduler on RCU CPU stall panic For systems using a sched_ext scheduler and has panic_on_rcu_stall enabled, try kicking out the current scheduler before issuing a panic. While there are numerous reasons for RCU CPU stalls that are not directly attributed to the scheduler, deferring the panic gives sched_ext an opportunity to provide additional debug info when ejecting the current scheduler. Also, handling the event more gracefully allows us to potentially recover the system instead of incurring additional down time. Suggested-by: Tejun Heo Reviewed-by: Paul E. McKenney Signed-off-by: David Dai Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 8b92842776cb..0cf0915572c9 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -206,12 +206,14 @@ struct sched_ext_entity { void sched_ext_free(struct task_struct *p); void print_scx_info(const char *log_lvl, struct task_struct *p); void scx_softlockup(u32 dur_s); +bool scx_rcu_cpu_stall(void); #else /* !CONFIG_SCHED_CLASS_EXT */ static inline void sched_ext_free(struct task_struct *p) {} static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {} static inline void scx_softlockup(u32 dur_s) {} +static inline bool scx_rcu_cpu_stall(void) { return false; } #endif /* CONFIG_SCHED_CLASS_EXT */ -- cgit v1.2.3 From c9e78afa688afec528784b79bb02d513cdcd6527 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 23 Jun 2025 12:53:55 +0200 Subject: udp_tunnel: fix deadlock in udp_tunnel_nic_set_port_priv() While configuring a vxlan tunnel in a system with a i40e NIC driver, I observe the following deadlock: WARNING: possible recursive locking detected 6.16.0-rc2.net-next-6.16_92d87230d899+ #13 Tainted: G E -------------------------------------------- kworker/u256:4/1125 is trying to acquire lock: ffff88921ab9c8c8 (&utn->lock){+.+.}-{4:4}, at: i40e_udp_tunnel_set_port (/home/pabeni/net-next/include/net/udp_tunnel.h:343 /home/pabeni/net-next/drivers/net/ethernet/intel/i40e/i40e_main.c:13013) i40e but task is already holding lock: ffff88921ab9c8c8 (&utn->lock){+.+.}-{4:4}, at: udp_tunnel_nic_device_sync_work (/home/pabeni/net-next/net/ipv4/udp_tunnel_nic.c:739) udp_tunnel other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&utn->lock); lock(&utn->lock); *** DEADLOCK *** May be due to missing lock nesting notation 4 locks held by kworker/u256:4/1125: #0: ffff8892910ca158 ((wq_completion)udp_tunnel_nic){+.+.}-{0:0}, at: process_one_work (/home/pabeni/net-next/kernel/workqueue.c:3213) #1: ffffc900244efd30 ((work_completion)(&utn->work)){+.+.}-{0:0}, at: process_one_work (/home/pabeni/net-next/kernel/workqueue.c:3214) #2: ffffffff9a14e290 (rtnl_mutex){+.+.}-{4:4}, at: udp_tunnel_nic_device_sync_work (/home/pabeni/net-next/net/ipv4/udp_tunnel_nic.c:737) udp_tunnel #3: ffff88921ab9c8c8 (&utn->lock){+.+.}-{4:4}, at: udp_tunnel_nic_device_sync_work (/home/pabeni/net-next/net/ipv4/udp_tunnel_nic.c:739) udp_tunnel stack backtrace: Hardware name: Dell Inc. PowerEdge R7525/0YHMCJ, BIOS 2.2.5 04/08/2021 i Call Trace: dump_stack_lvl (/home/pabeni/net-next/lib/dump_stack.c:123) print_deadlock_bug (/home/pabeni/net-next/kernel/locking/lockdep.c:3047) validate_chain (/home/pabeni/net-next/kernel/locking/lockdep.c:3901) __lock_acquire (/home/pabeni/net-next/kernel/locking/lockdep.c:5240) lock_acquire.part.0 (/home/pabeni/net-next/kernel/locking/lockdep.c:473 /home/pabeni/net-next/kernel/locking/lockdep.c:5873) __mutex_lock (/home/pabeni/net-next/kernel/locking/mutex.c:604 /home/pabeni/net-next/kernel/locking/mutex.c:747) i40e_udp_tunnel_set_port (/home/pabeni/net-next/include/net/udp_tunnel.h:343 /home/pabeni/net-next/drivers/net/ethernet/intel/i40e/i40e_main.c:13013) i40e udp_tunnel_nic_device_sync_by_port (/home/pabeni/net-next/net/ipv4/udp_tunnel_nic.c:230 /home/pabeni/net-next/net/ipv4/udp_tunnel_nic.c:249) udp_tunnel __udp_tunnel_nic_device_sync.part.0 (/home/pabeni/net-next/net/ipv4/udp_tunnel_nic.c:292) udp_tunnel udp_tunnel_nic_device_sync_work (/home/pabeni/net-next/net/ipv4/udp_tunnel_nic.c:742) udp_tunnel process_one_work (/home/pabeni/net-next/kernel/workqueue.c:3243) worker_thread (/home/pabeni/net-next/kernel/workqueue.c:3315 /home/pabeni/net-next/kernel/workqueue.c:3402) kthread (/home/pabeni/net-next/kernel/kthread.c:464) AFAICS all the existing callsites of udp_tunnel_nic_set_port_priv() are already under the utn lock scope, avoid (re-)acquiring it in such a function. Fixes: 1ead7501094c ("udp_tunnel: remove rtnl_lock dependency") Signed-off-by: Paolo Abeni Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/95a827621ec78c12d1564ec3209e549774f9657d.1750675978.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/net/udp_tunnel.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index cbd3a43074bd..9acef2fbd2fd 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -339,9 +339,8 @@ udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table, unsigned int idx, u8 priv) { if (udp_tunnel_nic_ops) { - udp_tunnel_nic_ops->lock(dev); + udp_tunnel_nic_ops->assert_locked(dev); udp_tunnel_nic_ops->set_port_priv(dev, table, idx, priv); - udp_tunnel_nic_ops->unlock(dev); } } -- cgit v1.2.3 From f3128dd6762d71ec92ae888bf582a5f751c3f2e0 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 24 Jun 2025 09:41:08 +0800 Subject: security: Remove unused declaration cap_mmap_file() Commit 3f4f1f8a1ab7 ("capabilities: remove cap_mmap_file()") removed the implementation but leave declaration. Signed-off-by: Yue Haibing Reviewed-by: Serge Hallyn Signed-off-by: Paul Moore --- include/linux/security.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index dba349629229..e8d9f6069f0c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -193,8 +193,6 @@ int cap_inode_getsecurity(struct mnt_idmap *idmap, struct inode *inode, const char *name, void **buffer, bool alloc); extern int cap_mmap_addr(unsigned long addr); -extern int cap_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags); extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); -- cgit v1.2.3 From 22bbc1dcd0d6785fb390c41f0dd5b5e218d23bdd Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Mon, 23 Jun 2025 12:00:53 +0200 Subject: vsock/uapi: fix linux/vm_sockets.h userspace compilation errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a userspace application just include will fail to build with the following errors: /usr/include/linux/vm_sockets.h:182:39: error: invalid application of ‘sizeof’ to incomplete type ‘struct sockaddr’ 182 | unsigned char svm_zero[sizeof(struct sockaddr) - | ^~~~~~ /usr/include/linux/vm_sockets.h:183:39: error: ‘sa_family_t’ undeclared here (not in a function) 183 | sizeof(sa_family_t) - | Include for userspace (guarded by ifndef __KERNEL__) where `struct sockaddr` and `sa_family_t` are defined. We already do something similar in and . Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") Reported-by: Daan De Meyer Signed-off-by: Stefano Garzarella Link: https://patch.msgid.link/20250623100053.40979-1-sgarzare@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/vm_sockets.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h index ed07181d4eff..e05280e41522 100644 --- a/include/uapi/linux/vm_sockets.h +++ b/include/uapi/linux/vm_sockets.h @@ -17,6 +17,10 @@ #ifndef _UAPI_VM_SOCKETS_H #define _UAPI_VM_SOCKETS_H +#ifndef __KERNEL__ +#include /* for struct sockaddr and sa_family_t */ +#endif + #include #include -- cgit v1.2.3 From f65bbf05392b44714ccdcc4b5b1bebfd471d2665 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 1 Dec 2024 19:28:34 -0500 Subject: alpha: regularize the situation with asm/param.h The only reason why alpha can't do what sparc et.al. are doing is that include/asm-generic/param.h relies upon the value of HZ set for userland header in uapi/asm/param.h being 100. We need that value to define USER_HZ and we need that definition to outlive the redefinition of HZ kernel-side. And alpha needs it to be 1024, not 100 like everybody else. So let's add __USER_HZ to uapi/asm-generic/param.h, defaulting to 100 and used to define HZ. That way include/asm-generic/param.h can use that thing instead of open-coding it - it won't be affected by undefining and redefining HZ. That done, alpha asm/param.h can be removed and uapi/asm/param.h switched to defining __USER_HZ and EXEC_PAGESIZE and then including - asm/param.h will resolve to uapi/asm/param.h, which pulls , which will do the right thing both in the kernel and userland contexts. Reviewed-by: Arnd Bergmann Signed-off-by: Al Viro --- include/asm-generic/param.h | 2 +- include/uapi/asm-generic/param.h | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/param.h b/include/asm-generic/param.h index 8d3009dd28ff..8348c116aa3b 100644 --- a/include/asm-generic/param.h +++ b/include/asm-generic/param.h @@ -6,6 +6,6 @@ # undef HZ # define HZ CONFIG_HZ /* Internal kernel timer frequency */ -# define USER_HZ 100 /* some user interfaces are */ +# define USER_HZ __USER_HZ /* some user interfaces are */ # define CLOCKS_PER_SEC (USER_HZ) /* in "ticks" like times() */ #endif /* __ASM_GENERIC_PARAM_H */ diff --git a/include/uapi/asm-generic/param.h b/include/uapi/asm-generic/param.h index baad02ea7f93..3ed505dfea13 100644 --- a/include/uapi/asm-generic/param.h +++ b/include/uapi/asm-generic/param.h @@ -2,8 +2,12 @@ #ifndef _UAPI__ASM_GENERIC_PARAM_H #define _UAPI__ASM_GENERIC_PARAM_H +#ifndef __USER_HZ +#define __USER_HZ 100 +#endif + #ifndef HZ -#define HZ 100 +#define HZ __USER_HZ #endif #ifndef EXEC_PAGESIZE -- cgit v1.2.3 From 67caa528ae08cd05e485c0ea6aea0baaf6579b06 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Sat, 21 Jun 2025 10:28:41 -0600 Subject: ublk: fix narrowing warnings in UAPI header When a C++ file compiled with -Wc++11-narrowing includes the UAPI header linux/ublk_cmd.h, ublk_sqe_addr_to_auto_buf_reg()'s assignments of u64 values to u8, u16, and u32 fields result in compiler warnings. Add explicit casts to the intended types to avoid these warnings. Drop the unnecessary bitmasks. Reported-by: Uday Shankar Signed-off-by: Caleb Sander Mateos Fixes: 99c1e4eb6a3f ("ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG") Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250621162842.337452-1-csander@purestorage.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 77d9d6af46da..c062109cb686 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -450,10 +450,10 @@ static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg( __u64 sqe_addr) { struct ublk_auto_buf_reg reg = { - .index = sqe_addr & 0xffff, - .flags = (sqe_addr >> 16) & 0xff, - .reserved0 = (sqe_addr >> 24) & 0xff, - .reserved1 = sqe_addr >> 32, + .index = (__u16)sqe_addr, + .flags = (__u8)(sqe_addr >> 16), + .reserved0 = (__u8)(sqe_addr >> 24), + .reserved1 = (__u32)(sqe_addr >> 32), }; return reg; -- cgit v1.2.3 From 81b4d1a1d03301dcca8af5c58eded9e535f1f6ed Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Sat, 21 Jun 2025 11:10:14 -0600 Subject: ublk: update UBLK_F_SUPPORT_ZERO_COPY comment in UAPI header UBLK_F_SUPPORT_ZERO_COPY has a very old comment describing the initial idea for how zero-copy would be implemented. The actual implementation added in commit 1f6540e2aabb ("ublk: zc register/unregister bvec") uses io_uring registered buffers rather than shared memory mapping. Remove the inaccurate remarks about mapping ublk request memory into the ublk server's address space and requiring 4K block size. Replace them with a description of the current zero-copy mechanism. Signed-off-by: Caleb Sander Mateos Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250621171015.354932-1-csander@purestorage.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index c062109cb686..c9751bdfd937 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -135,8 +135,28 @@ #define UBLKSRV_IO_BUF_TOTAL_SIZE (1ULL << UBLKSRV_IO_BUF_TOTAL_BITS) /* - * zero copy requires 4k block size, and can remap ublk driver's io - * request into ublksrv's vm space + * ublk server can register data buffers for incoming I/O requests with a sparse + * io_uring buffer table. The request buffer can then be used as the data buffer + * for io_uring operations via the fixed buffer index. + * Note that the ublk server can never directly access the request data memory. + * + * To use this feature, the ublk server must first register a sparse buffer + * table on an io_uring instance. + * When an incoming ublk request is received, the ublk server submits a + * UBLK_U_IO_REGISTER_IO_BUF command to that io_uring instance. The + * ublksrv_io_cmd's q_id and tag specify the request whose buffer to register + * and addr is the index in the io_uring's buffer table to install the buffer. + * SQEs can now be submitted to the io_uring to read/write the request's buffer + * by enabling fixed buffers (e.g. using IORING_OP_{READ,WRITE}_FIXED or + * IORING_URING_CMD_FIXED) and passing the registered buffer index in buf_index. + * Once the last io_uring operation using the request's buffer has completed, + * the ublk server submits a UBLK_U_IO_UNREGISTER_IO_BUF command with q_id, tag, + * and addr again specifying the request buffer to unregister. + * The ublk request is completed when its buffer is unregistered from all + * io_uring instances and the ublk server issues UBLK_U_IO_COMMIT_AND_FETCH_REQ. + * + * Not available for UBLK_F_UNPRIVILEGED_DEV, as a ublk server can leak + * uninitialized kernel memory by not reading into the full request buffer. */ #define UBLK_F_SUPPORT_ZERO_COPY (1ULL << 0) -- cgit v1.2.3 From 63d0a9123120a2e10861ac7f6dc474bee653d3b2 Mon Sep 17 00:00:00 2001 From: Ujwal Jain Date: Sat, 14 Jun 2025 16:47:11 +0800 Subject: kunit: Adjust kunit_test timeout based on test_{suite,case} speed Currently, the in-kernel kunit test case timeout is 300 seconds. (There is a separate timeout mechanism for the whole test execution in kunit.py, but that's unrelated.) However, tests marked 'slow' or 'very slow' may timeout, particularly on slower machines. Implement a multiplier to the test-case timeout, so that slower tests have longer to complete: - DEFAULT -> 1x default timeout - KUNIT_SPEED_SLOW -> 3x default timeout - KUNIT_SPEED_VERY_SLOW -> 12x default timeout A further change is planned to allow user configuration of the default/base timeout to allow people with faster or slower machines to adjust these to their use-cases. Link: https://lore.kernel.org/r/20250614084711.2654593-2-davidgow@google.com Signed-off-by: Ujwal Jain Co-developed-by: David Gow Signed-off-by: David Gow Reviewed-by: Rae Moar Signed-off-by: Shuah Khan --- include/kunit/try-catch.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/kunit/try-catch.h b/include/kunit/try-catch.h index 7c966a1adbd3..d4e1a5b98ed6 100644 --- a/include/kunit/try-catch.h +++ b/include/kunit/try-catch.h @@ -47,6 +47,7 @@ struct kunit_try_catch { int try_result; kunit_try_catch_func_t try; kunit_try_catch_func_t catch; + unsigned long timeout; void *context; }; -- cgit v1.2.3 From 5a2a5b65d5d67279be9e1f0e4b9baf39ee594cb1 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 16 Jun 2025 11:26:20 +0300 Subject: RDMA/core: Add driver APIs pre_destroy_cq() and post_destroy_cq() Currently in ib_free_cq, it disables IRQ or cancel the CQ work before driver destroy_cq. This isn't good as a new IRQ or a CQ work can be submitted immediately after disabling IRQ or canceling CQ work, which may run concurrently with destroy_cq and cause crashes. The right flow should be: 1. Driver disables CQ to make sure no new CQ event will be submitted; 2. Disables IRQ or Cancels CQ work in core layer, to make sure no CQ polling work is running; 3. Free all resources to destroy the CQ. This patch adds 2 driver APIs: - pre_destroy_cq(): Disable a CQ to prevent it from generating any new work completions, but not free any kernel resources; - post_destroy_cq(): Free all kernel resources. In ib_free_cq, the IRQ is disabled or CQ work is canceled after pre_destroy_cq, and before post_destroy_cq. Fixes: 14d3a3b2498e ("IB: add a proper completion queue abstraction") Signed-off-by: Mark Zhang Link: https://patch.msgid.link/b5f7ae3d75f44a3e15ff3f4eb2bbdea13e06b97f.1750062328.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index af43a8d2a74a..38f68d245fa6 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2489,6 +2489,15 @@ struct ib_device_ops { int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata); int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); + /** + * pre_destroy_cq - Prevent a cq from generating any new work + * completions, but not free any kernel resources + */ + int (*pre_destroy_cq)(struct ib_cq *cq); + /** + * post_destroy_cq - Free all kernel resources + */ + void (*post_destroy_cq)(struct ib_cq *cq); struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags); struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, -- cgit v1.2.3 From 52931f55159ea5c27ad4fe66fc0cb8ad75ab795b Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Tue, 17 Jun 2025 11:19:15 +0300 Subject: net/mlx5: fs, add multiple prios to RDMA TRANSPORT steering domain RDMA TRANSPORT domains were initially limited to a single priority. This change allows the domains to have multiple priorities, making it possible to add several rules and control the order in which they're evaluated. Signed-off-by: Patrisious Haddad Reviewed-by: Mark Bloch Link: https://patch.msgid.link/b299cbb4c8678a33da6e6b6988b5bf6145c54b88.1750148083.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 939e58c2f386..86055d55836d 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -40,7 +40,7 @@ #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) -#define MLX5_RDMA_TRANSPORT_BYPASS_PRIO 0 +#define MLX5_RDMA_TRANSPORT_BYPASS_PRIO 16 #define MLX5_FS_MAX_POOL_SIZE BIT(30) enum mlx5_flow_destination_type { -- cgit v1.2.3 From ebf8d47121b6ef3f38425a343a72f37c60fd6dbc Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 19 Jun 2025 14:37:17 +0300 Subject: net/mlx5: Small refactor for general object capabilities Make enum for capability bits of general object types depend on the type definitions themselves. Make sure that capabilities in the [64,127] bit range are properly calculated (type id - 64). Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250619113721.60201-2-mbloch@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2c09df4ee574..5c8f75605eac 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -12501,17 +12501,6 @@ struct mlx5_ifc_affiliated_event_header_bits { u8 obj_id[0x20]; }; -enum { - MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = BIT_ULL(0xc), - MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC = BIT_ULL(0x13), - MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER = BIT_ULL(0x20), - MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = BIT_ULL(0x24), -}; - -enum { - MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL = BIT_ULL(0x13), -}; - enum { MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = 0xc, MLX5_GENERAL_OBJECT_TYPES_IPSEC = 0x13, @@ -12523,6 +12512,22 @@ enum { MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS = 0xff15, }; +enum { + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = + BIT_ULL(MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY), + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC = + BIT_ULL(MLX5_GENERAL_OBJECT_TYPES_IPSEC), + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER = + BIT_ULL(MLX5_GENERAL_OBJECT_TYPES_SAMPLER), + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = + BIT_ULL(MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO), +}; + +enum { + MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL = + BIT_ULL(MLX5_GENERAL_OBJECT_TYPES_RDMA_CTRL - 0x40), +}; + enum { MLX5_IPSEC_OBJECT_ICV_LEN_16B, }; -- cgit v1.2.3 From 1f6da56679d33c733aaee929fd9af962ad66edbd Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 19 Jun 2025 14:37:18 +0300 Subject: net/mlx5: Add IFC bits for PCIe Congestion Event object Add definitions for the PCIe Congestion Event object and the relevant FW command structures. Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250619113721.60201-3-mbloch@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5c8f75605eac..0e93f342be09 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -12509,6 +12509,7 @@ enum { MLX5_GENERAL_OBJECT_TYPES_MACSEC = 0x27, MLX5_GENERAL_OBJECT_TYPES_INT_KEK = 0x47, MLX5_GENERAL_OBJECT_TYPES_RDMA_CTRL = 0x53, + MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT = 0x58, MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS = 0xff15, }; @@ -12526,6 +12527,8 @@ enum { enum { MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL = BIT_ULL(MLX5_GENERAL_OBJECT_TYPES_RDMA_CTRL - 0x40), + MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT = + BIT_ULL(MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT - 0x40), }; enum { @@ -13284,4 +13287,41 @@ struct mlx5_ifc_mrtcq_reg_bits { u8 reserved_at_80[0x180]; }; +struct mlx5_ifc_pcie_cong_event_obj_bits { + u8 modify_select_field[0x40]; + + u8 inbound_event_en[0x1]; + u8 outbound_event_en[0x1]; + u8 reserved_at_42[0x1e]; + + u8 reserved_at_60[0x1]; + u8 inbound_cong_state[0x3]; + u8 reserved_at_64[0x1]; + u8 outbound_cong_state[0x3]; + u8 reserved_at_68[0x18]; + + u8 inbound_cong_low_threshold[0x10]; + u8 inbound_cong_high_threshold[0x10]; + + u8 outbound_cong_low_threshold[0x10]; + u8 outbound_cong_high_threshold[0x10]; + + u8 reserved_at_e0[0x340]; +}; + +struct mlx5_ifc_pcie_cong_event_cmd_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_pcie_cong_event_obj_bits cong_obj; +}; + +struct mlx5_ifc_pcie_cong_event_cmd_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits hdr; + struct mlx5_ifc_pcie_cong_event_obj_bits cong_obj; +}; + +enum mlx5e_pcie_cong_event_mod_field { + MLX5_PCIE_CONG_EVENT_MOD_EVENT_EN = BIT(0), + MLX5_PCIE_CONG_EVENT_MOD_THRESH = BIT(2), +}; + #endif /* MLX5_IFC_H */ -- cgit v1.2.3 From 61977ccf6568f9d104462727b49412a80c22c519 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 20 Jun 2025 01:10:24 +0800 Subject: dt-bindings: reset: sun55i-a523-r-ccu: Add missing PPU0 reset There is a PPU0 reset control bit in the same register as the PPU1 reset control. This missing reset control is for the PCK-600 unit in the SoC. Manual tests show that the reset control indeed exists, and if not configured, the system will hang when the PCK-600 registers are accessed. Add a reset entry for it at the end of the existing ones. Fixes: 52dbf84857f0 ("dt-bindings: clk: sunxi-ng: document two Allwinner A523 CCUs") Acked-by: Conor Dooley Acked-by: Stephen Boyd Reviewed-by: Andre Przywara Link: https://patch.msgid.link/20250619171025.3359384-2-wens@kernel.org Signed-off-by: Chen-Yu Tsai --- include/dt-bindings/reset/sun55i-a523-r-ccu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/reset/sun55i-a523-r-ccu.h b/include/dt-bindings/reset/sun55i-a523-r-ccu.h index dd6fbb372e19..eb31ae9958d6 100644 --- a/include/dt-bindings/reset/sun55i-a523-r-ccu.h +++ b/include/dt-bindings/reset/sun55i-a523-r-ccu.h @@ -21,5 +21,6 @@ #define RST_BUS_R_IR_RX 12 #define RST_BUS_R_RTC 13 #define RST_BUS_R_CPUCFG 14 +#define RST_BUS_R_PPU0 15 #endif /* _DT_BINDINGS_RST_SUN55I_A523_R_CCU_H_ */ -- cgit v1.2.3 From f792733e08d5f5d44ef76d22bcca7ca45a82d0de Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 25 Jun 2025 22:04:28 +0800 Subject: ASoC: sdw_utils: add component_name string to dai_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the sdw machine driver uses different way to get the component name from the DAI name for different codecs in the rtd_init callback. It means that we need to rely on the rtd_init callback to get the component name. Add an optional component string to the asoc_sdw_dai_info struct allows the machine driver to get the component name directly. The commit adds the component names for the AMP dais for the preparation to set card->components string for combined speaker configs. Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Shuming Fan Link: https://patch.msgid.link/20250625140430.311865-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index b63021f5afaf..6049a5d0cfcd 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -46,6 +46,7 @@ struct asoc_sdw_codec_info; struct asoc_sdw_dai_info { const bool direction[2]; /* playback & capture support */ const char *dai_name; + const char *component_name; const int dai_type; const int dailink[2]; /* dailink id for each direction */ const struct snd_kcontrol_new *controls; -- cgit v1.2.3 From 2642f55d44ce563f227dd9c620eda0dec8d882be Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Thu, 19 Jun 2025 12:18:16 +0100 Subject: pinctrl: samsung: add support for gs101 wakeup mask programming gs101 differs to other currently supported SoCs in that it has 3 wakeup mask registers for the 67 external wakeup interrupt pins in alive and far_alive. EINT_WAKEUP_MASK 0x3A80 EINT[31:0] EINT_WAKEUP_MASK2 0x3A84 EINT[63:32] EINT_WAKEUP_MASK3 0x3A88 EINT[66:64] Add gs101 specific callbacks and a dedicated gs101_wkup_irq_chip struct to handle these differences. The current wakeup mask with upstream is programmed as WAKEUP_MASK0[0x3A80] value[0xFFFFFFFF] WAKEUP_MASK1[0x3A84] value[0xF2FFEFFF] WAKEUP_MASK2[0x3A88] value[0xFFFFFFFF] Which corresponds to the following wakeup sources: gpa7-3 vol down gpa8-1 vol up gpa10-1 power gpa8-2 typec-int Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20250619-gs101-eint-mask-v1-2-89438cfd7499@linaro.org Signed-off-by: Krzysztof Kozlowski --- include/linux/soc/samsung/exynos-regs-pmu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index 1a2c0e0838f9..938c6db235fb 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -669,6 +669,7 @@ #define GS101_CPU_INFORM(cpu) \ (GS101_CPU0_INFORM + (cpu*4)) #define GS101_SYSTEM_CONFIGURATION (0x3A00) +#define GS101_EINT_WAKEUP_MASK (0x3A80) #define GS101_PHY_CTRL_USB20 (0x3EB0) #define GS101_PHY_CTRL_USBDP (0x3EB4) -- cgit v1.2.3 From 51a4273dcab39dd1e850870945ccec664352d383 Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Tue, 8 Apr 2025 15:02:11 +0530 Subject: KVM: SVM: Add missing member in SNP_LAUNCH_START command structure The sev_data_snp_launch_start structure should include a 4-byte desired_tsc_khz field before the gosvw field, which was missed in the initial implementation. As a result, the structure is 4 bytes shorter than expected by the firmware, causing the gosvw field to start 4 bytes early. Fix this by adding the missing 4-byte member for the desired TSC frequency. Fixes: 3a45dc2b419e ("crypto: ccp: Define the SEV-SNP commands") Cc: stable@vger.kernel.org Suggested-by: Tom Lendacky Reviewed-by: Tom Lendacky Tested-by: Vaishali Thakkar Signed-off-by: Nikunj A Dadhania Link: https://lore.kernel.org/r/20250408093213.57962-3-nikunj@amd.com Signed-off-by: Sean Christopherson --- include/linux/psp-sev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 0b3a36bdaa90..0f5f94137f6d 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -594,6 +594,7 @@ struct sev_data_snp_addr { * @imi_en: launch flow is launching an IMI (Incoming Migration Image) for the * purpose of guest-assisted migration. * @rsvd: reserved + * @desired_tsc_khz: hypervisor desired mean TSC freq in kHz of the guest * @gosvw: guest OS-visible workarounds, as defined by hypervisor */ struct sev_data_snp_launch_start { @@ -603,6 +604,7 @@ struct sev_data_snp_launch_start { u32 ma_en:1; /* In */ u32 imi_en:1; /* In */ u32 rsvd:30; + u32 desired_tsc_khz; /* In */ u8 gosvw[16]; /* In */ } __packed; -- cgit v1.2.3 From bbc13ae593e0ea47357ff6e4740c533c16c2ae1e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 22 May 2025 18:17:56 -0700 Subject: VFIO: KVM: x86: Drop kvm_arch_{start,end}_assignment() Drop kvm_arch_{start,end}_assignment() and all associated code now that KVM x86 no longer consumes assigned_device_count. Tracking whether or not a VFIO-assigned device is formally associated with a VM is fundamentally flawed, as such an association is optional for general usage, i.e. is prone to false negatives. E.g. prior to commit 2edd9cb79fb3 ("kvm: detect assigned device via irqbypass manager"), device passthrough via VFIO would fail to enable IRQ bypass if userspace omitted the formal VFIO<=>KVM binding. And device drivers that *need* the VFIO<=>KVM connection, e.g. KVM-GT, shouldn't be relying on generic x86 tracking infrastructure. Cc: Jim Mattson Link: https://lore.kernel.org/r/20250523011756.3243624-6-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fb9ec06aa807..15656b7fba6c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1690,24 +1690,6 @@ static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) return false; } #endif -#ifdef __KVM_HAVE_ARCH_ASSIGNED_DEVICE -void kvm_arch_start_assignment(struct kvm *kvm); -void kvm_arch_end_assignment(struct kvm *kvm); -bool kvm_arch_has_assigned_device(struct kvm *kvm); -#else -static inline void kvm_arch_start_assignment(struct kvm *kvm) -{ -} - -static inline void kvm_arch_end_assignment(struct kvm *kvm) -{ -} - -static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm) -{ - return false; -} -#endif static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu) { -- cgit v1.2.3 From c8dc579169738a3546f57ecb38e62d3872a3cc04 Mon Sep 17 00:00:00 2001 From: Pratap Nirujogi Date: Mon, 9 Jun 2025 11:53:56 -0400 Subject: i2c: amd-isp: Initialize unique adapter name Initialize unique name for amdisp i2c adapter, which is used in the platform driver to detect the matching adapter for i2c_client creation. Add definition of amdisp i2c adapter name in a new header file (include/linux/soc/amd/isp4_misc.h) as it is referred in different driver modules. Tested-by: Randy Dunlap Signed-off-by: Pratap Nirujogi Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250609155601.1477055-3-pratap.nirujogi@amd.com --- include/linux/soc/amd/isp4_misc.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 include/linux/soc/amd/isp4_misc.h (limited to 'include') diff --git a/include/linux/soc/amd/isp4_misc.h b/include/linux/soc/amd/isp4_misc.h new file mode 100644 index 000000000000..6738796986a7 --- /dev/null +++ b/include/linux/soc/amd/isp4_misc.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. + */ + +#ifndef __SOC_ISP4_MISC_H +#define __SOC_ISP4_MISC_H + +#define AMDISP_I2C_ADAP_NAME "AMDISP DesignWare I2C adapter" + +#endif -- cgit v1.2.3 From aced132599b3c8884c050218d4c48eef203678f6 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 25 Jun 2025 09:40:24 -0700 Subject: bpf: Add range tracking for BPF_NEG Add range tracking for instruction BPF_NEG. Without this logic, a trivial program like the following will fail volatile bool found_value_b; SEC("lsm.s/socket_connect") int BPF_PROG(test_socket_connect) { if (!found_value_b) return -1; return 0; } with verifier log: "At program exit the register R0 has smin=0 smax=4294967295 should have been in [-4095, 0]". This is because range information is lost in BPF_NEG: 0: R1=ctx() R10=fp0 ; if (!found_value_b) @ xxxx.c:24 0: (18) r1 = 0xffa00000011e7048 ; R1_w=map_value(...) 2: (71) r0 = *(u8 *)(r1 +0) ; R0_w=scalar(smin32=0,smax=255) 3: (a4) w0 ^= 1 ; R0_w=scalar(smin32=0,smax=255) 4: (84) w0 = -w0 ; R0_w=scalar(range info lost) Note that, the log above is manually modified to highlight relevant bits. Fix this by maintaining proper range information with BPF_NEG, so that the verifier will know: 4: (84) w0 = -w0 ; R0_w=scalar(smin32=-255,smax=0) Also updated selftests based on the expected behavior. Signed-off-by: Song Liu Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250625164025.3310203-2-song@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/tnum.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/tnum.h b/include/linux/tnum.h index 3c13240077b8..57ed3035cc30 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -40,6 +40,8 @@ struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness); struct tnum tnum_add(struct tnum a, struct tnum b); /* Subtract two tnums, return @a - @b */ struct tnum tnum_sub(struct tnum a, struct tnum b); +/* Neg of a tnum, return 0 - @a */ +struct tnum tnum_neg(struct tnum a); /* Bitwise-AND, return @a & @b */ struct tnum tnum_and(struct tnum a, struct tnum b); /* Bitwise-OR, return @a | @b */ -- cgit v1.2.3 From d83caf7c8dad96051267c18786b7bc446b537f3c Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Wed, 25 Jun 2025 15:16:21 +0000 Subject: bpf: add btf_type_is_i{32,64} helpers There are places in BPF code which check if a BTF type is an integer of particular size. This code can be made simpler by using helpers. Add new btf_type_is_i{32,64} helpers, and simplify code in a few files. (Suggested by Eduard for a patch which copy-pasted such a check [1].) v1 -> v2: * export less generic helpers (Eduard) * make subject less generic than in [v1] (Eduard) [1] https://lore.kernel.org/bpf/7edb47e73baa46705119a23c6bf4af26517a640f.camel@gmail.com/ [v1] https://lore.kernel.org/bpf/20250624193655.733050-1-a.s.protopopov@gmail.com/ Suggested-by: Eduard Zingerman Signed-off-by: Anton Protopopov Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250625151621.1000584-1-a.s.protopopov@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index b2983706292f..a40beb9cf160 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -221,6 +221,8 @@ bool btf_is_vmlinux(const struct btf *btf); struct module *btf_try_get_module(const struct btf *btf); u32 btf_nr_types(const struct btf *btf); struct btf *btf_base_btf(const struct btf *btf); +bool btf_type_is_i32(const struct btf_type *t); +bool btf_type_is_i64(const struct btf_type *t); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); -- cgit v1.2.3 From bfb4fb77f9a8ce33ce357224569eae5564eec573 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 23 Jun 2025 08:31:47 -0700 Subject: team: replace team lock with rtnl lock syszbot reports various ordering issues for lower instance locks and team lock. Switch to using rtnl lock for protecting team device, similar to bonding. Based on the patch by Tetsuo Handa. Cc: Jiri Pirko Cc: Tetsuo Handa Reported-by: syzbot+705c61d60b091ef42c04@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=705c61d60b091ef42c04 Reported-by: syzbot+71fd22ae4b81631e22fd@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=71fd22ae4b81631e22fd Fixes: 6b1d3c5f675c ("team: grab team lock during team_change_rx_flags") Link: https://lkml.kernel.org/r/ZoZ2RH9BcahEB9Sb@nanopsycho.orion Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250623153147.3413631-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/if_team.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/if_team.h b/include/linux/if_team.h index cdc684e04a2f..ce97d891cf72 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -191,8 +191,6 @@ struct team { const struct header_ops *header_ops_cache; - struct mutex lock; /* used for overall locking, e.g. port lists write */ - /* * List of enabled ports and their count */ @@ -223,7 +221,6 @@ struct team { atomic_t count_pending; struct delayed_work dw; } mcast_rejoin; - struct lock_class_key team_lock_key; long mode_priv[TEAM_MODE_PRIV_LONGS]; }; -- cgit v1.2.3 From 826334359eacc1b70e9752ebc4954ed775dd40ca Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Jun 2025 16:17:13 -0700 Subject: netlink: specs: add the multicast group name to spec Add the multicast group's name to the YAML spec. Without it YNL doesn't know how to subscribe to notifications. Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20250623231720.3124717-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 2 -- include/uapi/linux/ethtool_netlink_generated.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 09a75bdb6560..fa5d645140a4 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -208,6 +208,4 @@ enum { ETHTOOL_A_STATS_PHY_MAX = (__ETHTOOL_A_STATS_PHY_CNT - 1) }; -#define ETHTOOL_MCGRP_MONITOR_NAME "monitor" - #endif /* _UAPI_LINUX_ETHTOOL_NETLINK_H_ */ diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 4944badf9fba..859e28c8a91a 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -867,4 +867,6 @@ enum { ETHTOOL_MSG_KERNEL_MAX = (__ETHTOOL_MSG_KERNEL_CNT - 1) }; +#define ETHTOOL_MCGRP_MONITOR_NAME "monitor" + #endif /* _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H */ -- cgit v1.2.3 From f9dc3e52d821dc1f9afeec43fb1c18ac94bd587a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Jun 2025 16:17:16 -0700 Subject: net: ethtool: remove the data argument from ethtool_notify() ethtool_notify() takes a const void *data argument, which presumably was intended to pass information from the call site to the subcommand handler. This argument currently has no users. Expecting the data to be subcommand-specific has two complications. Complication #1 is that its not plumbed thru any of the standardized callbacks. It gets propagated to ethnl_default_notify() where it remains unused. Coming from the ethnl_default_set_doit() side we pass in NULL, because how could we have a command specific attribute in a generic handler. Complication #2 is that we expect the ethtool_notify() callers to know what attribute type to pass in. Again, the data pointer is untyped. RSS will need to pass the context ID to the notifications. I think it's a better design if the "subcommand" exports its own typed interface and constructs the appropriate argument struct (which will be req_info). Remove the unused data argument from ethtool_notify() but retain it in a new internal helper which subcommands can use to build a typed interface. Reviewed-by: Maxime Chevallier Tested-by: Maxime Chevallier Link: https://patch.msgid.link/20250623231720.3124717-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 03c26bb0fbbe..db5bfd4e7ec8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5138,10 +5138,9 @@ void netdev_bonding_info_change(struct net_device *dev, struct netdev_bonding_info *bonding_info); #if IS_ENABLED(CONFIG_ETHTOOL_NETLINK) -void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data); +void ethtool_notify(struct net_device *dev, unsigned int cmd); #else -static inline void ethtool_notify(struct net_device *dev, unsigned int cmd, - const void *data) +static inline void ethtool_notify(struct net_device *dev, unsigned int cmd) { } #endif -- cgit v1.2.3 From 46837be5afc6ea70bc827ca4439410e069e2ee37 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Jun 2025 16:17:18 -0700 Subject: net: ethtool: rss: add notifications In preparation for RSS_SET handling in ethnl introduce Netlink notifications for RSS. Only cover modifications, not creation and not removal of a context, because the latter may deserve a different notification type. We should cross that bridge when we add the support for context add / remove via Netlink. Link: https://patch.msgid.link/20250623231720.3124717-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink_generated.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 859e28c8a91a..8f30ffa1cd14 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -862,6 +862,7 @@ enum { ETHTOOL_MSG_TSCONFIG_GET_REPLY, ETHTOOL_MSG_TSCONFIG_SET_REPLY, ETHTOOL_MSG_PSE_NTF, + ETHTOOL_MSG_RSS_NTF, __ETHTOOL_MSG_KERNEL_CNT, ETHTOOL_MSG_KERNEL_MAX = (__ETHTOOL_MSG_KERNEL_CNT - 1) -- cgit v1.2.3 From 4b70e2a069d90cdc447c6bf8437c8b99345852e9 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 24 Jun 2025 09:43:27 +0800 Subject: net/sched: Remove unused functions Since commit c54e1d920f04 ("flow_offload: add ops to tc_action_ops for flow action setup") these are unused. Signed-off-by: Yue Haibing Acked-by: Cong Wang Link: https://patch.msgid.link/20250624014327.3686873-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_csum.h | 9 --------- include/net/tc_act/tc_ct.h | 9 --------- include/net/tc_act/tc_gate.h | 9 --------- include/net/tc_act/tc_mpls.h | 9 --------- include/net/tc_act/tc_police.h | 9 --------- include/net/tc_act/tc_sample.h | 9 --------- include/net/tc_act/tc_vlan.h | 9 --------- 7 files changed, 63 deletions(-) (limited to 'include') diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index 68269e4581b7..2515da0142a6 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -18,15 +18,6 @@ struct tcf_csum { }; #define to_tcf_csum(a) ((struct tcf_csum *)a) -static inline bool is_tcf_csum(const struct tc_action *a) -{ -#ifdef CONFIG_NET_CLS_ACT - if (a->ops && a->ops->id == TCA_ID_CSUM) - return true; -#endif - return false; -} - static inline u32 tcf_csum_update_flags(const struct tc_action *a) { u32 update_flags; diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index 77f87c622a2e..e6b45cb27ebf 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -92,13 +92,4 @@ static inline void tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie) { } #endif -static inline bool is_tcf_ct(const struct tc_action *a) -{ -#if defined(CONFIG_NET_CLS_ACT) && IS_ENABLED(CONFIG_NF_CONNTRACK) - if (a->ops && a->ops->id == TCA_ID_CT) - return true; -#endif - return false; -} - #endif /* __NET_TC_CT_H */ diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h index c8fa11ebb397..c1a67149c6b6 100644 --- a/include/net/tc_act/tc_gate.h +++ b/include/net/tc_act/tc_gate.h @@ -51,15 +51,6 @@ struct tcf_gate { #define to_gate(a) ((struct tcf_gate *)a) -static inline bool is_tcf_gate(const struct tc_action *a) -{ -#ifdef CONFIG_NET_CLS_ACT - if (a->ops && a->ops->id == TCA_ID_GATE) - return true; -#endif - return false; -} - static inline s32 tcf_gate_prio(const struct tc_action *a) { s32 tcfg_prio; diff --git a/include/net/tc_act/tc_mpls.h b/include/net/tc_act/tc_mpls.h index 721de4f5733a..d452e5e94fd0 100644 --- a/include/net/tc_act/tc_mpls.h +++ b/include/net/tc_act/tc_mpls.h @@ -27,15 +27,6 @@ struct tcf_mpls { }; #define to_mpls(a) ((struct tcf_mpls *)a) -static inline bool is_tcf_mpls(const struct tc_action *a) -{ -#ifdef CONFIG_NET_CLS_ACT - if (a->ops && a->ops->id == TCA_ID_MPLS) - return true; -#endif - return false; -} - static inline u32 tcf_mpls_action(const struct tc_action *a) { u32 tcfm_action; diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h index 283bde711a42..490d88cb5233 100644 --- a/include/net/tc_act/tc_police.h +++ b/include/net/tc_act/tc_police.h @@ -44,15 +44,6 @@ struct tc_police_compat { struct tc_ratespec peakrate; }; -static inline bool is_tcf_police(const struct tc_action *act) -{ -#ifdef CONFIG_NET_CLS_ACT - if (act->ops && act->ops->id == TCA_ID_POLICE) - return true; -#endif - return false; -} - static inline u64 tcf_police_rate_bytes_ps(const struct tc_action *act) { struct tcf_police *police = to_police(act); diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h index b5d76305e854..abd163ca1864 100644 --- a/include/net/tc_act/tc_sample.h +++ b/include/net/tc_act/tc_sample.h @@ -17,15 +17,6 @@ struct tcf_sample { }; #define to_sample(a) ((struct tcf_sample *)a) -static inline bool is_tcf_sample(const struct tc_action *a) -{ -#ifdef CONFIG_NET_CLS_ACT - return a->ops && a->ops->id == TCA_ID_SAMPLE; -#else - return false; -#endif -} - static inline __u32 tcf_sample_rate(const struct tc_action *a) { return to_sample(a)->rate; diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index 904eddfc1826..3f5e9242b5e8 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -26,15 +26,6 @@ struct tcf_vlan { }; #define to_vlan(a) ((struct tcf_vlan *)a) -static inline bool is_tcf_vlan(const struct tc_action *a) -{ -#ifdef CONFIG_NET_CLS_ACT - if (a->ops && a->ops->id == TCA_ID_VLAN) - return true; -#endif - return false; -} - static inline u32 tcf_vlan_action(const struct tc_action *a) { u32 tcfv_action; -- cgit v1.2.3 From 8bd0af3154b2206ce19f8b1410339f7a2a56d0c3 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 24 Jun 2025 08:50:44 -0500 Subject: lib: packing: Include necessary headers packing.h uses ARRAY_SIZE(), BUILD_BUG_ON_MSG(), min(), max(), and sizeof_field() without including the headers where they are defined, potentially causing build failures. Fix this in packing.h and sort the result. Signed-off-by: Nathan Lynch Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250624-packing-includes-v1-1-c23c81fab508@amd.com Signed-off-by: Jakub Kicinski --- include/linux/packing.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/packing.h b/include/linux/packing.h index 0589d70bbe04..20ae4d452c7b 100644 --- a/include/linux/packing.h +++ b/include/linux/packing.h @@ -5,8 +5,12 @@ #ifndef _LINUX_PACKING_H #define _LINUX_PACKING_H -#include +#include #include +#include +#include +#include +#include #define GEN_PACKED_FIELD_STRUCT(__type) \ struct packed_field_ ## __type { \ -- cgit v1.2.3 From 2855e43c6bb154a9b8e27abda8df364aed574b22 Mon Sep 17 00:00:00 2001 From: RubenKelevra Date: Tue, 24 Jun 2025 18:57:11 +0200 Subject: uapi: net_dropmon: drop unused is_drop_point_hw macro Commit 4ea7e38696c7 ("dropmon: add ability to detect when hardware drops rx packets") introduced is_drop_point_hw, but the symbol was never referenced anywhere in the kernel tree and is currently not used by dropwatch. I could not find, to the best of my abilities, a current out-of-tree user of this macro. The definition also contains a syntax error in its for-loop, so any project that tried to compile against it would fail. Removing the macro therefore eliminates dead code without breaking existing users. Signed-off-by: RubenKelevra Link: https://patch.msgid.link/20250624165711.1188691-1-rubenkelevra@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/net_dropmon.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h index 9dd41c2f58a6..87cbef48d4c7 100644 --- a/include/uapi/linux/net_dropmon.h +++ b/include/uapi/linux/net_dropmon.h @@ -10,13 +10,6 @@ struct net_dm_drop_point { __u32 count; }; -#define is_drop_point_hw(x) do {\ - int ____i, ____j;\ - for (____i = 0; ____i < 8; i ____i++)\ - ____j |= x[____i];\ - ____j;\ -} while (0) - #define NET_DM_CFG_VERSION 0 #define NET_DM_CFG_ALERT_COUNT 1 #define NET_DM_CFG_ALERT_DELAY 2 -- cgit v1.2.3 From 9e6dd4c256d0774701637b958ba682eff4991277 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 24 Jun 2025 14:09:59 -0700 Subject: netlink: specs: mptcp: replace underscores with dashes in names We're trying to add a strict regexp for the name format in the spec. Underscores will not be allowed, dashes should be used instead. This makes no difference to C (codegen, if used, replaces special chars in names) but it gives more uniform naming in Python. Fixes: bc8aeb2045e2 ("Documentation: netlink: add a YAML spec for mptcp") Reviewed-by: Davide Caratti Reviewed-by: Donald Hunter Reviewed-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250624211002.3475021-8-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp_pm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/mptcp_pm.h b/include/uapi/linux/mptcp_pm.h index 84fa8a21dfd0..6ac84b2f636c 100644 --- a/include/uapi/linux/mptcp_pm.h +++ b/include/uapi/linux/mptcp_pm.h @@ -27,14 +27,14 @@ * token, rem_id. * @MPTCP_EVENT_SUB_ESTABLISHED: A new subflow has been established. 'error' * should not be set. Attributes: token, family, loc_id, rem_id, saddr4 | - * saddr6, daddr4 | daddr6, sport, dport, backup, if_idx [, error]. + * saddr6, daddr4 | daddr6, sport, dport, backup, if-idx [, error]. * @MPTCP_EVENT_SUB_CLOSED: A subflow has been closed. An error (copy of * sk_err) could be set if an error has been detected for this subflow. * Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | - * daddr6, sport, dport, backup, if_idx [, error]. + * daddr6, sport, dport, backup, if-idx [, error]. * @MPTCP_EVENT_SUB_PRIORITY: The priority of a subflow has changed. 'error' * should not be set. Attributes: token, family, loc_id, rem_id, saddr4 | - * saddr6, daddr4 | daddr6, sport, dport, backup, if_idx [, error]. + * saddr6, daddr4 | daddr6, sport, dport, backup, if-idx [, error]. * @MPTCP_EVENT_LISTENER_CREATED: A new PM listener is created. Attributes: * family, sport, saddr4 | saddr6. * @MPTCP_EVENT_LISTENER_CLOSED: A PM listener is closed. Attributes: family, -- cgit v1.2.3 From f5769359c5b241978e6933672bb78b3adc36aa18 Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Fri, 20 Jun 2025 02:31:54 +0800 Subject: mm/alloc_tag: fix the kmemleak false positive issue in the allocation of the percpu variable tag->counters When loading a module, as long as the module has memory allocation operations, kmemleak produces a false positive report that resembles the following: unreferenced object (percpu) 0x7dfd232a1650 (size 16): comm "modprobe", pid 1301, jiffies 4294940249 hex dump (first 16 bytes on cpu 2): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 0): kmemleak_alloc_percpu+0xb4/0xd0 pcpu_alloc_noprof+0x700/0x1098 load_module+0xd4/0x348 codetag_module_init+0x20c/0x450 codetag_load_module+0x70/0xb8 load_module+0xef8/0x1608 init_module_from_file+0xec/0x158 idempotent_init_module+0x354/0x608 __arm64_sys_finit_module+0xbc/0x150 invoke_syscall+0xd4/0x258 el0_svc_common.constprop.0+0xb4/0x240 do_el0_svc+0x48/0x68 el0_svc+0x40/0xf8 el0t_64_sync_handler+0x10c/0x138 el0t_64_sync+0x1ac/0x1b0 This is because the module can only indirectly reference alloc_tag_counters through the alloc_tag section, which misleads kmemleak. However, we don't have a kmemleak ignore interface for percpu allocations yet. So let's create one and invoke it for tag->counters. [gehao@kylinos.cn: fix build error when CONFIG_DEBUG_KMEMLEAK=n, s/igonore/ignore/] Link: https://lkml.kernel.org/r/20250620093102.2416767-1-hao.ge@linux.dev Link: https://lkml.kernel.org/r/20250619183154.2122608-1-hao.ge@linux.dev Fixes: 12ca42c23775 ("alloc_tag: allocate percpu counters for module tags dynamically") Signed-off-by: Hao Ge Reviewed-by: Catalin Marinas Acked-by: Suren Baghdasaryan [lib/alloc_tag.c] Cc: Kent Overstreet Signed-off-by: Andrew Morton --- include/linux/kmemleak.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 93a73c076d16..fbd424b2abb1 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -28,6 +28,7 @@ extern void kmemleak_update_trace(const void *ptr) __ref; extern void kmemleak_not_leak(const void *ptr) __ref; extern void kmemleak_transient_leak(const void *ptr) __ref; extern void kmemleak_ignore(const void *ptr) __ref; +extern void kmemleak_ignore_percpu(const void __percpu *ptr) __ref; extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref; extern void kmemleak_no_scan(const void *ptr) __ref; extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size, @@ -97,6 +98,9 @@ static inline void kmemleak_not_leak(const void *ptr) static inline void kmemleak_transient_leak(const void *ptr) { } +static inline void kmemleak_ignore_percpu(const void __percpu *ptr) +{ +} static inline void kmemleak_ignore(const void *ptr) { } -- cgit v1.2.3 From 4ecf83741401c70d4420588ee1f3b1ca04ef58d5 Mon Sep 17 00:00:00 2001 From: Jake Hillion Date: Wed, 25 Jun 2025 18:05:46 +0100 Subject: sched_ext: Drop kfuncs marked for removal in 6.15 sched_ext performed a kfunc renaming pass in 6.13 and kept the old names around for compatibility with old binaries. These were scheduled for cleanup in 6.15 but were missed. Submitting for cleanup in for-next. Removed the kfuncs, their flags, and any references I could find to them in doc comments. Left the entries in include/scx/compat.bpf.h as they're still useful to make new binaries compatible with old kernels. Tested by applying to my kernel. It builds and a modern version of scx_lavd loads fine. Signed-off-by: Jake Hillion Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 0cf0915572c9..7047101dbf58 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -164,7 +164,7 @@ struct sched_ext_entity { /* * Runtime budget in nsecs. This is usually set through - * scx_bpf_dispatch() but can also be modified directly by the BPF + * scx_bpf_dsq_insert() but can also be modified directly by the BPF * scheduler. Automatically decreased by SCX as the task executes. On * depletion, a scheduling event is triggered. * @@ -176,10 +176,10 @@ struct sched_ext_entity { /* * Used to order tasks when dispatching to the vtime-ordered priority - * queue of a dsq. This is usually set through scx_bpf_dispatch_vtime() - * but can also be modified directly by the BPF scheduler. Modifying it - * while a task is queued on a dsq may mangle the ordering and is not - * recommended. + * queue of a dsq. This is usually set through + * scx_bpf_dsq_insert_vtime() but can also be modified directly by the + * BPF scheduler. Modifying it while a task is queued on a dsq may + * mangle the ordering and is not recommended. */ u64 dsq_vtime; -- cgit v1.2.3 From 16e2707cf15e09234445d40ddd76f11240be8767 Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Wed, 4 Jun 2025 15:39:37 -0400 Subject: cpumask: add cpumask_clear_cpus() When user wants to clear a range in cpumask, the only option the API provides now is a for-loop, like: for_each_cpu_from(cpu, mask) { if (cpu >= ncpus) break; __cpumask_clear_cpu(cpu, mask); } In the bitmap API we have bitmap_clear() for that, which is significantly faster than a for-loop. Propagate it to cpumasks. Signed-off-by: Yury Norov [NVIDIA] Link: https://patch.msgid.link/20250604193947.11834-2-yury.norov@gmail.com Signed-off-by: Leon Romanovsky --- include/linux/cpumask.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 7ae80a7ca81e..ede95bbe8b80 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -609,6 +609,18 @@ void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) __set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } +/** + * cpumask_clear_cpus - clear cpus in a cpumask + * @dstp: the cpumask pointer + * @cpu: cpu number (< nr_cpu_ids) + * @ncpus: number of cpus to clear (< nr_cpu_ids) + */ +static __always_inline void cpumask_clear_cpus(struct cpumask *dstp, + unsigned int cpu, unsigned int ncpus) +{ + cpumask_check(cpu + ncpus - 1); + bitmap_clear(cpumask_bits(dstp), cpumask_check(cpu), ncpus); +} /** * cpumask_clear_cpu - clear a cpu in a cpumask -- cgit v1.2.3 From 2f8839e6c5f8e200629fd730aac5dd874c1d2544 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 17 Jun 2025 15:44:35 +0200 Subject: crypto: ahash - make hash walk functions from ahash.c public Make the hash walk functions crypto_hash_walk_done() crypto_hash_walk_first() crypto_hash_walk_last() public again. These functions had been removed from the header file include/crypto/internal/hash.h with commit 7fa481734016 ("crypto: ahash - make hash walk functions private to ahash.c") as there was no crypto algorithm code using them. With the upcoming crypto implementation for s390 phmac these functions will be exploited and thus need to be public within the kernel again. Signed-off-by: Harald Freudenberger Acked-by: Holger Dengler Signed-off-by: Herbert Xu --- include/crypto/internal/hash.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index f052afa6e7b0..2456d6ea73f0 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -30,6 +30,20 @@ __##name##_req, (req)) struct ahash_request; +struct scatterlist; + +struct crypto_hash_walk { + const char *data; + + unsigned int offset; + unsigned int flags; + + struct page *pg; + unsigned int entrylen; + + unsigned int total; + struct scatterlist *sg; +}; struct ahash_instance { void (*free)(struct ahash_instance *inst); @@ -61,6 +75,15 @@ struct crypto_shash_spawn { struct crypto_spawn base; }; +int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err); +int crypto_hash_walk_first(struct ahash_request *req, + struct crypto_hash_walk *walk); + +static inline int crypto_hash_walk_last(struct crypto_hash_walk *walk) +{ + return !(walk->entrylen | walk->total); +} + int crypto_register_ahash(struct ahash_alg *alg); void crypto_unregister_ahash(struct ahash_alg *alg); int crypto_register_ahashes(struct ahash_alg *algs, int count); -- cgit v1.2.3 From d0da164ba63dd6ff3696c565b118631a277822c3 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 17 Jun 2025 15:44:38 +0200 Subject: crypto: ahash - Add crypto_ahash_tested() helper function Add a little inline helper function crypto_ahash_tested() to the internal/hash.h header file to retrieve the tested status (that is the CRYPTO_ALG_TESTED bit in the cra_flags). Signed-off-by: Harald Freudenberger Suggested-by: Herbert Xu Reviewed-by: Holger Dengler Signed-off-by: Herbert Xu --- include/crypto/internal/hash.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 2456d6ea73f0..6ec5f2f37ccb 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -196,6 +196,13 @@ static inline void crypto_ahash_set_reqsize(struct crypto_ahash *tfm, tfm->reqsize = reqsize; } +static inline bool crypto_ahash_tested(struct crypto_ahash *tfm) +{ + struct crypto_tfm *tfm_base = crypto_ahash_tfm(tfm); + + return tfm_base->__crt_alg->cra_flags & CRYPTO_ALG_TESTED; +} + static inline void crypto_ahash_set_reqsize_dma(struct crypto_ahash *ahash, unsigned int reqsize) { -- cgit v1.2.3 From 8cffca866ba86cbf0d097e56521b17d830956d4a Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 17 Jun 2025 11:44:01 +0300 Subject: RDMA/core: Extend RDMA device registration to be net namespace aware Presently, RDMA devices are always registered within the init network namespace, even if the associated devlink device's namespace was changed via a devlink reload. This mismatch leads to discrepancies between the network namespace of the devlink device and that of the RDMA device. Therefore, extend the RDMA device allocation API to optionally take the net namespace. This isn't limited to devices that support devlink but allows all users to provide the network namespace if they need to do so. If a network namespace is provided during device allocation, it's up to the caller to make sure the namespace stays valid until ib_register_device() is called. Signed-off-by: Shay Drory Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 38f68d245fa6..b91a81234832 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2914,11 +2914,18 @@ struct ib_block_iter { unsigned int __pg_bit; /* alignment of current block */ }; -struct ib_device *_ib_alloc_device(size_t size); +struct ib_device *_ib_alloc_device(size_t size, struct net *net); #define ib_alloc_device(drv_struct, member) \ container_of(_ib_alloc_device(sizeof(struct drv_struct) + \ BUILD_BUG_ON_ZERO(offsetof( \ - struct drv_struct, member))), \ + struct drv_struct, member)), \ + &init_net), \ + struct drv_struct, member) + +#define ib_alloc_device_with_net(drv_struct, member, net) \ + container_of(_ib_alloc_device(sizeof(struct drv_struct) + \ + BUILD_BUG_ON_ZERO(offsetof( \ + struct drv_struct, member)), net), \ struct drv_struct, member) void ib_dealloc_device(struct ib_device *device); -- cgit v1.2.3 From 611d08207d313500d010d8792424346ce70d0cfb Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 17 Jun 2025 11:44:02 +0300 Subject: RDMA/mlx5: Allocate IB device with net namespace supplied from core dev Use the new ib_alloc_device_with_net() API to allocate the IB device so that it is properly bound to the network namespace obtained via mlx5_core_net(). This change ensures correct namespace association (e.g., for containerized setups). Additionally, expose mlx5_core_net so that RDMA driver can use it. Signed-off-by: Shay Drory Signed-off-by: Mark Bloch Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e6ba8f4f4bd1..3475d33c75f4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1349,4 +1349,9 @@ enum { }; bool mlx5_wc_support_get(struct mlx5_core_dev *mdev); + +static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) +{ + return devlink_net(priv_to_devlink(dev)); +} #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From f1208b05574f63c52e88109d8c75afdf4fc6bf42 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 17 Jun 2025 11:44:03 +0300 Subject: RDMA/ipoib: Use parent rdma device net namespace Use the net namespace of the underlying rdma device. After honoring the rdma device's namespace, the ipoib netdev now also runs in the same net namespace of the rdma device. Add an API to read the net namespace of the rdma device so that ULP such as IPoIB can use it to initialize its netdev. Signed-off-by: Parav Pandit Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b91a81234832..7da27f01eeb6 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4871,6 +4871,11 @@ static inline int ibdev_to_node(struct ib_device *ibdev) bool rdma_dev_access_netns(const struct ib_device *device, const struct net *net); +static inline struct net *rdma_dev_net(struct ib_device *device) +{ + return read_pnet(&device->coredev.rdma_net); +} + #define IB_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000) #define IB_ROCE_UDP_ENCAP_VALID_PORT_MAX (0xFFFF) #define IB_GRH_FLOWLABEL_MASK (0x000FFFFF) -- cgit v1.2.3 From df0f030ee7e444c55341f4210124115878284125 Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Thu, 12 Jun 2025 17:39:08 +0300 Subject: irqchip/thead-c900-aclint-sswi: Generalize aclint-sswi driver and add MIPS P800 support Refactor the Thead specific implementation of the ACLINT-SSWI irqchip: - Rename the source file and related details to reflect the generic nature of the driver - Factor out the generic code that serves both Thead and MIPS variants. This generic part is compliant with the RISC-V draft spec [1] - Provide generic and Thead specific initialization functions Signed-off-by: Vladimir Kondratiev Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250612143911.3224046-5-vladimir.kondratiev@mobileye.com Link: https://github.com/riscvarchive/riscv-aclint [1] --- include/linux/cpuhotplug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index df366ee15456..d381420bbd5f 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -145,7 +145,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_EIOINTC_STARTING, CPUHP_AP_IRQ_AVECINTC_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, - CPUHP_AP_IRQ_THEAD_ACLINT_SSWI_STARTING, + CPUHP_AP_IRQ_ACLINT_SSWI_STARTING, CPUHP_AP_IRQ_RISCV_IMSIC_STARTING, CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, -- cgit v1.2.3 From c8e3d6d77507c42cc1a02783f70e1e4e85e3c80f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Mon, 23 Jun 2025 17:53:11 +0200 Subject: drm/ttm: Use a struct for the common part of struct ttm_lru_walk and struct ttm_bo_lru_cursor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let the locking functions take the new struct ttm_lru_walk_arg as argument in order for them to be easily used from both types of walk. v2: - Whitespace fix Signed-off-by: Thomas Hellström Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250623155313.4901-2-thomas.hellstrom@linux.intel.com --- include/drm/ttm/ttm_bo.h | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index 8ad6e2713625..c19ac92c33a9 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -207,11 +207,9 @@ struct ttm_lru_walk_ops { }; /** - * struct ttm_lru_walk - Structure describing a LRU walk. + * struct ttm_lru_walk_arg - Common part for the variants of BO LRU walk. */ -struct ttm_lru_walk { - /** @ops: Pointer to the ops structure. */ - const struct ttm_lru_walk_ops *ops; +struct ttm_lru_walk_arg { /** @ctx: Pointer to the struct ttm_operation_ctx. */ struct ttm_operation_ctx *ctx; /** @ticket: The struct ww_acquire_ctx if any. */ @@ -220,6 +218,16 @@ struct ttm_lru_walk { bool trylock_only; }; +/** + * struct ttm_lru_walk - Structure describing a LRU walk. + */ +struct ttm_lru_walk { + /** @ops: Pointer to the ops structure. */ + const struct ttm_lru_walk_ops *ops; + /** @arg: Common bo LRU walk arguments. */ + struct ttm_lru_walk_arg arg; +}; + s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev, struct ttm_resource_manager *man, s64 target); @@ -466,11 +474,6 @@ int ttm_bo_populate(struct ttm_buffer_object *bo, struct ttm_bo_lru_cursor { /** @res_curs: Embedded struct ttm_resource_cursor. */ struct ttm_resource_cursor res_curs; - /** - * @ctx: The struct ttm_operation_ctx used while looping. - * governs the locking mode. - */ - struct ttm_operation_ctx *ctx; /** * @bo: Buffer object pointer if a buffer object is refcounted, * NULL otherwise. @@ -481,6 +484,8 @@ struct ttm_bo_lru_cursor { * unlock before the next iteration or after loop exit. */ bool needs_unlock; + /** @arg: Common BO LRU walk arguments. */ + struct ttm_lru_walk_arg arg; }; void ttm_bo_lru_cursor_fini(struct ttm_bo_lru_cursor *curs); -- cgit v1.2.3 From e1e85eb0a977f1eb1d17d4627aceaa8eeac37159 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Mon, 23 Jun 2025 17:53:12 +0200 Subject: drm/ttm, drm/xe: Modify the struct ttm_bo_lru_walk_cursor initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of the struct ttm_operation_ctx, Pass a struct ttm_lru_walk_arg to enable us to easily extend the walk functionality, and to implement ttm_lru_walk_for_evict() using the guarded LRU iteration. Signed-off-by: Thomas Hellström Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250623155313.4901-3-thomas.hellstrom@linux.intel.com --- include/drm/ttm/ttm_bo.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index c19ac92c33a9..ab9a6b343a53 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -484,8 +484,8 @@ struct ttm_bo_lru_cursor { * unlock before the next iteration or after loop exit. */ bool needs_unlock; - /** @arg: Common BO LRU walk arguments. */ - struct ttm_lru_walk_arg arg; + /** @arg: Pointer to common BO LRU walk arguments. */ + struct ttm_lru_walk_arg *arg; }; void ttm_bo_lru_cursor_fini(struct ttm_bo_lru_cursor *curs); @@ -493,7 +493,7 @@ void ttm_bo_lru_cursor_fini(struct ttm_bo_lru_cursor *curs); struct ttm_bo_lru_cursor * ttm_bo_lru_cursor_init(struct ttm_bo_lru_cursor *curs, struct ttm_resource_manager *man, - struct ttm_operation_ctx *ctx); + struct ttm_lru_walk_arg *arg); struct ttm_buffer_object *ttm_bo_lru_cursor_first(struct ttm_bo_lru_cursor *curs); @@ -504,9 +504,9 @@ struct ttm_buffer_object *ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs) */ DEFINE_CLASS(ttm_bo_lru_cursor, struct ttm_bo_lru_cursor *, if (_T) {ttm_bo_lru_cursor_fini(_T); }, - ttm_bo_lru_cursor_init(curs, man, ctx), + ttm_bo_lru_cursor_init(curs, man, arg), struct ttm_bo_lru_cursor *curs, struct ttm_resource_manager *man, - struct ttm_operation_ctx *ctx); + struct ttm_lru_walk_arg *arg); static inline void * class_ttm_bo_lru_cursor_lock_ptr(class_ttm_bo_lru_cursor_t *_T) { return *_T; } @@ -517,7 +517,7 @@ class_ttm_bo_lru_cursor_lock_ptr(class_ttm_bo_lru_cursor_t *_T) * resources on LRU lists. * @_cursor: struct ttm_bo_lru_cursor to use for the iteration. * @_man: The resource manager whose LRU lists to iterate over. - * @_ctx: The struct ttm_operation_context to govern the @_bo locking. + * @_arg: The struct ttm_lru_walk_arg to govern the LRU walk. * @_bo: The struct ttm_buffer_object pointer pointing to the buffer object * for the current iteration. * @@ -530,8 +530,8 @@ class_ttm_bo_lru_cursor_lock_ptr(class_ttm_bo_lru_cursor_t *_T) * example a return or break statement. Exiting the loop will also unlock * (if needed) and unreference @_bo. */ -#define ttm_bo_lru_for_each_reserved_guarded(_cursor, _man, _ctx, _bo) \ - scoped_guard(ttm_bo_lru_cursor, _cursor, _man, _ctx) \ +#define ttm_bo_lru_for_each_reserved_guarded(_cursor, _man, _arg, _bo) \ + scoped_guard(ttm_bo_lru_cursor, _cursor, _man, _arg) \ for ((_bo) = ttm_bo_lru_cursor_first(_cursor); (_bo); \ (_bo) = ttm_bo_lru_cursor_next(_cursor)) -- cgit v1.2.3 From bb8aa27eff6f3376242da37c2d02b9dcc66934b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Mon, 23 Jun 2025 17:53:13 +0200 Subject: drm/ttm, drm_xe, Implement ttm_lru_walk_for_evict() using the guarded LRU iteration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To avoid duplicating the tricky bo locking implementation, Implement ttm_lru_walk_for_evict() using the guarded bo LRU iteration. To facilitate this, support ticketlocking from the guarded bo LRU iteration. v2: - Clean up some static function interfaces (Christian König) - Fix Handling -EALREADY from ticketlocking in the loop by skipping to the next item. (Intel CI) Signed-off-by: Thomas Hellström Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250623155313.4901-4-thomas.hellstrom@linux.intel.com --- include/drm/ttm/ttm_bo.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index ab9a6b343a53..894ff7ccd68e 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -529,10 +529,15 @@ class_ttm_bo_lru_cursor_lock_ptr(class_ttm_bo_lru_cursor_t *_T) * up at looping termination, even if terminated prematurely by, for * example a return or break statement. Exiting the loop will also unlock * (if needed) and unreference @_bo. + * + * Return: If locking of a bo returns an error, then iteration is terminated + * and @_bo is set to a corresponding error pointer. It's illegal to + * dereference @_bo after loop exit. */ #define ttm_bo_lru_for_each_reserved_guarded(_cursor, _man, _arg, _bo) \ scoped_guard(ttm_bo_lru_cursor, _cursor, _man, _arg) \ - for ((_bo) = ttm_bo_lru_cursor_first(_cursor); (_bo); \ - (_bo) = ttm_bo_lru_cursor_next(_cursor)) + for ((_bo) = ttm_bo_lru_cursor_first(_cursor); \ + !IS_ERR_OR_NULL(_bo); \ + (_bo) = ttm_bo_lru_cursor_next(_cursor)) #endif -- cgit v1.2.3 From 3a95a561f2763e3854e207de3ea821e795a1f1e0 Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Thu, 26 Jun 2025 08:08:28 +0200 Subject: uaccess: Define pagefault lock guard Define a pagefault lock guard which allows to simplify functions that need to disable page faults. Signed-off-by: Viktor Malik Link: https://lore.kernel.org/r/8a01beb0b671923976f08297d81242bb2129881d.1750917800.git.vmalik@redhat.com Signed-off-by: Alexei Starovoitov --- include/linux/uaccess.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 7c06f4795670..1beb5b395d81 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -296,6 +296,8 @@ static inline bool pagefault_disabled(void) */ #define faulthandler_disabled() (pagefault_disabled() || in_atomic()) +DEFINE_LOCK_GUARD_0(pagefault, pagefault_disable(), pagefault_enable()) + #ifndef CONFIG_ARCH_HAS_SUBPAGE_FAULTS /** -- cgit v1.2.3 From c430955d0cb87fb7c6b186e457cb3beca4a9c89a Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 3 Jun 2025 22:39:03 -0700 Subject: iio: cros_ec_sensors: add cros_ec_activity driver ChromeOS EC can report activity information derived from the accelerometer: - Reports on-body/off-body as a proximity event. - Reports significant motion as an activity event. This new sensor is a virtual sensor, included only when the EC firmware is compiled with the appropriate module. Signed-off-by: Gwendal Grignou Reviewed-by: Tzung-Bi Shih Link: https://patch.msgid.link/20250604053903.1376465-1-gwendal@google.com Signed-off-by: Jonathan Cameron --- include/linux/iio/common/cros_ec_sensors_core.h | 1 + include/linux/platform_data/cros_ec_commands.h | 26 +++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h index e72167b96d27..bb966abcde53 100644 --- a/include/linux/iio/common/cros_ec_sensors_core.h +++ b/include/linux/iio/common/cros_ec_sensors_core.h @@ -126,5 +126,6 @@ extern const struct dev_pm_ops cros_ec_sensors_pm_ops; /* List of extended channel specification for all sensors. */ extern const struct iio_chan_spec_ext_info cros_ec_sensors_ext_info[]; +extern const struct iio_chan_spec_ext_info cros_ec_sensors_limited_info[]; #endif /* __CROS_EC_SENSORS_CORE_H */ diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 1f4e4f2b89bb..c19b404e3d8d 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -2388,6 +2388,12 @@ enum motionsense_command { */ MOTIONSENSE_CMD_SENSOR_SCALE = 18, + /* + * Activity management + * Retrieve current status of given activity. + */ + MOTIONSENSE_CMD_GET_ACTIVITY = 20, + /* Number of motionsense sub-commands. */ MOTIONSENSE_NUM_CMDS }; @@ -2447,6 +2453,11 @@ enum motionsensor_orientation { MOTIONSENSE_ORIENTATION_UNKNOWN = 4, }; +struct ec_response_activity_data { + uint8_t activity; /* motionsensor_activity */ + uint8_t state; +} __ec_todo_packed; + struct ec_response_motion_sensor_data { /* Flags for each sensor. */ uint8_t flags; @@ -2460,8 +2471,7 @@ struct ec_response_motion_sensor_data { uint32_t timestamp; }; struct __ec_todo_unpacked { - uint8_t activity; /* motionsensor_activity */ - uint8_t state; + struct ec_response_activity_data activity_data; int16_t add_info[2]; }; }; @@ -2494,6 +2504,7 @@ enum motionsensor_activity { MOTIONSENSE_ACTIVITY_SIG_MOTION = 1, MOTIONSENSE_ACTIVITY_DOUBLE_TAP = 2, MOTIONSENSE_ACTIVITY_ORIENTATION = 3, + MOTIONSENSE_ACTIVITY_BODY_DETECTION = 4, }; struct ec_motion_sense_activity { @@ -2671,6 +2682,7 @@ struct ec_params_motion_sense { uint32_t max_data_vector; } fifo_read; + /* Used for MOTIONSENSE_CMD_SET_ACTIVITY */ struct ec_motion_sense_activity set_activity; /* Used for MOTIONSENSE_CMD_LID_ANGLE */ @@ -2716,6 +2728,12 @@ struct ec_params_motion_sense { */ int16_t hys_degree; } tablet_mode_threshold; + + /* Used for MOTIONSENSE_CMD_GET_ACTIVITY */ + struct __ec_todo_unpacked { + uint8_t sensor_num; + uint8_t activity; /* enum motionsensor_activity */ + } get_activity; }; } __ec_todo_packed; @@ -2833,6 +2851,10 @@ struct ec_response_motion_sense { uint16_t hys_degree; } tablet_mode_threshold; + /* USED for MOTIONSENSE_CMD_GET_ACTIVITY. */ + struct __ec_todo_unpacked { + uint8_t state; + } get_activity; }; } __ec_todo_packed; -- cgit v1.2.3 From 97e6882ed1a16cd22184127abf2bc9b8202f37e0 Mon Sep 17 00:00:00 2001 From: Pop Ioan Daniel Date: Thu, 5 Jun 2025 18:09:40 +0300 Subject: iio: backend: update iio_backend_oversampling_ratio_set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add chan parameter to iio_backend_oversampling_ratio_set() to allow for contexts where the channel must be specified. Modify all existing users. Reviewed-by: David Lechner Reviewed-by: Nuno Sá Signed-off-by: Pop Ioan Daniel Link: https://patch.msgid.link/20250605150948.3091827-3-pop.ioan-daniel@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 1f528fbd9d11..7f815f3fed6a 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -155,7 +155,7 @@ struct iio_backend_ops { enum iio_backend_interface_type *type); int (*data_size_set)(struct iio_backend *back, unsigned int size); int (*oversampling_ratio_set)(struct iio_backend *back, - unsigned int ratio); + unsigned int chan, unsigned int ratio); int (*read_raw)(struct iio_backend *back, struct iio_chan_spec const *chan, int *val, int *val2, long mask); @@ -228,6 +228,7 @@ int iio_backend_interface_type_get(struct iio_backend *back, enum iio_backend_interface_type *type); int iio_backend_data_size_set(struct iio_backend *back, unsigned int size); int iio_backend_oversampling_ratio_set(struct iio_backend *back, + unsigned int chan, unsigned int ratio); int iio_backend_read_raw(struct iio_backend *back, struct iio_chan_spec const *chan, int *val, int *val2, -- cgit v1.2.3 From 1905e6c9ce018bd7eb9a1722ae689e9cebed24ad Mon Sep 17 00:00:00 2001 From: Jonathan Santos Date: Wed, 11 Jun 2025 08:50:27 -0300 Subject: dt-bindings: iio: adc: ad7768-1: add trigger-sources property In addition to GPIO synchronization, The AD7768-1 also supports synchronization over SPI, which use is recommended when the GPIO cannot provide a pulse synchronous with the base MCLK signal. It consists of looping back the SYNC_OUT to the SYNC_IN pin and send a command via SPI to trigger the synchronization. Introduce the 'trigger-sources' property to enable SPI-based synchronization via SYNC_OUT pin, along with additional optional entries for GPIO3 and DRDY pins. Also create #trigger-source-cells property to differentiate the trigger sources provided by the ADC. To improve readability, create a adi,ad7768-1.h header with the macros for the cell values. While at it, add description to the interrupts property. Acked-by: Conor Dooley Reviewed-by: David Lechner Signed-off-by: Jonathan Santos Link: https://patch.msgid.link/713fd786010c75858700efaec8bb285274e7057e.1749569957.git.Jonathan.Santos@analog.com Signed-off-by: Jonathan Cameron --- include/dt-bindings/iio/adc/adi,ad7768-1.h | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 include/dt-bindings/iio/adc/adi,ad7768-1.h (limited to 'include') diff --git a/include/dt-bindings/iio/adc/adi,ad7768-1.h b/include/dt-bindings/iio/adc/adi,ad7768-1.h new file mode 100644 index 000000000000..34d92856a50b --- /dev/null +++ b/include/dt-bindings/iio/adc/adi,ad7768-1.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_ADI_AD7768_1_H +#define _DT_BINDINGS_ADI_AD7768_1_H + +#define AD7768_TRIGGER_SOURCE_SYNC_OUT 0 +#define AD7768_TRIGGER_SOURCE_GPIO3 1 +#define AD7768_TRIGGER_SOURCE_DRDY 2 + +#endif /* _DT_BINDINGS_ADI_AD7768_1_H */ -- cgit v1.2.3 From 12ffc3b1513ebc1f11ae77d053948504a94a68a6 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 13 Jun 2025 16:43:44 -0500 Subject: PM: Restrict swap use to later in the suspend sequence Currently swap is restricted before drivers have had a chance to do their prepare() PM callbacks. Restricting swap this early means that if a driver needs to evict some content from memory into sawp in it's prepare callback, it won't be able to. On AMD dGPUs this can lead to failed suspends under memory pressure situations as all VRAM must be evicted to system memory or swap. Move the swap restriction to right after all devices have had a chance to do the prepare() callback. If there is any problem with the sequence, restore swap in the appropriate dpm resume callbacks or error handling paths. Closes: https://github.com/ROCm/ROCK-Kernel-Driver/issues/174 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2362 Signed-off-by: Mario Limonciello Tested-by: Nat Wittstock Tested-by: Lucian Langa Link: https://patch.msgid.link/20250613214413.4127087-1-superm1@kernel.org Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index b1c76c8f2c82..6a3f92098872 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -446,6 +446,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb); extern void ksys_sync_helper(void); extern void pm_report_hw_sleep_time(u64 t); extern void pm_report_max_hw_sleep(u64 t); +void pm_restrict_gfp_mask(void); +void pm_restore_gfp_mask(void); #define pm_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ @@ -492,6 +494,9 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) static inline void pm_report_hw_sleep_time(u64 t) {}; static inline void pm_report_max_hw_sleep(u64 t) {}; +static inline void pm_restrict_gfp_mask(void) {} +static inline void pm_restore_gfp_mask(void) {} + static inline void ksys_sync_helper(void) {} #define pm_notifier(fn, pri) do { (void)(fn); } while (0) -- cgit v1.2.3 From 08bf1663c21a3e815eda28fa242d84c945ca3b94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bence=20Cs=C3=B3k=C3=A1s?= Date: Tue, 10 Jun 2025 10:22:53 +0200 Subject: dmaengine: Add devm_dma_request_chan() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expand the arsenal of devm functions for DMA devices, this time for requesting channels. Signed-off-by: Bence Csókás Link: https://lore.kernel.org/r/20250610082256.400492-2-csokas.bence@prolan.hu Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index bb146c5ac3e4..6de7c05d6bd8 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1524,6 +1524,7 @@ struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, struct dma_chan *dma_request_chan(struct device *dev, const char *name); struct dma_chan *dma_request_chan_by_mask(const dma_cap_mask_t *mask); +struct dma_chan *devm_dma_request_chan(struct device *dev, const char *name); void dma_release_channel(struct dma_chan *chan); int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps); @@ -1560,6 +1561,12 @@ static inline struct dma_chan *dma_request_chan_by_mask( { return ERR_PTR(-ENODEV); } + +static inline struct dma_chan *devm_dma_request_chan(struct device *dev, const char *name) +{ + return ERR_PTR(-ENODEV); +} + static inline void dma_release_channel(struct dma_chan *chan) { } -- cgit v1.2.3 From e04c3521df073b688b9e9e2213cd3c588e3b6e68 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 25 Jun 2025 10:37:10 -0700 Subject: drm/fourcc: Add missing half-float formats Not something that is likely to be scanned out, but GPUs usually support half-float formats with 1, 2, or possibly 3 components, and it is useful to be able to import/export them with a valid fourcc, and/or use gbm to create them. These correspond to PIPE_FORMAT_{R16,R16G16,R16G16B16}_FLOAT in mesa. Signed-off-by: Rob Clark Acked-by: Simona Vetter Acked-by: Daniel Stone Link: https://lore.kernel.org/r/20250625173712.116446-2-robin.clark@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- include/uapi/drm/drm_fourcc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 6483f76a2165..89a82d02b7e8 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -218,7 +218,7 @@ extern "C" { #define DRM_FORMAT_ABGR16161616 fourcc_code('A', 'B', '4', '8') /* [63:0] A:B:G:R 16:16:16:16 little endian */ /* - * Floating point 64bpp RGB + * Half-Floating point - 16b/component * IEEE 754-2008 binary16 half-precision float * [15:0] sign:exponent:mantissa 1:5:10 */ @@ -228,6 +228,10 @@ extern "C" { #define DRM_FORMAT_ARGB16161616F fourcc_code('A', 'R', '4', 'H') /* [63:0] A:R:G:B 16:16:16:16 little endian */ #define DRM_FORMAT_ABGR16161616F fourcc_code('A', 'B', '4', 'H') /* [63:0] A:B:G:R 16:16:16:16 little endian */ +#define DRM_FORMAT_R16F fourcc_code('R', ' ', ' ', 'H') /* [15:0] R 16 little endian */ +#define DRM_FORMAT_GR1616F fourcc_code('G', 'R', ' ', 'H') /* [31:0] G:R 16:16 little endian */ +#define DRM_FORMAT_BGR161616F fourcc_code('B', 'G', 'R', 'H') /* [47:0] B:G:R 16:16:16 little endian */ + /* * RGBA format with 10-bit components packed in 64-bit per pixel, with 6 bits * of unused padding per component: -- cgit v1.2.3 From 3529cb5ab16b4f1f8bbc31dc39a1076a94bd1e38 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 25 Jun 2025 10:37:11 -0700 Subject: drm/fourcc: Add 32b float formats Add 1, 2, 3, and 4 component 32b float formats, so that buffers with these formats can be imported/exported with fourcc+modifier, and/or created by gbm. These correspond to PIPE_FORMAT_{R32,R32G32,R32G32B32,R32G32B32A32}_FLOAT in mesa. v2: Fix comment describing float32 layout [Sima] Signed-off-by: Rob Clark Acked-by: Simona Vetter Acked-by: Daniel Stone Link: https://lore.kernel.org/r/20250625173712.116446-3-robin.clark@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- include/uapi/drm/drm_fourcc.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 89a82d02b7e8..0d375c5bfc9d 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -232,6 +232,16 @@ extern "C" { #define DRM_FORMAT_GR1616F fourcc_code('G', 'R', ' ', 'H') /* [31:0] G:R 16:16 little endian */ #define DRM_FORMAT_BGR161616F fourcc_code('B', 'G', 'R', 'H') /* [47:0] B:G:R 16:16:16 little endian */ +/* + * Floating point - 32b/component + * IEEE 754-2008 binary32 float + * [31:0] sign:exponent:mantissa 1:8:23 + */ +#define DRM_FORMAT_R32F fourcc_code('R', ' ', ' ', 'F') /* [31:0] R 32 little endian */ +#define DRM_FORMAT_GR3232F fourcc_code('G', 'R', ' ', 'F') /* [63:0] R:G 32:32 little endian */ +#define DRM_FORMAT_BGR323232F fourcc_code('B', 'G', 'R', 'F') /* [95:0] R:G:B 32:32:32 little endian */ +#define DRM_FORMAT_ABGR32323232F fourcc_code('A', 'B', '8', 'F') /* [127:0] R:G:B:A 32:32:32:32 little endian */ + /* * RGBA format with 10-bit components packed in 64-bit per pixel, with 6 bits * of unused padding per component: -- cgit v1.2.3 From ce57bc9771411d6d27f2ca7b40396cbd7d684ba9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 26 Jun 2025 18:23:07 +0300 Subject: regulator: core: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Note that kernel.h is discouraged to be included as it's written at the top of that file. Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20250626152307.322627-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/regulator/coupler.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/regulator/coupler.h b/include/linux/regulator/coupler.h index 73291f280a23..5e314a4294fb 100644 --- a/include/linux/regulator/coupler.h +++ b/include/linux/regulator/coupler.h @@ -8,7 +8,8 @@ #ifndef __LINUX_REGULATOR_COUPLER_H_ #define __LINUX_REGULATOR_COUPLER_H_ -#include +#include +#include #include struct regulator_coupler; -- cgit v1.2.3 From 040ae95a984f04afa5d300a97d11643a1577aa72 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Wed, 25 Jun 2025 18:21:55 +0800 Subject: net: Remove unused function first_net_device_rcu() This is unused since commit f04565ddf52e ("dev: use name hash for dev_seq_ops") Signed-off-by: Yue Haibing Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250625102155.483570-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index db5bfd4e7ec8..5847c20994d3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3316,13 +3316,6 @@ static inline struct net_device *first_net_device(struct net *net) net_device_entry(net->dev_base_head.next); } -static inline struct net_device *first_net_device_rcu(struct net *net) -{ - struct list_head *lh = rcu_dereference(list_next_rcu(&net->dev_base_head)); - - return lh == &net->dev_base_head ? NULL : net_device_entry(lh); -} - int netdev_boot_setup_check(struct net_device *dev); struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, const char *hwaddr); -- cgit v1.2.3 From 2d22b63f3a5aae2088708941d08cf0f01f430a58 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 26 Jun 2025 12:04:59 +0200 Subject: drm/mipi-dsi: Add dev_is_mipi_dsi function This will be especially useful for generic panels (like panel-simple) which can take different code path depending on if they are MIPI-DSI devices or platform devices. Reviewed-by: Javier Martinez Canillas Tested-by: Francesco Dolcini # Toradex Colibri iMX6 Link: https://lore.kernel.org/r/20250626-drm-panel-simple-fixes-v2-1-5afcaa608bdc@kernel.org Signed-off-by: Maxime Ripard --- include/drm/drm_mipi_dsi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index b37860f4a895..6d2c08e81101 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -223,6 +223,9 @@ struct mipi_dsi_multi_context { #define to_mipi_dsi_device(__dev) container_of_const(__dev, struct mipi_dsi_device, dev) +extern const struct bus_type mipi_dsi_bus_type; +#define dev_is_mipi_dsi(dev) ((dev)->bus == &mipi_dsi_bus_type) + /** * mipi_dsi_pixel_format_to_bpp - obtain the number of bits per pixel for any * given pixel format defined by the MIPI DSI -- cgit v1.2.3 From 3dd922c418903be7dd8df7212f968e94d0fe73c7 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Tue, 24 Jun 2025 11:01:10 +0200 Subject: drm/panic: Add a private field to struct drm_scanout_buffer This allows driver to set some private data in get_scanout_buffer(), and re-use them in set_pixel() callback. Signed-off-by: Jocelyn Falempe Link: https://lore.kernel.org/r/20250624091501.257661-2-jfalempe@redhat.com Signed-off-by: Maarten Lankhorst --- include/drm/drm_panic.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/drm/drm_panic.h b/include/drm/drm_panic.h index 310c88c4d336..ac0e46b73436 100644 --- a/include/drm/drm_panic.h +++ b/include/drm/drm_panic.h @@ -72,6 +72,12 @@ struct drm_scanout_buffer { void (*set_pixel)(struct drm_scanout_buffer *sb, unsigned int x, unsigned int y, u32 color); + /** + * @private: private pointer that you can use in the callbacks + * set_pixel() + */ + void *private; + }; #ifdef CONFIG_DRM_PANIC -- cgit v1.2.3 From 718370ff283284f191155a5eb9d4f376aaf93bb8 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Tue, 24 Jun 2025 11:01:14 +0200 Subject: drm/ttm: Add ttm_bo_kmap_try_from_panic() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the ttm bo is backed by pages, then it's possible to safely kmap one page at a time, using kmap_try_from_panic(). Unfortunately there is no way to do the same with ioremap, so it only supports the kmap case. This is needed for proper drm_panic support with xe driver. Signed-off-by: Jocelyn Falempe Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250624091501.257661-6-jfalempe@redhat.com Signed-off-by: Maarten Lankhorst --- include/drm/ttm/ttm_bo.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index 8ad6e2713625..5332ee74d95b 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -401,6 +401,7 @@ int ttm_bo_init_validate(struct ttm_device *bdev, struct ttm_buffer_object *bo, int ttm_bo_kmap(struct ttm_buffer_object *bo, unsigned long start_page, unsigned long num_pages, struct ttm_bo_kmap_obj *map); void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map); +void *ttm_bo_kmap_try_from_panic(struct ttm_buffer_object *bo, unsigned long page); int ttm_bo_vmap(struct ttm_buffer_object *bo, struct iosys_map *map); void ttm_bo_vunmap(struct ttm_buffer_object *bo, struct iosys_map *map); int ttm_bo_mmap_obj(struct vm_area_struct *vma, struct ttm_buffer_object *bo); -- cgit v1.2.3 From 017a6f7e7e25017eef9c30946cb0e7c803cb3f35 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 27 Jun 2025 13:34:15 +0300 Subject: firmware: sysfb: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Note that kernel.h is discouraged to be included as it's written at the top of that file. Reviewed-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250627103454.702606-1-andriy.shevchenko@linux.intel.com Signed-off-by: Javier Martinez Canillas --- include/linux/sysfb.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index 07cbab516942..b449665c686a 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -7,9 +7,13 @@ * Copyright (c) 2012-2013 David Herrmann */ -#include +#include +#include + #include +struct device; +struct platform_device; struct screen_info; enum { -- cgit v1.2.3 From 8ad00a81d769ad81aac0ef65a4adbc2f53169b3c Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 25 Jun 2025 13:48:22 +0200 Subject: drm/format-helper: Split off byte swapping from drm_fb_xrgb8888_to_rgb565() Move big-endian support from drm_fb_xrgb8888_to_rgb565() into the new helper drm_xrgb8888_to_rgb565be(). The functionality is required for displays with big-endian byte order. Update all callers. With the change applied, drm_fb_xrgb8888_to_rgb565() has the same signature as the other conversion functions, which is required for further updates to drm_fb_blit(). Also makes the format-conversion helper available to panic handlers, if necessary. Signed-off-by: Thomas Zimmermann Reviewed-by: Jocelyn Falempe Link: https://lore.kernel.org/r/20250625114911.1121301-1-tzimmermann@suse.de --- include/drm/drm_format_helper.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h index 0d3ee2a1313f..562bc383ece4 100644 --- a/include/drm/drm_format_helper.h +++ b/include/drm/drm_format_helper.h @@ -82,8 +82,10 @@ void drm_fb_xrgb8888_to_rgb332(struct iosys_map *dst, const unsigned int *dst_pi const struct drm_rect *clip, struct drm_format_conv_state *state); void drm_fb_xrgb8888_to_rgb565(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip, struct drm_format_conv_state *state, - bool swab); + const struct drm_rect *clip, struct drm_format_conv_state *state); +void drm_fb_xrgb8888_to_rgb565be(struct iosys_map *dst, const unsigned int *dst_pitch, + const struct iosys_map *src, const struct drm_framebuffer *fb, + const struct drm_rect *clip, struct drm_format_conv_state *state); void drm_fb_xrgb8888_to_xrgb1555(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state); -- cgit v1.2.3 From 792ea7b6cafa46f8e6d6f40c557e614358a89520 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 9 Jun 2025 17:41:31 -0300 Subject: iommu: Remove ops->pgsize_bitmap No driver uses it now, remove the core code. Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Tested-by: Nicolin Chen Link: https://lore.kernel.org/r/7-v2-68a2e1ba507c+1fb-iommu_rm_ops_pgsize_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 156732807994..7073be1d8841 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -604,7 +604,6 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, * It is required to call iommufd_viommu_alloc() helper for * a bundled allocation of the core and the driver structures, * using the given @ictx pointer. - * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops * @identity_domain: An always available, always attachable identity * translation. @@ -659,7 +658,6 @@ struct iommu_ops { struct iommufd_ctx *ictx, unsigned int viommu_type); const struct iommu_domain_ops *default_domain_ops; - unsigned long pgsize_bitmap; struct module *owner; struct iommu_domain *identity_domain; struct iommu_domain *blocked_domain; -- cgit v1.2.3 From 3c2968fcd72c4b130894d1a2b835e18474c559e2 Mon Sep 17 00:00:00 2001 From: Junhui Liu Date: Fri, 13 Jun 2025 16:49:23 +0800 Subject: dt-bindings: reset: add support for canaan,k230-rst Introduces a reset controller driver for the Kendryte K230 SoC, resposible for managing the reset functionality of the CPUs and various sub-modules. Reviewed-by: Conor Dooley Reviewed-by: Chen Wang Signed-off-by: Junhui Liu Link: https://lore.kernel.org/r/20250613-k230-reset-v4-1-e5266d2be440@pigmoral.tech Signed-off-by: Philipp Zabel --- include/dt-bindings/reset/canaan,k230-rst.h | 90 +++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 include/dt-bindings/reset/canaan,k230-rst.h (limited to 'include') diff --git a/include/dt-bindings/reset/canaan,k230-rst.h b/include/dt-bindings/reset/canaan,k230-rst.h new file mode 100644 index 000000000000..e4f6612607fe --- /dev/null +++ b/include/dt-bindings/reset/canaan,k230-rst.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (C) 2023-2024 Canaan Bright Sight Co., Ltd + * Copyright (C) 2024-2025 Junhui Liu + */ +#ifndef _DT_BINDINGS_CANAAN_K230_RST_H_ +#define _DT_BINDINGS_CANAAN_K230_RST_H_ + +#define RST_CPU0 0 +#define RST_CPU1 1 +#define RST_CPU0_FLUSH 2 +#define RST_CPU1_FLUSH 3 +#define RST_AI 4 +#define RST_VPU 5 +#define RST_HISYS 6 +#define RST_HISYS_AHB 7 +#define RST_SDIO0 8 +#define RST_SDIO1 9 +#define RST_SDIO_AXI 10 +#define RST_USB0 11 +#define RST_USB1 12 +#define RST_USB0_AHB 13 +#define RST_USB1_AHB 14 +#define RST_SPI0 15 +#define RST_SPI1 16 +#define RST_SPI2 17 +#define RST_SEC 18 +#define RST_PDMA 19 +#define RST_SDMA 20 +#define RST_DECOMPRESS 21 +#define RST_SRAM 22 +#define RST_SHRM_AXIM 23 +#define RST_SHRM_AXIS 24 +#define RST_NONAI2D 25 +#define RST_MCTL 26 +#define RST_ISP 27 +#define RST_ISP_DW 28 +#define RST_DPU 29 +#define RST_DISP 30 +#define RST_GPU 31 +#define RST_AUDIO 32 +#define RST_TIMER0 33 +#define RST_TIMER1 34 +#define RST_TIMER2 35 +#define RST_TIMER3 36 +#define RST_TIMER4 37 +#define RST_TIMER5 38 +#define RST_TIMER_APB 39 +#define RST_HDI 40 +#define RST_WDT0 41 +#define RST_WDT1 42 +#define RST_WDT0_APB 43 +#define RST_WDT1_APB 44 +#define RST_TS_APB 45 +#define RST_MAILBOX 46 +#define RST_STC 47 +#define RST_PMU 48 +#define RST_LOSYS_APB 49 +#define RST_UART0 50 +#define RST_UART1 51 +#define RST_UART2 52 +#define RST_UART3 53 +#define RST_UART4 54 +#define RST_I2C0 55 +#define RST_I2C1 56 +#define RST_I2C2 57 +#define RST_I2C3 58 +#define RST_I2C4 59 +#define RST_JAMLINK0_APB 60 +#define RST_JAMLINK1_APB 61 +#define RST_JAMLINK2_APB 62 +#define RST_JAMLINK3_APB 63 +#define RST_CODEC_APB 64 +#define RST_GPIO_DB 65 +#define RST_GPIO_APB 66 +#define RST_ADC 67 +#define RST_ADC_APB 68 +#define RST_PWM_APB 69 +#define RST_SHRM_APB 70 +#define RST_CSI0 71 +#define RST_CSI1 72 +#define RST_CSI2 73 +#define RST_CSI_DPHY 74 +#define RST_ISP_AHB 75 +#define RST_M0 76 +#define RST_M1 77 +#define RST_M2 78 +#define RST_SPI2AXI 79 + +#endif -- cgit v1.2.3 From 05bc6e6290f91d2d40086ab4ef52da21c14ec4b6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Jun 2025 20:38:31 +0200 Subject: timekeeping: Provide time getters for auxiliary clocks Provide interfaces similar to the ktime_get*() family which provide access to the auxiliary clocks. These interfaces have a boolean return value, which indicates whether the accessed clock is valid or not. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250625183757.868342628@linutronix.de --- include/linux/posix-timers.h | 5 +++++ include/linux/timekeeping.h | 11 +++++++++++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index dd48c64b605e..4d3dbcef379e 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -37,6 +37,11 @@ static inline int clockid_to_fd(const clockid_t clk) return ~(clk >> 3); } +static inline bool clockid_aux_valid(clockid_t id) +{ + return IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS) && id >= CLOCK_AUX && id <= CLOCK_AUX_LAST; +} + #ifdef CONFIG_POSIX_TIMERS #include diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 542773650200..de9a3b7d7d0d 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -263,6 +263,17 @@ extern bool timekeeping_rtc_skipresume(void); extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta); +/* + * Auxiliary clock interfaces + */ +#ifdef CONFIG_POSIX_AUX_CLOCKS +extern bool ktime_get_aux(clockid_t id, ktime_t *kt); +extern bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *kt); +#else +static inline bool ktime_get_aux(clockid_t id, ktime_t *kt) { return false; } +static inline bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *kt) { return false; } +#endif + /** * struct system_time_snapshot - simultaneous raw/real time capture with * counter value -- cgit v1.2.3 From c5b60592886f97b01503c1bb553f88d6a7df42ea Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Fri, 27 Jun 2025 09:58:54 +0200 Subject: interconnect: avoid memory allocation when 'icc_bw_lock' is held The 'icc_bw_lock' mutex is introduced in commit af42269c3523 ("interconnect: Fix locking for runpm vs reclaim") in order to decouple serialization of bw aggregation from codepaths that require memory allocation. However commit d30f83d278a9 ("interconnect: core: Add dynamic id allocation support") added a devm_kasprintf() call into a path protected by the 'icc_bw_lock' which causes the following lockdep warning on machines like the Lenovo ThinkPad X13s: ====================================================== WARNING: possible circular locking dependency detected 6.16.0-rc3 #15 Not tainted ------------------------------------------------------ (udev-worker)/342 is trying to acquire lock: ffffb973f7ec4638 (fs_reclaim){+.+.}-{0:0}, at: __kmalloc_node_track_caller_noprof+0xa0/0x3e0 but task is already holding lock: ffffb973f7f7f0e8 (icc_bw_lock){+.+.}-{4:4}, at: icc_node_add+0x44/0x154 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (icc_bw_lock){+.+.}-{4:4}: icc_init+0x48/0x108 do_one_initcall+0x64/0x30c kernel_init_freeable+0x27c/0x500 kernel_init+0x20/0x1d8 ret_from_fork+0x10/0x20 -> #0 (fs_reclaim){+.+.}-{0:0}: __lock_acquire+0x136c/0x2114 lock_acquire+0x1c8/0x354 fs_reclaim_acquire+0x74/0xa8 __kmalloc_node_track_caller_noprof+0xa0/0x3e0 devm_kmalloc+0x54/0x124 devm_kvasprintf+0x74/0xd4 devm_kasprintf+0x58/0x80 icc_node_add+0xb4/0x154 qcom_osm_l3_probe+0x20c/0x314 [icc_osm_l3] platform_probe+0x68/0xd8 really_probe+0xc0/0x38c __driver_probe_device+0x7c/0x160 driver_probe_device+0x40/0x110 __driver_attach+0xfc/0x208 bus_for_each_dev+0x74/0xd0 driver_attach+0x24/0x30 bus_add_driver+0x110/0x234 driver_register+0x60/0x128 __platform_driver_register+0x24/0x30 osm_l3_driver_init+0x20/0x1000 [icc_osm_l3] do_one_initcall+0x64/0x30c do_init_module+0x58/0x23c load_module+0x1df8/0x1f70 init_module_from_file+0x88/0xc4 idempotent_init_module+0x188/0x280 __arm64_sys_finit_module+0x6c/0xd8 invoke_syscall+0x48/0x110 el0_svc_common.constprop.0+0xc0/0xe0 do_el0_svc+0x1c/0x28 el0_svc+0x4c/0x158 el0t_64_sync_handler+0xc8/0xcc el0t_64_sync+0x198/0x19c other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(icc_bw_lock); lock(fs_reclaim); lock(icc_bw_lock); lock(fs_reclaim); *** DEADLOCK *** The icc_node_add() functions is not designed to fail, and as such it should not do any memory allocation. In order to avoid this, add a new helper function for the name generation to be called by drivers which are using the new dynamic id feature. Fixes: d30f83d278a9 ("interconnect: core: Add dynamic id allocation support") Signed-off-by: Gabor Juhos Link: https://lore.kernel.org/r/20250625-icc-bw-lockdep-v3-1-2b8f8b8987c4@gmail.com Co-developed-by: Johan Hovold Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20250627075854.26943-1-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- include/linux/interconnect-provider.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h index 55cfebc658e6..8a2f652a05ff 100644 --- a/include/linux/interconnect-provider.h +++ b/include/linux/interconnect-provider.h @@ -119,6 +119,7 @@ int icc_std_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, struct icc_node *icc_node_create_dyn(void); struct icc_node *icc_node_create(int id); void icc_node_destroy(int id); +int icc_node_set_name(struct icc_node *node, const struct icc_provider *provider, const char *name); int icc_link_nodes(struct icc_node *src_node, struct icc_node **dst_node); int icc_link_create(struct icc_node *node, const int dst_id); void icc_node_add(struct icc_node *node, struct icc_provider *provider); @@ -152,6 +153,12 @@ static inline void icc_node_destroy(int id) { } +static inline int icc_node_set_name(struct icc_node *node, const struct icc_provider *provider, + const char *name) +{ + return -EOPNOTSUPP; +} + static inline int icc_link_nodes(struct icc_node *src_node, struct icc_node **dst_node) { return -EOPNOTSUPP; -- cgit v1.2.3 From 8d68411a128705f86da7f037e1c33d81786fee96 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Jun 2025 15:30:16 +0000 Subject: tcp: remove rtx_syn_ack field Now inet_rtx_syn_ack() is only used by TCP, it can directly call tcp_rtx_synack() instead of using an indirect call to req->rsk_ops->rtx_syn_ack(). Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250626153017.2156274-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/request_sock.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index b07b1cd14e9f..bad7d16a5515 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -30,8 +30,6 @@ struct request_sock_ops { unsigned int obj_size; struct kmem_cache *slab; char *slab_name; - int (*rtx_syn_ack)(const struct sock *sk, - struct request_sock *req); void (*send_ack)(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); void (*send_reset)(const struct sock *sk, -- cgit v1.2.3 From cf56a98202970adf298df5caaa225ed68350e9ab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Jun 2025 15:30:17 +0000 Subject: tcp: remove inet_rtx_syn_ack() inet_rtx_syn_ack() is a simple wrapper around tcp_rtx_synack(), if we move req->num_retrans update. Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250626153017.2156274-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/request_sock.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index bad7d16a5515..6a5ec1418e85 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -39,8 +39,6 @@ struct request_sock_ops { void (*syn_ack_timeout)(const struct request_sock *req); }; -int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req); - struct saved_syn { u32 mac_hdrlen; u32 network_hdrlen; -- cgit v1.2.3 From 7f15ee35972dd3dee37704bfd0f136290f6d63d9 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Thu, 26 Jun 2025 15:52:17 +0200 Subject: dpll: add reference-sync netlink attribute Add new netlink attribute to allow user space configuration of reference sync pin pairs, where both pins are used to provide one clock signal consisting of both: base frequency and sync signal. Reviewed-by: Przemek Kitszel Reviewed-by: Milena Olech Reviewed-by: Jiri Pirko Signed-off-by: Arkadiusz Kubalewski Link: https://patch.msgid.link/20250626135219.1769350-2-arkadiusz.kubalewski@intel.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/dpll.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h index 349e1b3ca1ae..37b438ce8efc 100644 --- a/include/uapi/linux/dpll.h +++ b/include/uapi/linux/dpll.h @@ -249,6 +249,7 @@ enum dpll_a_pin { DPLL_A_PIN_ESYNC_FREQUENCY, DPLL_A_PIN_ESYNC_FREQUENCY_SUPPORTED, DPLL_A_PIN_ESYNC_PULSE, + DPLL_A_PIN_REFERENCE_SYNC, __DPLL_A_PIN_MAX, DPLL_A_PIN_MAX = (__DPLL_A_PIN_MAX - 1) -- cgit v1.2.3 From 58256a26bfb37a94738dd65618b1f31f460f8d91 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Thu, 26 Jun 2025 15:52:18 +0200 Subject: dpll: add reference sync get/set Define function for reference sync pin registration and callback ops to set/get current feature state. Implement netlink handler to fill netlink messages with reference sync pin configuration of capable pins (pin-get). Implement netlink handler to call proper ops and configure reference sync pin state (pin-set). Reviewed-by: Przemek Kitszel Reviewed-by: Milena Olech Reviewed-by: Jiri Pirko Signed-off-by: Arkadiusz Kubalewski Link: https://patch.msgid.link/20250626135219.1769350-3-arkadiusz.kubalewski@intel.com Signed-off-by: Jakub Kicinski --- include/linux/dpll.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/dpll.h b/include/linux/dpll.h index 6ad6c2968a28..fa1e76920d0e 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -103,6 +103,16 @@ struct dpll_pin_ops { const struct dpll_device *dpll, void *dpll_priv, struct dpll_pin_esync *esync, struct netlink_ext_ack *extack); + int (*ref_sync_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_pin *ref_sync_pin, + void *ref_sync_pin_priv, + const enum dpll_pin_state state, + struct netlink_ext_ack *extack); + int (*ref_sync_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_pin *ref_sync_pin, + void *ref_sync_pin_priv, + enum dpll_pin_state *state, + struct netlink_ext_ack *extack); }; struct dpll_pin_frequency { @@ -202,6 +212,9 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin, const struct dpll_pin_ops *ops, void *priv); +int dpll_pin_ref_sync_pair_add(struct dpll_pin *pin, + struct dpll_pin *ref_sync_pin); + int dpll_device_change_ntf(struct dpll_device *dpll); int dpll_pin_change_ntf(struct dpll_pin *pin); -- cgit v1.2.3 From 1aa93cfb1288a141c64e923dbaaa277616f0f7d5 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Mon, 26 Feb 2024 14:25:43 +0100 Subject: drm/fourcc: Add RGB161616 and BGR161616 formats Add FourCC definitions for the 48-bit RGB/BGR formats to the DRM/KMS uapi. The format will be used by the Raspberry Pi PiSP Back End, supported by a V4L2 driver in kernel space and by libcamera in userspace, which uses the DRM FourCC identifiers. Signed-off-by: Jacopo Mondi Reviewed-by: Rob Clark Reviewed-by: Simon Ser Reviewed-by: Naushir Patuck Link: https://lore.kernel.org/r/20240226132544.82817-1-jacopo.mondi@ideasonboard.com Signed-off-by: Javier Martinez Canillas --- include/uapi/drm/drm_fourcc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 0d375c5bfc9d..ea91aa8afde9 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -210,6 +210,10 @@ extern "C" { #define DRM_FORMAT_RGBA1010102 fourcc_code('R', 'A', '3', '0') /* [31:0] R:G:B:A 10:10:10:2 little endian */ #define DRM_FORMAT_BGRA1010102 fourcc_code('B', 'A', '3', '0') /* [31:0] B:G:R:A 10:10:10:2 little endian */ +/* 48 bpp RGB */ +#define DRM_FORMAT_RGB161616 fourcc_code('R', 'G', '4', '8') /* [47:0] R:G:B 16:16:16 little endian */ +#define DRM_FORMAT_BGR161616 fourcc_code('B', 'G', '4', '8') /* [47:0] B:G:R 16:16:16 little endian */ + /* 64 bpp RGB */ #define DRM_FORMAT_XRGB16161616 fourcc_code('X', 'R', '4', '8') /* [63:0] x:R:G:B 16:16:16:16 little endian */ #define DRM_FORMAT_XBGR16161616 fourcc_code('X', 'B', '4', '8') /* [63:0] x:B:G:R 16:16:16:16 little endian */ -- cgit v1.2.3 From ddaad4ad774d4ae02047ef873a8e38f62a4b7b01 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Wed, 18 Jun 2025 22:22:50 +0200 Subject: mtd: nand: qpic_common: prevent out of bounds access of BAM arrays The common QPIC code does not do any boundary checking when it handles the command elements and scatter gater list arrays of a BAM transaction, thus it allows to access out of bounds elements in those. Although it is the responsibility of the given driver to allocate enough space for all possible BAM transaction variations, however there can be mistakes in the driver code which can lead to hidden memory corruption issues which are hard to debug. This kind of problem has been observed during testing the 'spi-qpic-snand' driver. Although the driver has been fixed with a preceding patch, but it still makes sense to reduce the chance of having such errors again later. In order to prevent such errors, change the qcom_alloc_bam_transaction() function to store the number of elements of the arrays in the 'bam_transaction' strucutre during allocation. Also, add sanity checks to the qcom_prep_bam_dma_desc_{cmd,data}() functions to avoid using out of bounds indices for the arrays. Tested-by: Lakshmi Sowjanya D # on SDX75 Acked-by: Miquel Raynal Signed-off-by: Gabor Juhos Link: https://patch.msgid.link/20250618-qpic-snand-avoid-mem-corruption-v3-2-319c71296cda@gmail.com Signed-off-by: Mark Brown --- include/linux/mtd/nand-qpic-common.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/nand-qpic-common.h b/include/linux/mtd/nand-qpic-common.h index e8462deda6db..f0aa098a395f 100644 --- a/include/linux/mtd/nand-qpic-common.h +++ b/include/linux/mtd/nand-qpic-common.h @@ -237,6 +237,9 @@ * @last_data_desc - last DMA desc in data channel (tx/rx). * @last_cmd_desc - last DMA desc in command channel. * @txn_done - completion for NAND transfer. + * @bam_ce_nitems - the number of elements in the @bam_ce array + * @cmd_sgl_nitems - the number of elements in the @cmd_sgl array + * @data_sgl_nitems - the number of elements in the @data_sgl array * @bam_ce_pos - the index in bam_ce which is available for next sgl * @bam_ce_start - the index in bam_ce which marks the start position ce * for current sgl. It will be used for size calculation @@ -255,6 +258,11 @@ struct bam_transaction { struct dma_async_tx_descriptor *last_data_desc; struct dma_async_tx_descriptor *last_cmd_desc; struct completion txn_done; + + unsigned int bam_ce_nitems; + unsigned int cmd_sgl_nitems; + unsigned int data_sgl_nitems; + struct_group(bam_positions, u32 bam_ce_pos; u32 bam_ce_start; -- cgit v1.2.3 From 96893cdd4760ad94a438c1523cc5ca2470e04670 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sun, 29 Jun 2025 18:15:32 +0200 Subject: spi: Raise limit on number of chip selects to 24 We have a system which uses 24 SPI chip selects, raise the hard coded limit accordingly. Signed-off-by: Marc Kleine-Budde Link: https://patch.msgid.link/20250629-spi-increase-number-of-cs-v2-1-85a0a09bab32@pengutronix.de Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 4789f91dae94..e9ea43234d9a 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -21,7 +21,7 @@ #include /* Max no. of CS supported per spi device */ -#define SPI_CS_CNT_MAX 16 +#define SPI_CS_CNT_MAX 24 struct dma_chan; struct software_node; -- cgit v1.2.3 From 24368a744bafce7daf1eafd6a163871925ee5892 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 2 May 2025 21:32:01 -0400 Subject: sanitize handling of long-term internal mounts Original rationale for those had been the reduced cost of mntput() for the stuff that is mounted somewhere. Mount refcount increments and decrements are frequent; what's worse, they tend to concentrate on the same instances and cacheline pingpong is quite noticable. As the result, mount refcounts are per-cpu; that allows a very cheap increment. Plain decrement would be just as easy, but decrement-and-test is anything but (we need to add the components up, with exclusion against possible increment-from-zero, etc.). Fortunately, there is a very common case where we can tell that decrement won't be the final one - if the thing we are dropping is currently mounted somewhere. We have an RCU delay between the removal from mount tree and dropping the reference that used to pin it there, so we can just take rcu_read_lock() and check if the victim is mounted somewhere. If it is, we can go ahead and decrement without and further checks - the reference we are dropping is not the last one. If it isn't, we get all the fun with locking, carefully adding up components, etc., but the majority of refcount decrements end up taking the fast path. There is a major exception, though - pipes and sockets. Those live on the internal filesystems that are not going to be mounted anywhere. They are not going to be _un_mounted, of course, so having to take the slow path every time a pipe or socket gets closed is really obnoxious. Solution had been to mark them as long-lived ones - essentially faking "they are mounted somewhere" indicator. With minor modification that works even for ones that do eventually get dropped - all it takes is making sure we have an RCU delay between clearing the "mounted somewhere" indicator and dropping the reference. There are some additional twists (if you want to drop a dozen of such internal mounts, you'd be better off with clearing the indicator on all of them, doing an RCU delay once, then dropping the references), but in the basic form it had been * use kern_mount() if you want your internal mount to be a long-term one. * use kern_unmount() to undo that. Unfortunately, the things did rot a bit during the mount API reshuffling. In several cases we have lost the "fake the indicator" part; kern_unmount() on the unmount side remained (it doesn't warn if you use it on a mount without the indicator), but all benefits regaring mntput() cost had been lost. To get rid of that bitrot, let's add a new helper that would work with fs_context-based API: fc_mount_longterm(). It's a counterpart of fc_mount() that does, on success, mark its result as long-term. It must be paired with kern_unmount() or equivalents. Converted: 1) mqueue (it used to use kern_mount_data() and the umount side is still as it used to be) 2) hugetlbfs (used to use kern_mount_data(), internal mount is never unmounted in this one) 3) i915 gemfs (used to be kern_mount() + manual remount to set options, still uses kern_unmount() on umount side) 4) v3d gemfs (copied from i915) Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/mount.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mount.h b/include/linux/mount.h index 1a508beba446..c145820fcbbf 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -98,6 +98,7 @@ int mnt_get_write_access(struct vfsmount *mnt); void mnt_put_write_access(struct vfsmount *mnt); extern struct vfsmount *fc_mount(struct fs_context *fc); +extern struct vfsmount *fc_mount_longterm(struct fs_context *fc); extern struct vfsmount *vfs_create_mount(struct fs_context *fc); extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, -- cgit v1.2.3 From f0d0ba19985d23a3e83d654318ccb6e9c5f1b095 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 14 May 2025 20:50:06 -0400 Subject: Rewrite of propagate_umount() The variant currently in the tree has problems; trying to prove correctness has caught at least one class of bugs (reparenting that ends up moving the visible location of reparented mount, due to not excluding some of the counterparts on propagation that should've been included). I tried to prove that it's the only bug there; I'm still not sure whether it is. If anyone can reconstruct and write down an analysis of the mainline implementation, I'll gladly review it; as it is, I ended up doing a different implementation. Candidate collection phase is similar, but trimming the set down until it satisfies the constraints turned out pretty different. I hoped to do transformation as a massage series, but that turns out to be too convoluted. So it's a single patch replacing propagate_umount() and friends in one go, with notes and analysis in D/f/propagate_umount.txt (in addition to inline comments). As far I can tell, it is provably correct and provably linear by the number of mounts we need to look at in order to decide what should be unmounted. It even builds and seems to survive testing... Another nice thing that fell out of that is that ->mnt_umounting is no longer needed. Compared to the first version: * explicit MNT_UMOUNT_CANDIDATE flag for is_candidate() * trim_ancestors() only clears that flag, leaving the suckers on list * trim_one() and handle_locked() take the stuff with flag cleared off the list. That allows to iterate with list_for_each_entry_safe() when calling trim_one() - it removes at most one element from the list now. * no globals - I didn't bother with any kind of context, not worth it. * Notes updated accordingly; I have not touch the terms yet. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/mount.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mount.h b/include/linux/mount.h index c145820fcbbf..65fa8442c00a 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -40,6 +40,7 @@ enum mount_flags { MNT_INTERNAL = 0x4000, + MNT_UMOUNT_CANDIDATE = 0x020000, MNT_LOCK_ATIME = 0x040000, MNT_LOCK_NOEXEC = 0x080000, MNT_LOCK_NOSUID = 0x100000, @@ -66,7 +67,7 @@ enum mount_flags { MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | - MNT_LOCKED, + MNT_LOCKED | MNT_UMOUNT_CANDIDATE, }; struct vfsmount { -- cgit v1.2.3 From 406fea79992561f47fd3511dd8b7c8abeeff7045 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Jun 2025 18:06:19 -0400 Subject: mount: separate the flags accessed only under namespace_sem Several flags are updated and checked only under namespace_sem; we are already making use of that when we are checking them without mount_lock, but we have to hold mount_lock for all updates, which makes things clumsier than they have to be. Take MNT_SHARED, MNT_UNBINDABLE, MNT_MARKED and MNT_UMOUNT_CANDIDATE into a separate field (->mnt_t_flags), renaming them to T_SHARED, etc. to avoid confusion. All accesses must be under namespace_sem. That changes locking requirements for mnt_change_propagation() and set_mnt_shared() - only namespace_sem is needed now. The same goes for SET_MNT_MARKED et.al. There might be more flags moved from ->mnt_flags to that field; this is just the initial set. Signed-off-by: Al Viro --- include/linux/mount.h | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/mount.h b/include/linux/mount.h index 65fa8442c00a..5f9c053b0897 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -35,12 +35,8 @@ enum mount_flags { MNT_SHRINKABLE = 0x100, MNT_WRITE_HOLD = 0x200, - MNT_SHARED = 0x1000, /* if the vfsmount is a shared mount */ - MNT_UNBINDABLE = 0x2000, /* if the vfsmount is a unbindable mount */ - MNT_INTERNAL = 0x4000, - MNT_UMOUNT_CANDIDATE = 0x020000, MNT_LOCK_ATIME = 0x040000, MNT_LOCK_NOEXEC = 0x080000, MNT_LOCK_NOSUID = 0x100000, @@ -49,25 +45,15 @@ enum mount_flags { MNT_LOCKED = 0x800000, MNT_DOOMED = 0x1000000, MNT_SYNC_UMOUNT = 0x2000000, - MNT_MARKED = 0x4000000, MNT_UMOUNT = 0x8000000, - /* - * MNT_SHARED_MASK is the set of flags that should be cleared when a - * mount becomes shared. Currently, this is only the flag that says a - * mount cannot be bind mounted, since this is how we create a mount - * that shares events with another mount. If you add a new MNT_* - * flag, consider how it interacts with shared mounts. - */ - MNT_SHARED_MASK = MNT_UNBINDABLE, MNT_USER_SETTABLE_MASK = MNT_NOSUID | MNT_NODEV | MNT_NOEXEC | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME | MNT_READONLY | MNT_NOSYMFOLLOW, MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, - MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | - MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | - MNT_LOCKED | MNT_UMOUNT_CANDIDATE, + MNT_INTERNAL_FLAGS = MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | + MNT_SYNC_UMOUNT | MNT_LOCKED }; struct vfsmount { -- cgit v1.2.3 From a0f26fcc383965e0522b81269062a9278bc802fe Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 27 Jun 2025 09:42:33 +0900 Subject: ata: libata: Remove ATA_DFLAG_ZAC device flag The ATA device flag ATA_DFLAG_ZAC is used to indicate if a devie is a host managed or host aware zoned device. However, this flag is not used in the hot path and only used during device scanning/revalidation and for inquiry and sense SCSI command translation. Save one bit from struct ata_device flags field by replacing this flag with the internal helper function ata_dev_is_zac(). This function returns true if the device class is ATA_DEV_ZAC (host managed ZAC device case) or if its identify data reports it supports the zoned command set (host aware ZAC device case). Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Niklas Cassel --- include/linux/libata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 1e5aec839041..721f0805b6c9 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -144,7 +144,6 @@ enum { ATA_DFLAG_DEVSLP = (1 << 27), /* device supports Device Sleep */ ATA_DFLAG_ACPI_DISABLED = (1 << 28), /* ACPI for the device is disabled */ ATA_DFLAG_D_SENSE = (1 << 29), /* Descriptor sense requested */ - ATA_DFLAG_ZAC = (1 << 30), /* ZAC device */ ATA_DFLAG_FEATURES_MASK = (ATA_DFLAG_TRUSTED | ATA_DFLAG_DA | \ ATA_DFLAG_DEVSLP | ATA_DFLAG_NCQ_SEND_RECV | \ -- cgit v1.2.3 From 2b89eb177c466bb1f84dff8db04d614b33a7ab95 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 19 Jun 2025 18:35:48 +0900 Subject: ata: libata: Improve LPM policies description Improve the comment describing enum ata_lpm_policy and add comments within that enum to describe each of the different possible values. The enum values comments match the description given for the CONFIG_SATA_MOBILE_LPM_POLICY config parameter. No functional changes. Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel --- include/linux/libata.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 721f0805b6c9..7462218312ad 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -499,16 +499,28 @@ enum ata_completion_errors { }; /* - * Link power management policy: If you alter this, you also need to - * alter libata-sata.c (for the ascii descriptions) + * Link Power Management (LPM) policies. + * + * The default LPM policy to use for a device link is defined using these values + * with the CONFIG_SATA_MOBILE_LPM_POLICY config option and applied through the + * target_lpm_policy field of struct ata_port. + * + * If you alter this, you also need to alter the policy names used with the + * sysfs attribute link_power_management_policy defined in libata-sata.c. */ enum ata_lpm_policy { + /* Keep firmware settings */ ATA_LPM_UNKNOWN, + /* No power savings (maximum performance) */ ATA_LPM_MAX_POWER, + /* HIPM (Partial) */ ATA_LPM_MED_POWER, - ATA_LPM_MED_POWER_WITH_DIPM, /* Med power + DIPM as win IRST does */ - ATA_LPM_MIN_POWER_WITH_PARTIAL, /* Min Power + partial and slumber */ - ATA_LPM_MIN_POWER, /* Min power + no partial (slumber only) */ + /* HIPM (Partial) and DIPM (Partial and Slumber) */ + ATA_LPM_MED_POWER_WITH_DIPM, + /* HIPM (Partial and DevSleep) and DIPM (Partial and Slumber) */ + ATA_LPM_MIN_POWER_WITH_PARTIAL, + /* HIPM (Slumber and DevSleep) and DIPM (Partial and Slumber) */ + ATA_LPM_MIN_POWER, }; enum ata_lpm_hints { -- cgit v1.2.3 From 04f541cef2dba78b17c83ad8b5a0742012399530 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Thu, 19 Jun 2025 12:21:51 +0300 Subject: media: v4l2-ctrls: Return the handler's error in v4l2_ctrl_handler_free() v4l2_ctrl_handler_free() used to return void but changing this to int, returning the handler's error code, enables the drivers to simply return the handler's error in this common error handling pattern: if (handler->error) return v4l2_ctrl_handler_free(handler); Suggested-by: Laurent Pinchart Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Reviewed-by: Hans Verkuil Signed-off-by: Hans Verkuil --- include/media/v4l2-ctrls.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h index 3a87096e064f..c32c46286441 100644 --- a/include/media/v4l2-ctrls.h +++ b/include/media/v4l2-ctrls.h @@ -579,8 +579,10 @@ int v4l2_ctrl_handler_init_class(struct v4l2_ctrl_handler *hdl, * @hdl: The control handler. * * Does nothing if @hdl == NULL. + * + * Return: @hdl's error field or 0 if @hdl is NULL. */ -void v4l2_ctrl_handler_free(struct v4l2_ctrl_handler *hdl); +int v4l2_ctrl_handler_free(struct v4l2_ctrl_handler *hdl); /** * v4l2_ctrl_lock() - Helper function to lock the handler -- cgit v1.2.3 From 08ad63bbd681ae4eeb50644564435035c38e5795 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 20 Jun 2025 14:58:01 +0200 Subject: gpio: constify arguments of gpiod_is_equal() This function is not meant to modify the GPIO descriptors in any way so we can safely constify both arguments. Link: https://lore.kernel.org/r/20250620-gpiod-is-equal-improv-v1-1-a75060505d2c@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/consumer.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index f0b1982da0cc..00df68c51405 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -181,7 +181,8 @@ struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev, enum gpiod_flags flags, const char *label); -bool gpiod_is_equal(struct gpio_desc *desc, struct gpio_desc *other); +bool gpiod_is_equal(const struct gpio_desc *desc, + const struct gpio_desc *other); #else /* CONFIG_GPIOLIB */ @@ -551,7 +552,7 @@ struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev, } static inline bool -gpiod_is_equal(struct gpio_desc *desc, struct gpio_desc *other) +gpiod_is_equal(const struct gpio_desc *desc, const struct gpio_desc *other) { WARN_ON(desc || other); return false; -- cgit v1.2.3 From 62b5848f73dd4f8ae17304dae54562d0c9ecdd3d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 24 Jun 2025 16:32:20 +0200 Subject: power: sequencing: add defines for return values of the match() callback Instead of using 0 and 1 as magic numbers, let's add proper defines whose names tell the reader what the meaning behind them is. Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20250624-pwrseq-match-defines-v1-3-a59d90a951f1@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/pwrseq/provider.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pwrseq/provider.h b/include/linux/pwrseq/provider.h index cbc3607cbfcf..33b3d2c2e39d 100644 --- a/include/linux/pwrseq/provider.h +++ b/include/linux/pwrseq/provider.h @@ -13,6 +13,9 @@ struct pwrseq_device; typedef int (*pwrseq_power_state_func)(struct pwrseq_device *); typedef int (*pwrseq_match_func)(struct pwrseq_device *, struct device *); +#define PWRSEQ_NO_MATCH 0 +#define PWRSEQ_MATCH_OK 1 + /** * struct pwrseq_unit_data - Configuration of a single power sequencing * unit. -- cgit v1.2.3 From b1c26e059536d8acbf9d508374f4b76537e20fb7 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 10 Jun 2025 15:58:16 -0500 Subject: Move FCH header to a location accessible by all archs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A new header fch.h was created to store registers used by different AMD drivers. This header was included by i2c-piix4 in commit 624b0d5696a8 ("i2c: piix4, x86/platform: Move the SB800 PIIX4 FCH definitions to "). To prevent compile failures on non-x86 archs i2c-piix4 was set to only compile on x86 by commit 7e173eb82ae9717 ("i2c: piix4: Make CONFIG_I2C_PIIX4 dependent on CONFIG_X86"). This was not a good decision because loongarch and mips both actually support i2c-piix4 and set it enabled in the defconfig. Move the header to a location accessible by all architectures. Fixes: 624b0d5696a89 ("i2c: piix4, x86/platform: Move the SB800 PIIX4 FCH definitions to ") Suggested-by: Hans de Goede Signed-off-by: Mario Limonciello Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250610205817.3912944-1-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/x86/amd-fch.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/linux/platform_data/x86/amd-fch.h (limited to 'include') diff --git a/include/linux/platform_data/x86/amd-fch.h b/include/linux/platform_data/x86/amd-fch.h new file mode 100644 index 000000000000..2cf5153edbc2 --- /dev/null +++ b/include/linux/platform_data/x86/amd-fch.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AMD_FCH_H_ +#define _ASM_X86_AMD_FCH_H_ + +#define FCH_PM_BASE 0xFED80300 + +/* Register offsets from PM base: */ +#define FCH_PM_DECODEEN 0x00 +#define FCH_PM_DECODEEN_SMBUS0SEL GENMASK(20, 19) +#define FCH_PM_SCRATCH 0x80 +#define FCH_PM_S5_RESET_STATUS 0xC0 + +#endif /* _ASM_X86_AMD_FCH_H_ */ -- cgit v1.2.3 From 37d2aa62138daa8ecb6442ae4753704e9c92346f Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 24 Jun 2025 13:28:41 +0100 Subject: ASoC: SDCA: Minor selected/detected mode control fixups Make the names a slightly better match for the specification and add some constants for the values rather than hard coding. Reviewed-by: Bard Liao Signed-off-by: Charles Keepax Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20250624122844.2761627-5-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/sdca_function.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h index 856b0f40ce5e..4b278513597e 100644 --- a/include/sound/sdca_function.h +++ b/include/sound/sdca_function.h @@ -319,6 +319,15 @@ enum sdca_selected_mode_range { SDCA_SELECTED_MODE_NCOLS = 2, }; +/** + * enum sdca_detected_mode_values - Predefined GE Detected Mode values + */ +enum sdca_detected_mode_values { + SDCA_DETECTED_MODE_JACK_UNPLUGGED = 0, + SDCA_DETECTED_MODE_JACK_UNKNOWN = 1, + SDCA_DETECTED_MODE_DETECTION_IN_PROGRESS = 2, +}; + /** * enum sdca_spe_controls - SDCA Controls for Security & Privacy Unit * -- cgit v1.2.3 From 775f5729b47d8737f4f98e0141f61b3358245398 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 24 Jun 2025 13:28:42 +0100 Subject: ASoC: SDCA: Add flag for unused IRQs Zero is a valid SDCA IRQ interrupt position so add a special value to indicate that the IRQ is not used. Reviewed-by: Bard Liao Signed-off-by: Charles Keepax Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20250624122844.2761627-6-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/sdca_function.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h index 4b278513597e..b4a97ff08729 100644 --- a/include/sound/sdca_function.h +++ b/include/sound/sdca_function.h @@ -17,6 +17,8 @@ struct device; struct sdca_entity; struct sdca_function_desc; +#define SDCA_NO_INTERRUPT -1 + /* * The addressing space for SDCA relies on 7 bits for Entities, so a * maximum of 128 Entities per function can be represented. -- cgit v1.2.3 From b126394d9ec6f9d8322cf392ba23d4a5f96faf5a Mon Sep 17 00:00:00 2001 From: Maciej Strozek Date: Tue, 24 Jun 2025 13:28:43 +0100 Subject: ASoC: SDCA: Generic interrupt support Add a library supporting usage of SDCA interrupts, using regmap irq framework. The library adds functions for parsing ACPI for interrupt-related information, configuring irq chip and requesting individual irqs. Calling code (SDCA function code) is expected to also substitute the library's base irq handler for its own, appropriate callback. Signed-off-by: Maciej Strozek Reviewed-by: Bard Liao Signed-off-by: Charles Keepax Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20250624122844.2761627-7-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/sdca_interrupts.h | 75 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 include/sound/sdca_interrupts.h (limited to 'include') diff --git a/include/sound/sdca_interrupts.h b/include/sound/sdca_interrupts.h new file mode 100644 index 000000000000..4cda8b75bae0 --- /dev/null +++ b/include/sound/sdca_interrupts.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * The MIPI SDCA specification is available for public downloads at + * https://www.mipi.org/mipi-sdca-v1-0-download + * + * Copyright (C) 2025 Cirrus Logic, Inc. and + * Cirrus Logic International Semiconductor Ltd. + */ + +#ifndef __SDCA_INTERRUPTS_H__ +#define __SDCA_INTERRUPTS_H__ + +#include +#include +#include + +struct device; +struct snd_soc_component; +struct sdca_function_data; + +#define SDCA_MAX_INTERRUPTS 31 /* the last bit is reserved for future extensions */ + +/** + * struct sdca_interrupt - contains information about a single SDCA interrupt + * @name: The name of the interrupt. + * @component: Pointer to the ASoC component owns the interrupt. + * @function: Pointer to the Function that the interrupt is associated with. + * @entity: Pointer to the Entity that the interrupt is associated with. + * @control: Pointer to the Control that the interrupt is associated with. + * @externally_requested: Internal flag used to check if a client driver has + * already requested the interrupt, for custom handling, allowing the core to + * skip handling this interrupt. + */ +struct sdca_interrupt { + const char *name; + + struct snd_soc_component *component; + struct sdca_function_data *function; + struct sdca_entity *entity; + struct sdca_control *control; + + bool externally_requested; +}; + +/** + * struct sdca_interrupt_info - contains top-level SDCA interrupt information + * @irq_chip: regmap irq chip structure. + * @irq_data: regmap irq chip data structure. + * @irqs: Array of data for each individual IRQ. + * @irq_lock: Protects access to the list of sdca_interrupt structures. + */ +struct sdca_interrupt_info { + struct regmap_irq_chip irq_chip; + struct regmap_irq_chip_data *irq_data; + + struct sdca_interrupt irqs[SDCA_MAX_INTERRUPTS]; + + struct mutex irq_lock; /* Protect irqs list across functions */ +}; + +int sdca_irq_request(struct device *dev, struct sdca_interrupt_info *interrupt_info, + int sdca_irq, const char *name, irq_handler_t handler, + void *data); +int sdca_irq_data_populate(struct snd_soc_component *component, + struct sdca_function_data *function, + struct sdca_entity *entity, + struct sdca_control *control, + struct sdca_interrupt *interrupt); +int sdca_irq_populate(struct sdca_function_data *function, + struct snd_soc_component *component, + struct sdca_interrupt_info *info); +struct sdca_interrupt_info *sdca_irq_allocate(struct device *dev, + struct regmap *regmap, int irq); + +#endif -- cgit v1.2.3 From b9ab3b61824190b1c6b2c59e7ba4de591f24eb92 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 24 Jun 2025 13:28:44 +0100 Subject: ASoC: SDCA: Add some initial IRQ handlers Add basic IRQ handlers for the function status and jack detection interrupts. Reviewed-by: Bard Liao Signed-off-by: Charles Keepax Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20250624122844.2761627-8-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/sdca_interrupts.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/sound/sdca_interrupts.h b/include/sound/sdca_interrupts.h index 4cda8b75bae0..bbbc3ab27eba 100644 --- a/include/sound/sdca_interrupts.h +++ b/include/sound/sdca_interrupts.h @@ -27,6 +27,7 @@ struct sdca_function_data; * @function: Pointer to the Function that the interrupt is associated with. * @entity: Pointer to the Entity that the interrupt is associated with. * @control: Pointer to the Control that the interrupt is associated with. + * @priv: Pointer to private data for use by the handler. * @externally_requested: Internal flag used to check if a client driver has * already requested the interrupt, for custom handling, allowing the core to * skip handling this interrupt. @@ -39,6 +40,8 @@ struct sdca_interrupt { struct sdca_entity *entity; struct sdca_control *control; + void *priv; + bool externally_requested; }; -- cgit v1.2.3 From 2af612ad42903637b11f9ddf4101f231f1ec5d1d Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Fri, 20 Jun 2025 12:04:33 +0530 Subject: drm/dp: Introduce new member in drm_backlight_info Introduce luminance_set flag which indicates if we can manipulate backlight using luminance value or not which is only possible after eDP v1.5. Signed-off-by: Suraj Kandpal Reviewed-by: Arun R Murthy Link: https://lore.kernel.org/r/20250620063445.3603086-2-suraj.kandpal@intel.com --- include/drm/display/drm_dp_helper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index e4ca35143ff9..beafbc31f9d9 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -849,6 +849,7 @@ struct drm_edp_backlight_info { bool lsb_reg_used : 1; bool aux_enable : 1; bool aux_set : 1; + bool luminance_set : 1; }; int -- cgit v1.2.3 From c802a6b81b704bd969946f04f206a881f1abe2dd Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Fri, 20 Jun 2025 12:04:34 +0530 Subject: drm/dp: Add argument in drm_edp_backlight_init Add bool argument in drm_edp_backlight init to provide the drivers option to choose if they want to use luminance values to manipulate brightness. Signed-off-by: Suraj Kandpal Reviewed-by: Arun R Murthy Link: https://lore.kernel.org/r/20250620063445.3603086-3-suraj.kandpal@intel.com --- include/drm/display/drm_dp_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index beafbc31f9d9..ea38e7807421 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -855,7 +855,7 @@ struct drm_edp_backlight_info { int drm_edp_backlight_init(struct drm_dp_aux *aux, struct drm_edp_backlight_info *bl, u16 driver_pwm_freq_hz, const u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE], - u16 *current_level, u8 *current_mode); + u16 *current_level, u8 *current_mode, bool need_luminance); int drm_edp_backlight_set_level(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl, u16 level); int drm_edp_backlight_enable(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl, -- cgit v1.2.3 From 81fd01414f259ff897054c50750a25cbb0255060 Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Fri, 20 Jun 2025 12:04:35 +0530 Subject: drm/dp: Add argument for max luminance in drm_edp_backlight_init Add new argument to drm_edp_backlight_init which gives the max_luminance which will be needed to set the max values for backlight. --v2 -Use pass only max luminance instead of luminance_range_info struct [Arun] Signed-off-by: Suraj Kandpal Reviewed-by: Arun R Murthy Link: https://lore.kernel.org/r/20250620063445.3603086-4-suraj.kandpal@intel.com --- include/drm/display/drm_dp_helper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index ea38e7807421..0575418f0eb3 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -854,6 +854,7 @@ struct drm_edp_backlight_info { int drm_edp_backlight_init(struct drm_dp_aux *aux, struct drm_edp_backlight_info *bl, + u32 max_luminance, u16 driver_pwm_freq_hz, const u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE], u16 *current_level, u8 *current_mode, bool need_luminance); int drm_edp_backlight_set_level(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl, -- cgit v1.2.3 From 08e81e2ded98d726db0dba39030ef59eb49d762c Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Fri, 20 Jun 2025 12:04:36 +0530 Subject: drm/dp: Move from u16 to u32 for max in drm_edp_backlight_info Use u32 instead of u16 for max variable in drm_edp_backlight_info since it can now hold max luminance range value which is u32. We will set this max with max_luminance value when luminance_set is true. Signed-off-by: Suraj Kandpal Reviewed-by: Arun R Murthy Link: https://lore.kernel.org/r/20250620063445.3603086-5-suraj.kandpal@intel.com --- include/drm/display/drm_dp_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index 0575418f0eb3..cf0706762902 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -844,7 +844,7 @@ drm_dp_has_quirk(const struct drm_dp_desc *desc, enum drm_dp_quirk quirk) struct drm_edp_backlight_info { u8 pwmgen_bit_count; u8 pwm_freq_pre_divider; - u16 max; + u32 max; bool lsb_reg_used : 1; bool aux_enable : 1; -- cgit v1.2.3 From 9274a940f1c90c15fb66a8c5488b1b79e4e8c432 Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Fri, 20 Jun 2025 12:04:37 +0530 Subject: drm/dp: Change current_level argument type to u32 Change the current_level argument type to u32 from u16 since it can now carry the value which it gets from DP_EDP_PANEL_TARGET_LUMINANCE_VALUE. Signed-off-by: Suraj Kandpal Reviewed-by: Arun R Murthy Link: https://lore.kernel.org/r/20250620063445.3603086-6-suraj.kandpal@intel.com --- include/drm/display/drm_dp_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index cf0706762902..f23847fc02c9 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -856,7 +856,7 @@ int drm_edp_backlight_init(struct drm_dp_aux *aux, struct drm_edp_backlight_info *bl, u32 max_luminance, u16 driver_pwm_freq_hz, const u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE], - u16 *current_level, u8 *current_mode, bool need_luminance); + u32 *current_level, u8 *current_mode, bool need_luminance); int drm_edp_backlight_set_level(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl, u16 level); int drm_edp_backlight_enable(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl, -- cgit v1.2.3 From 2ff7f0c381c15f457f83439bfdf5823fb33fb436 Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Fri, 20 Jun 2025 12:04:39 +0530 Subject: drm/dp: Change argument type for drm_edp_backlight_set_level Use u32 for level variable as one may need to pass value for DP_EDP_PANEL_TARGET_LUMINANCE_VALUE. --v2 -Typecase is not needed [Jani] Signed-off-by: Suraj Kandpal Reviewed-by: Arun R Murthy Link: https://lore.kernel.org/r/20250620063445.3603086-8-suraj.kandpal@intel.com --- include/drm/display/drm_dp_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index f23847fc02c9..e2ffbd5c1cbd 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -858,7 +858,7 @@ drm_edp_backlight_init(struct drm_dp_aux *aux, struct drm_edp_backlight_info *bl u16 driver_pwm_freq_hz, const u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE], u32 *current_level, u8 *current_mode, bool need_luminance); int drm_edp_backlight_set_level(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl, - u16 level); + u32 level); int drm_edp_backlight_enable(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl, u16 level); int drm_edp_backlight_disable(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl); -- cgit v1.2.3 From 05a76aef23df031bca3cdc5c46dd67922f3a15f2 Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Fri, 20 Jun 2025 12:04:41 +0530 Subject: drm/dp: Change argument type of drm_edp_backlight_enable Change the argument type to u32 for the default level being sent since it has to now account for luminance value which has to be set for DP_EDP_PANEL_LUMINANCE_TARGET_VALUE. --v2 -No need to typecast [Jani] Signed-off-by: Suraj Kandpal Reviewed-by: Arun R Murthy Link: https://lore.kernel.org/r/20250620063445.3603086-10-suraj.kandpal@intel.com --- include/drm/display/drm_dp_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index e2ffbd5c1cbd..c6c2ba68fb16 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -860,7 +860,7 @@ drm_edp_backlight_init(struct drm_dp_aux *aux, struct drm_edp_backlight_info *bl int drm_edp_backlight_set_level(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl, u32 level); int drm_edp_backlight_enable(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl, - u16 level); + u32 level); int drm_edp_backlight_disable(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl); #if IS_ENABLED(CONFIG_DRM_KMS_HELPER) && (IS_BUILTIN(CONFIG_BACKLIGHT_CLASS_DEVICE) || \ -- cgit v1.2.3 From 7ff495e26a39f3e7a3d4058df59b5b6d6f943cab Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 30 Jun 2025 09:51:38 +0200 Subject: local_lock: Move this_cpu_ptr() notation from internal to main header local_lock.h is the main header for the local_lock_t type and provides wrappers around internal functions prefixed with __ in local_lock_internal.h. Move the this_cpu_ptr() dereference of the variable from the internal to the main header. Since it is all macro implemented, this_cpu_ptr() will still happen within the preempt/ IRQ disabled section. This frees the internal implementation (__) to be used on local_lock_t types which are local variables and must not be accessed via this_cpu_ptr(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Waiman Long Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20250630075138.3448715-2-bigeasy@linutronix.de --- include/linux/local_lock.h | 20 ++++++++++---------- include/linux/local_lock_internal.h | 30 +++++++++++++++--------------- 2 files changed, 25 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h index 16a2ee4f8310..2ba846419524 100644 --- a/include/linux/local_lock.h +++ b/include/linux/local_lock.h @@ -13,13 +13,13 @@ * local_lock - Acquire a per CPU local lock * @lock: The lock variable */ -#define local_lock(lock) __local_lock(lock) +#define local_lock(lock) __local_lock(this_cpu_ptr(lock)) /** * local_lock_irq - Acquire a per CPU local lock and disable interrupts * @lock: The lock variable */ -#define local_lock_irq(lock) __local_lock_irq(lock) +#define local_lock_irq(lock) __local_lock_irq(this_cpu_ptr(lock)) /** * local_lock_irqsave - Acquire a per CPU local lock, save and disable @@ -28,19 +28,19 @@ * @flags: Storage for interrupt flags */ #define local_lock_irqsave(lock, flags) \ - __local_lock_irqsave(lock, flags) + __local_lock_irqsave(this_cpu_ptr(lock), flags) /** * local_unlock - Release a per CPU local lock * @lock: The lock variable */ -#define local_unlock(lock) __local_unlock(lock) +#define local_unlock(lock) __local_unlock(this_cpu_ptr(lock)) /** * local_unlock_irq - Release a per CPU local lock and enable interrupts * @lock: The lock variable */ -#define local_unlock_irq(lock) __local_unlock_irq(lock) +#define local_unlock_irq(lock) __local_unlock_irq(this_cpu_ptr(lock)) /** * local_unlock_irqrestore - Release a per CPU local lock and restore @@ -49,7 +49,7 @@ * @flags: Interrupt flags to restore */ #define local_unlock_irqrestore(lock, flags) \ - __local_unlock_irqrestore(lock, flags) + __local_unlock_irqrestore(this_cpu_ptr(lock), flags) /** * local_lock_init - Runtime initialize a lock instance @@ -64,7 +64,7 @@ * locking constrains it will _always_ fail to acquire the lock in NMI or * HARDIRQ context on PREEMPT_RT. */ -#define local_trylock(lock) __local_trylock(lock) +#define local_trylock(lock) __local_trylock(this_cpu_ptr(lock)) /** * local_trylock_irqsave - Try to acquire a per CPU local lock, save and disable @@ -77,7 +77,7 @@ * HARDIRQ context on PREEMPT_RT. */ #define local_trylock_irqsave(lock, flags) \ - __local_trylock_irqsave(lock, flags) + __local_trylock_irqsave(this_cpu_ptr(lock), flags) DEFINE_GUARD(local_lock, local_lock_t __percpu*, local_lock(_T), @@ -91,10 +91,10 @@ DEFINE_LOCK_GUARD_1(local_lock_irqsave, local_lock_t __percpu, unsigned long flags) #define local_lock_nested_bh(_lock) \ - __local_lock_nested_bh(_lock) + __local_lock_nested_bh(this_cpu_ptr(_lock)) #define local_unlock_nested_bh(_lock) \ - __local_unlock_nested_bh(_lock) + __local_unlock_nested_bh(this_cpu_ptr(_lock)) DEFINE_GUARD(local_lock_nested_bh, local_lock_t __percpu*, local_lock_nested_bh(_T), diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 8d5ac16a9b17..d80b5306a2c0 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -99,14 +99,14 @@ do { \ local_trylock_t *tl; \ local_lock_t *l; \ \ - l = (local_lock_t *)this_cpu_ptr(lock); \ + l = (local_lock_t *)(lock); \ tl = (local_trylock_t *)l; \ _Generic((lock), \ - __percpu local_trylock_t *: ({ \ + local_trylock_t *: ({ \ lockdep_assert(tl->acquired == 0); \ WRITE_ONCE(tl->acquired, 1); \ }), \ - __percpu local_lock_t *: (void)0); \ + local_lock_t *: (void)0); \ local_lock_acquire(l); \ } while (0) @@ -133,7 +133,7 @@ do { \ local_trylock_t *tl; \ \ preempt_disable(); \ - tl = this_cpu_ptr(lock); \ + tl = (lock); \ if (READ_ONCE(tl->acquired)) { \ preempt_enable(); \ tl = NULL; \ @@ -150,7 +150,7 @@ do { \ local_trylock_t *tl; \ \ local_irq_save(flags); \ - tl = this_cpu_ptr(lock); \ + tl = (lock); \ if (READ_ONCE(tl->acquired)) { \ local_irq_restore(flags); \ tl = NULL; \ @@ -167,15 +167,15 @@ do { \ local_trylock_t *tl; \ local_lock_t *l; \ \ - l = (local_lock_t *)this_cpu_ptr(lock); \ + l = (local_lock_t *)(lock); \ tl = (local_trylock_t *)l; \ local_lock_release(l); \ _Generic((lock), \ - __percpu local_trylock_t *: ({ \ + local_trylock_t *: ({ \ lockdep_assert(tl->acquired == 1); \ WRITE_ONCE(tl->acquired, 0); \ }), \ - __percpu local_lock_t *: (void)0); \ + local_lock_t *: (void)0); \ } while (0) #define __local_unlock(lock) \ @@ -199,11 +199,11 @@ do { \ #define __local_lock_nested_bh(lock) \ do { \ lockdep_assert_in_softirq(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ + local_lock_acquire((lock)); \ } while (0) #define __local_unlock_nested_bh(lock) \ - local_lock_release(this_cpu_ptr(lock)) + local_lock_release((lock)) #else /* !CONFIG_PREEMPT_RT */ @@ -227,7 +227,7 @@ typedef spinlock_t local_trylock_t; #define __local_lock(__lock) \ do { \ migrate_disable(); \ - spin_lock(this_cpu_ptr((__lock))); \ + spin_lock((__lock)); \ } while (0) #define __local_lock_irq(lock) __local_lock(lock) @@ -241,7 +241,7 @@ typedef spinlock_t local_trylock_t; #define __local_unlock(__lock) \ do { \ - spin_unlock(this_cpu_ptr((__lock))); \ + spin_unlock((__lock)); \ migrate_enable(); \ } while (0) @@ -252,12 +252,12 @@ typedef spinlock_t local_trylock_t; #define __local_lock_nested_bh(lock) \ do { \ lockdep_assert_in_softirq_func(); \ - spin_lock(this_cpu_ptr(lock)); \ + spin_lock((lock)); \ } while (0) #define __local_unlock_nested_bh(lock) \ do { \ - spin_unlock(this_cpu_ptr((lock))); \ + spin_unlock((lock)); \ } while (0) #define __local_trylock(lock) \ @@ -268,7 +268,7 @@ do { \ __locked = 0; \ } else { \ migrate_disable(); \ - __locked = spin_trylock(this_cpu_ptr((lock))); \ + __locked = spin_trylock((lock)); \ if (!__locked) \ migrate_enable(); \ } \ -- cgit v1.2.3 From b693c703accb08cbd52f0b94d810d6abbca3bfb9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Jun 2025 09:03:06 -0700 Subject: lib/crypto: sha512: Add support for SHA-384 and SHA-512 Add basic support for SHA-384 and SHA-512 to lib/crypto/. Various in-kernel users will be able to use this instead of the old-school crypto API, which is harder to use and has more overhead. The basic support added by this commit consists of the API and its documentation, backed by a C implementation of the algorithms. sha512_block_generic() is derived from crypto/sha512_generic.c. Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250630160320.2888-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/sha2.h | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) (limited to 'include') diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index 4912572578dc..f2a6e84b2840 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -129,4 +129,132 @@ static inline void sha224_init(struct sha256_state *sctx) /* Simply use sha256_update as it is equivalent to sha224_update. */ void sha224_final(struct sha256_state *sctx, u8 out[SHA224_DIGEST_SIZE]); +/* State for the SHA-512 (and SHA-384) compression function */ +struct sha512_block_state { + u64 h[8]; +}; + +/* + * Context structure, shared by SHA-384 and SHA-512. The sha384_ctx and + * sha512_ctx structs wrap this one so that the API has proper typing and + * doesn't allow mixing the SHA-384 and SHA-512 functions arbitrarily. + */ +struct __sha512_ctx { + struct sha512_block_state state; + u64 bytecount_lo; + u64 bytecount_hi; + u8 buf[SHA512_BLOCK_SIZE] __aligned(__alignof__(__be64)); +}; +void __sha512_update(struct __sha512_ctx *ctx, const u8 *data, size_t len); + +/** + * struct sha384_ctx - Context for hashing a message with SHA-384 + * @ctx: private + */ +struct sha384_ctx { + struct __sha512_ctx ctx; +}; + +/** + * sha384_init() - Initialize a SHA-384 context for a new message + * @ctx: the context to initialize + * + * If you don't need incremental computation, consider sha384() instead. + * + * Context: Any context. + */ +void sha384_init(struct sha384_ctx *ctx); + +/** + * sha384_update() - Update a SHA-384 context with message data + * @ctx: the context to update; must have been initialized + * @data: the message data + * @len: the data length in bytes + * + * This can be called any number of times. + * + * Context: Any context. + */ +static inline void sha384_update(struct sha384_ctx *ctx, + const u8 *data, size_t len) +{ + __sha512_update(&ctx->ctx, data, len); +} + +/** + * sha384_final() - Finish computing a SHA-384 message digest + * @ctx: the context to finalize; must have been initialized + * @out: (output) the resulting SHA-384 message digest + * + * After finishing, this zeroizes @ctx. So the caller does not need to do it. + * + * Context: Any context. + */ +void sha384_final(struct sha384_ctx *ctx, u8 out[SHA384_DIGEST_SIZE]); + +/** + * sha384() - Compute SHA-384 message digest in one shot + * @data: the message data + * @len: the data length in bytes + * @out: (output) the resulting SHA-384 message digest + * + * Context: Any context. + */ +void sha384(const u8 *data, size_t len, u8 out[SHA384_DIGEST_SIZE]); + +/** + * struct sha512_ctx - Context for hashing a message with SHA-512 + * @ctx: private + */ +struct sha512_ctx { + struct __sha512_ctx ctx; +}; + +/** + * sha512_init() - Initialize a SHA-512 context for a new message + * @ctx: the context to initialize + * + * If you don't need incremental computation, consider sha512() instead. + * + * Context: Any context. + */ +void sha512_init(struct sha512_ctx *ctx); + +/** + * sha512_update() - Update a SHA-512 context with message data + * @ctx: the context to update; must have been initialized + * @data: the message data + * @len: the data length in bytes + * + * This can be called any number of times. + * + * Context: Any context. + */ +static inline void sha512_update(struct sha512_ctx *ctx, + const u8 *data, size_t len) +{ + __sha512_update(&ctx->ctx, data, len); +} + +/** + * sha512_final() - Finish computing a SHA-512 message digest + * @ctx: the context to finalize; must have been initialized + * @out: (output) the resulting SHA-512 message digest + * + * After finishing, this zeroizes @ctx. So the caller does not need to do it. + * + * Context: Any context. + */ +void sha512_final(struct sha512_ctx *ctx, u8 out[SHA512_DIGEST_SIZE]); + +/** + * sha512() - Compute SHA-512 message digest in one shot + * @data: the message data + * @len: the data length in bytes + * @out: (output) the resulting SHA-512 message digest + * + * Context: Any context. + */ +void sha512(const u8 *data, size_t len, u8 out[SHA512_DIGEST_SIZE]); + #endif /* _CRYPTO_SHA2_H */ -- cgit v1.2.3 From 23e8b4371dbd5907d633262f36903144a378a114 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Jun 2025 09:03:07 -0700 Subject: lib/crypto: sha512: Add HMAC-SHA384 and HMAC-SHA512 support Since HMAC support is commonly needed and is fairly simple, include it as a first-class citizen of the SHA-512 library. The API supports both incremental and one-shot computation, and either preparing the key ahead of time or just using a raw key. The implementation is much more streamlined than crypto/hmac.c. Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250630160320.2888-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/sha2.h | 222 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 222 insertions(+) (limited to 'include') diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index f2a6e84b2840..296ce9d468bf 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -147,6 +147,22 @@ struct __sha512_ctx { }; void __sha512_update(struct __sha512_ctx *ctx, const u8 *data, size_t len); +/* + * HMAC key and message context structs, shared by HMAC-SHA384 and HMAC-SHA512. + * The hmac_sha384_* and hmac_sha512_* structs wrap this one so that the API has + * proper typing and doesn't allow mixing the functions arbitrarily. + */ +struct __hmac_sha512_key { + struct sha512_block_state istate; + struct sha512_block_state ostate; +}; +struct __hmac_sha512_ctx { + struct __sha512_ctx sha_ctx; + struct sha512_block_state ostate; +}; +void __hmac_sha512_init(struct __hmac_sha512_ctx *ctx, + const struct __hmac_sha512_key *key); + /** * struct sha384_ctx - Context for hashing a message with SHA-384 * @ctx: private @@ -202,6 +218,109 @@ void sha384_final(struct sha384_ctx *ctx, u8 out[SHA384_DIGEST_SIZE]); */ void sha384(const u8 *data, size_t len, u8 out[SHA384_DIGEST_SIZE]); +/** + * struct hmac_sha384_key - Prepared key for HMAC-SHA384 + * @key: private + */ +struct hmac_sha384_key { + struct __hmac_sha512_key key; +}; + +/** + * struct hmac_sha384_ctx - Context for computing HMAC-SHA384 of a message + * @ctx: private + */ +struct hmac_sha384_ctx { + struct __hmac_sha512_ctx ctx; +}; + +/** + * hmac_sha384_preparekey() - Prepare a key for HMAC-SHA384 + * @key: (output) the key structure to initialize + * @raw_key: the raw HMAC-SHA384 key + * @raw_key_len: the key length in bytes. All key lengths are supported. + * + * Note: the caller is responsible for zeroizing both the struct hmac_sha384_key + * and the raw key once they are no longer needed. + * + * Context: Any context. + */ +void hmac_sha384_preparekey(struct hmac_sha384_key *key, + const u8 *raw_key, size_t raw_key_len); + +/** + * hmac_sha384_init() - Initialize an HMAC-SHA384 context for a new message + * @ctx: (output) the HMAC context to initialize + * @key: the prepared HMAC key + * + * If you don't need incremental computation, consider hmac_sha384() instead. + * + * Context: Any context. + */ +static inline void hmac_sha384_init(struct hmac_sha384_ctx *ctx, + const struct hmac_sha384_key *key) +{ + __hmac_sha512_init(&ctx->ctx, &key->key); +} + +/** + * hmac_sha384_update() - Update an HMAC-SHA384 context with message data + * @ctx: the HMAC context to update; must have been initialized + * @data: the message data + * @data_len: the data length in bytes + * + * This can be called any number of times. + * + * Context: Any context. + */ +static inline void hmac_sha384_update(struct hmac_sha384_ctx *ctx, + const u8 *data, size_t data_len) +{ + __sha512_update(&ctx->ctx.sha_ctx, data, data_len); +} + +/** + * hmac_sha384_final() - Finish computing an HMAC-SHA384 value + * @ctx: the HMAC context to finalize; must have been initialized + * @out: (output) the resulting HMAC-SHA384 value + * + * After finishing, this zeroizes @ctx. So the caller does not need to do it. + * + * Context: Any context. + */ +void hmac_sha384_final(struct hmac_sha384_ctx *ctx, u8 out[SHA384_DIGEST_SIZE]); + +/** + * hmac_sha384() - Compute HMAC-SHA384 in one shot, using a prepared key + * @key: the prepared HMAC key + * @data: the message data + * @data_len: the data length in bytes + * @out: (output) the resulting HMAC-SHA384 value + * + * If you're using the key only once, consider using hmac_sha384_usingrawkey(). + * + * Context: Any context. + */ +void hmac_sha384(const struct hmac_sha384_key *key, + const u8 *data, size_t data_len, u8 out[SHA384_DIGEST_SIZE]); + +/** + * hmac_sha384_usingrawkey() - Compute HMAC-SHA384 in one shot, using a raw key + * @raw_key: the raw HMAC-SHA384 key + * @raw_key_len: the key length in bytes. All key lengths are supported. + * @data: the message data + * @data_len: the data length in bytes + * @out: (output) the resulting HMAC-SHA384 value + * + * If you're using the key multiple times, prefer to use + * hmac_sha384_preparekey() followed by multiple calls to hmac_sha384() instead. + * + * Context: Any context. + */ +void hmac_sha384_usingrawkey(const u8 *raw_key, size_t raw_key_len, + const u8 *data, size_t data_len, + u8 out[SHA384_DIGEST_SIZE]); + /** * struct sha512_ctx - Context for hashing a message with SHA-512 * @ctx: private @@ -257,4 +376,107 @@ void sha512_final(struct sha512_ctx *ctx, u8 out[SHA512_DIGEST_SIZE]); */ void sha512(const u8 *data, size_t len, u8 out[SHA512_DIGEST_SIZE]); +/** + * struct hmac_sha512_key - Prepared key for HMAC-SHA512 + * @key: private + */ +struct hmac_sha512_key { + struct __hmac_sha512_key key; +}; + +/** + * struct hmac_sha512_ctx - Context for computing HMAC-SHA512 of a message + * @ctx: private + */ +struct hmac_sha512_ctx { + struct __hmac_sha512_ctx ctx; +}; + +/** + * hmac_sha512_preparekey() - Prepare a key for HMAC-SHA512 + * @key: (output) the key structure to initialize + * @raw_key: the raw HMAC-SHA512 key + * @raw_key_len: the key length in bytes. All key lengths are supported. + * + * Note: the caller is responsible for zeroizing both the struct hmac_sha512_key + * and the raw key once they are no longer needed. + * + * Context: Any context. + */ +void hmac_sha512_preparekey(struct hmac_sha512_key *key, + const u8 *raw_key, size_t raw_key_len); + +/** + * hmac_sha512_init() - Initialize an HMAC-SHA512 context for a new message + * @ctx: (output) the HMAC context to initialize + * @key: the prepared HMAC key + * + * If you don't need incremental computation, consider hmac_sha512() instead. + * + * Context: Any context. + */ +static inline void hmac_sha512_init(struct hmac_sha512_ctx *ctx, + const struct hmac_sha512_key *key) +{ + __hmac_sha512_init(&ctx->ctx, &key->key); +} + +/** + * hmac_sha512_update() - Update an HMAC-SHA512 context with message data + * @ctx: the HMAC context to update; must have been initialized + * @data: the message data + * @data_len: the data length in bytes + * + * This can be called any number of times. + * + * Context: Any context. + */ +static inline void hmac_sha512_update(struct hmac_sha512_ctx *ctx, + const u8 *data, size_t data_len) +{ + __sha512_update(&ctx->ctx.sha_ctx, data, data_len); +} + +/** + * hmac_sha512_final() - Finish computing an HMAC-SHA512 value + * @ctx: the HMAC context to finalize; must have been initialized + * @out: (output) the resulting HMAC-SHA512 value + * + * After finishing, this zeroizes @ctx. So the caller does not need to do it. + * + * Context: Any context. + */ +void hmac_sha512_final(struct hmac_sha512_ctx *ctx, u8 out[SHA512_DIGEST_SIZE]); + +/** + * hmac_sha512() - Compute HMAC-SHA512 in one shot, using a prepared key + * @key: the prepared HMAC key + * @data: the message data + * @data_len: the data length in bytes + * @out: (output) the resulting HMAC-SHA512 value + * + * If you're using the key only once, consider using hmac_sha512_usingrawkey(). + * + * Context: Any context. + */ +void hmac_sha512(const struct hmac_sha512_key *key, + const u8 *data, size_t data_len, u8 out[SHA512_DIGEST_SIZE]); + +/** + * hmac_sha512_usingrawkey() - Compute HMAC-SHA512 in one shot, using a raw key + * @raw_key: the raw HMAC-SHA512 key + * @raw_key_len: the key length in bytes. All key lengths are supported. + * @data: the message data + * @data_len: the data length in bytes + * @out: (output) the resulting HMAC-SHA512 value + * + * If you're using the key multiple times, prefer to use + * hmac_sha512_preparekey() followed by multiple calls to hmac_sha512() instead. + * + * Context: Any context. + */ +void hmac_sha512_usingrawkey(const u8 *raw_key, size_t raw_key_len, + const u8 *data, size_t data_len, + u8 out[SHA512_DIGEST_SIZE]); + #endif /* _CRYPTO_SHA2_H */ -- cgit v1.2.3 From 469acaa12502e05eefd439693361fe4b851a4fd5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Jun 2025 09:03:09 -0700 Subject: crypto: sha512 - Replace sha512_generic with wrapper around SHA-512 library Delete crypto/sha512_generic.c, which provided "generic" SHA-384 and SHA-512 crypto_shash algorithms. Replace it with crypto/sha512.c which provides SHA-384, SHA-512, HMAC-SHA384, and HMAC-SHA512 crypto_shash algorithms using the corresponding library functions. This is a prerequisite for migrating all the arch-optimized SHA-512 code (which is almost 3000 lines) to lib/crypto/ rather than duplicating it. Since the replacement crypto_shash algorithms are implemented using the (potentially arch-optimized) library functions, give them cra_driver_names ending with "-lib" rather than "-generic". Update crypto/testmgr.c and one odd driver to take this change in driver name into account. Besides these cases which are accounted for, there are no known cases where the cra_driver_name was being depended on. This change does mean that the abstract partial block handling code in crypto/shash.c, which got added in 6.16, no longer gets used. But that's fine; the library has to implement the partial block handling anyway, and it's better to do it in the library since the block size and other properties of the algorithm are all fixed at compile time there, resulting in more streamlined code. Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250630160320.2888-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/sha512_base.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/crypto/sha512_base.h b/include/crypto/sha512_base.h index aa814bab442d..d1361b3eb70b 100644 --- a/include/crypto/sha512_base.h +++ b/include/crypto/sha512_base.h @@ -114,7 +114,4 @@ static inline int sha512_base_finish(struct shash_desc *desc, u8 *out) return 0; } -void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src, - int blocks); - #endif /* _CRYPTO_SHA512_BASE_H */ -- cgit v1.2.3 From 9b5c0d82b26d10733f67e10ea1889fc24aa6840a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Jun 2025 09:03:20 -0700 Subject: crypto: sha512 - Remove sha512_base.h sha512_base.h is no longer used, so remove it. Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250630160320.2888-17-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/sha512_base.h | 117 ------------------------------------------- 1 file changed, 117 deletions(-) delete mode 100644 include/crypto/sha512_base.h (limited to 'include') diff --git a/include/crypto/sha512_base.h b/include/crypto/sha512_base.h deleted file mode 100644 index d1361b3eb70b..000000000000 --- a/include/crypto/sha512_base.h +++ /dev/null @@ -1,117 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * sha512_base.h - core logic for SHA-512 implementations - * - * Copyright (C) 2015 Linaro Ltd - */ - -#ifndef _CRYPTO_SHA512_BASE_H -#define _CRYPTO_SHA512_BASE_H - -#include -#include -#include -#include -#include -#include -#include - -typedef void (sha512_block_fn)(struct sha512_state *sst, u8 const *src, - int blocks); - -static inline int sha384_base_init(struct shash_desc *desc) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SHA384_H0; - sctx->state[1] = SHA384_H1; - sctx->state[2] = SHA384_H2; - sctx->state[3] = SHA384_H3; - sctx->state[4] = SHA384_H4; - sctx->state[5] = SHA384_H5; - sctx->state[6] = SHA384_H6; - sctx->state[7] = SHA384_H7; - sctx->count[0] = sctx->count[1] = 0; - - return 0; -} - -static inline int sha512_base_init(struct shash_desc *desc) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SHA512_H0; - sctx->state[1] = SHA512_H1; - sctx->state[2] = SHA512_H2; - sctx->state[3] = SHA512_H3; - sctx->state[4] = SHA512_H4; - sctx->state[5] = SHA512_H5; - sctx->state[6] = SHA512_H6; - sctx->state[7] = SHA512_H7; - sctx->count[0] = sctx->count[1] = 0; - - return 0; -} - -static inline int sha512_base_do_update_blocks(struct shash_desc *desc, - const u8 *data, - unsigned int len, - sha512_block_fn *block_fn) -{ - unsigned int remain = len - round_down(len, SHA512_BLOCK_SIZE); - struct sha512_state *sctx = shash_desc_ctx(desc); - - len -= remain; - sctx->count[0] += len; - if (sctx->count[0] < len) - sctx->count[1]++; - block_fn(sctx, data, len / SHA512_BLOCK_SIZE); - return remain; -} - -static inline int sha512_base_do_finup(struct shash_desc *desc, const u8 *src, - unsigned int len, - sha512_block_fn *block_fn) -{ - unsigned int bit_offset = SHA512_BLOCK_SIZE / 8 - 2; - struct sha512_state *sctx = shash_desc_ctx(desc); - union { - __be64 b64[SHA512_BLOCK_SIZE / 4]; - u8 u8[SHA512_BLOCK_SIZE * 2]; - } block = {}; - - if (len >= SHA512_BLOCK_SIZE) { - int remain; - - remain = sha512_base_do_update_blocks(desc, src, len, block_fn); - src += len - remain; - len = remain; - } - - if (len >= bit_offset * 8) - bit_offset += SHA512_BLOCK_SIZE / 8; - memcpy(&block, src, len); - block.u8[len] = 0x80; - sctx->count[0] += len; - block.b64[bit_offset] = cpu_to_be64(sctx->count[1] << 3 | - sctx->count[0] >> 61); - block.b64[bit_offset + 1] = cpu_to_be64(sctx->count[0] << 3); - block_fn(sctx, block.u8, (bit_offset + 2) * 8 / SHA512_BLOCK_SIZE); - memzero_explicit(&block, sizeof(block)); - - return 0; -} - -static inline int sha512_base_finish(struct shash_desc *desc, u8 *out) -{ - unsigned int digest_size = crypto_shash_digestsize(desc->tfm); - struct sha512_state *sctx = shash_desc_ctx(desc); - __be64 *digest = (__be64 *)out; - int i; - - for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be64)) - put_unaligned_be64(sctx->state[i], digest++); - return 0; -} - -#endif /* _CRYPTO_SHA512_BASE_H */ -- cgit v1.2.3 From f2703a104e89077e622e2f34ac686262c5180d71 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 6 Jun 2025 20:22:28 -0700 Subject: lib/crc32: Remove unused combination support Remove crc32_le_combine() and crc32_le_shift(), since they are no longer used. Although combination is an interesting thing that can be done with CRCs, it turned out that none of the users of it in the kernel were even close to being worthwhile. All were much better off simply chaining the CRCs or processing zeroes. Let's remove the CRC32 combination code for now. It can come back (potentially optimized with carryless multiplication instructions) if there is ever a case where it would actually be worthwhile. Link: https://lore.kernel.org/r/20250607032228.27868-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/crc32.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include') diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 8c1883b81b42..36bbc0405aa0 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -48,31 +48,6 @@ u32 crc32_optimizations(void); static inline u32 crc32_optimizations(void) { return 0; } #endif -/** - * crc32_le_combine - Combine two crc32 check values into one. For two - * sequences of bytes, seq1 and seq2 with lengths len1 - * and len2, crc32_le() check values were calculated - * for each, crc1 and crc2. - * - * @crc1: crc32 of the first block - * @crc2: crc32 of the second block - * @len2: length of the second block - * - * Return: The crc32_le() check value of seq1 and seq2 concatenated, - * requiring only crc1, crc2, and len2. Note: If seq_full denotes - * the concatenated memory area of seq1 with seq2, and crc_full - * the crc32_le() value of seq_full, then crc_full == - * crc32_le_combine(crc1, crc2, len2) when crc_full was seeded - * with the same initializer as crc1, and crc2 seed was 0. See - * also crc32_combine_test(). - */ -u32 crc32_le_shift(u32 crc, size_t len); - -static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2) -{ - return crc32_le_shift(crc1, len2) ^ crc2; -} - #define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length) /* -- cgit v1.2.3 From 89a51591405e09a862b9ca1ccfa880986c495c3c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 7 Jun 2025 13:04:43 -0700 Subject: lib/crc: Move files into lib/crc/ Move all CRC files in lib/ into a subdirectory lib/crc/ to keep them from cluttering up the main lib/ directory. Reviewed-by: "Martin K. Petersen" Acked-by: Ingo Molnar Acked-by: "Jason A. Donenfeld" Link: https://lore.kernel.org/r/20250607200454.73587-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/crc64.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/crc64.h b/include/linux/crc64.h index 41de30b907df..b6aa290a7931 100644 --- a/include/linux/crc64.h +++ b/include/linux/crc64.h @@ -1,7 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * See lib/crc64.c for the related specification and polynomial arithmetic. - */ #ifndef _LINUX_CRC64_H #define _LINUX_CRC64_H -- cgit v1.2.3 From 0bcfca56406dc6342e30fafe41a2f34cdde029b4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 7 Jun 2025 13:04:44 -0700 Subject: lib/crc: Prepare for arch-optimized code in subdirs of lib/crc/ Rework how lib/crc/ supports arch-optimized code. First, instead of the arch-optimized CRC code being in arch/$(SRCARCH)/lib/, it will now be in lib/crc/$(SRCARCH)/. Second, the API functions (e.g. crc32c()), arch-optimized functions (e.g. crc32c_arch()), and generic functions (e.g. crc32c_base()) will now be part of a single module for each CRC type, allowing better inlining and dead code elimination. The second change is made possible by the first. As an example, consider CONFIG_CRC32=m on x86. We'll now have just crc32.ko instead of both crc32-x86.ko and crc32.ko. The two modules were already coupled together and always both got loaded together via direct symbol dependency, so the separation provided no benefit. Note: later I'd like to apply the same design to lib/crypto/ too, where often the API functions are out-of-line so this will work even better. In those cases, for each algorithm we currently have 3 modules all coupled together, e.g. libsha256.ko, libsha256-generic.ko, and sha256-x86.ko. We should have just one, inline things properly, and rely on the compiler's dead code elimination to decide the inclusion of the generic code instead of manually setting it via kconfig. Having arch-specific code outside arch/ was somewhat controversial when Zinc proposed it back in 2018. But I don't think the concerns are warranted. It's better from a technical perspective, as it enables the improvements mentioned above. This model is already successfully used in other places in the kernel such as lib/raid6/. The community of each architecture still remains free to work on the code, even if it's not in arch/. At the time there was also a desire to put the library code in the same files as the old-school crypto API, but that was a mistake; now that the library is separate, that's no longer a constraint either. Reviewed-by: "Martin K. Petersen" Acked-by: Ingo Molnar Acked-by: "Jason A. Donenfeld" Link: https://lore.kernel.org/r/20250607200454.73587-3-ebiggers@kernel.org Link: https://lore.kernel.org/r/20250612054514.142728-1-ebiggers@kernel.org Link: https://lore.kernel.org/r/20250621012221.4351-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/crc-t10dif.h | 10 +--------- include/linux/crc32.h | 30 +++--------------------------- include/linux/crc64.h | 19 ++----------------- 3 files changed, 6 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h index a559fdff3f7e..ecc8bc2dd7f4 100644 --- a/include/linux/crc-t10dif.h +++ b/include/linux/crc-t10dif.h @@ -4,15 +4,7 @@ #include -u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len); -u16 crc_t10dif_generic(u16 crc, const u8 *p, size_t len); - -static inline u16 crc_t10dif_update(u16 crc, const u8 *p, size_t len) -{ - if (IS_ENABLED(CONFIG_CRC_T10DIF_ARCH)) - return crc_t10dif_arch(crc, p, len); - return crc_t10dif_generic(crc, p, len); -} +u16 crc_t10dif_update(u16 crc, const u8 *p, size_t len); static inline u16 crc_t10dif(const u8 *p, size_t len) { diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 36bbc0405aa0..22dbe7144eb4 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -5,33 +5,9 @@ #include #include -u32 crc32_le_arch(u32 crc, const u8 *p, size_t len); -u32 crc32_le_base(u32 crc, const u8 *p, size_t len); -u32 crc32_be_arch(u32 crc, const u8 *p, size_t len); -u32 crc32_be_base(u32 crc, const u8 *p, size_t len); -u32 crc32c_arch(u32 crc, const u8 *p, size_t len); -u32 crc32c_base(u32 crc, const u8 *p, size_t len); - -static inline u32 crc32_le(u32 crc, const void *p, size_t len) -{ - if (IS_ENABLED(CONFIG_CRC32_ARCH)) - return crc32_le_arch(crc, p, len); - return crc32_le_base(crc, p, len); -} - -static inline u32 crc32_be(u32 crc, const void *p, size_t len) -{ - if (IS_ENABLED(CONFIG_CRC32_ARCH)) - return crc32_be_arch(crc, p, len); - return crc32_be_base(crc, p, len); -} - -static inline u32 crc32c(u32 crc, const void *p, size_t len) -{ - if (IS_ENABLED(CONFIG_CRC32_ARCH)) - return crc32c_arch(crc, p, len); - return crc32c_base(crc, p, len); -} +u32 crc32_le(u32 crc, const void *p, size_t len); +u32 crc32_be(u32 crc, const void *p, size_t len); +u32 crc32c(u32 crc, const void *p, size_t len); /* * crc32_optimizations() returns flags that indicate which CRC32 library diff --git a/include/linux/crc64.h b/include/linux/crc64.h index b6aa290a7931..fc0c06ab1993 100644 --- a/include/linux/crc64.h +++ b/include/linux/crc64.h @@ -4,11 +4,6 @@ #include -u64 crc64_be_arch(u64 crc, const u8 *p, size_t len); -u64 crc64_be_generic(u64 crc, const u8 *p, size_t len); -u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len); -u64 crc64_nvme_generic(u64 crc, const u8 *p, size_t len); - /** * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64 * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation, @@ -16,12 +11,7 @@ u64 crc64_nvme_generic(u64 crc, const u8 *p, size_t len); * @p: pointer to buffer over which CRC64 is run * @len: length of buffer @p */ -static inline u64 crc64_be(u64 crc, const void *p, size_t len) -{ - if (IS_ENABLED(CONFIG_CRC64_ARCH)) - return crc64_be_arch(crc, p, len); - return crc64_be_generic(crc, p, len); -} +u64 crc64_be(u64 crc, const void *p, size_t len); /** * crc64_nvme - Calculate CRC64-NVME @@ -33,11 +23,6 @@ static inline u64 crc64_be(u64 crc, const void *p, size_t len) * This computes the CRC64 defined in the NVME NVM Command Set Specification, * *including the bitwise inversion at the beginning and end*. */ -static inline u64 crc64_nvme(u64 crc, const void *p, size_t len) -{ - if (IS_ENABLED(CONFIG_CRC64_ARCH)) - return ~crc64_nvme_arch(~crc, p, len); - return ~crc64_nvme_generic(~crc, p, len); -} +u64 crc64_nvme(u64 crc, const void *p, size_t len); #endif /* _LINUX_CRC64_H */ -- cgit v1.2.3 From 8c7c675155ce3f0f6b12c57a47298a390661652d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 19 Jun 2025 11:34:12 -0700 Subject: lib/crc: crc32: Document crc32_le(), crc32_be(), and crc32c() Document these widely used functions. Update kernel-api.rst to point to the correct place, instead of to crc32-main.c which no longer contains kerneldoc comments. Simplify the documentation in crc32poly.h to just point to the corresponding functions, now that they are properly documented. Change the value of CRC32C_POLY_LE to lower case, for consistency. Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250619183414.100082-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/crc32.h | 66 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/crc32poly.h | 16 ++++-------- 2 files changed, 71 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 22dbe7144eb4..f9c173206d4d 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -5,8 +5,74 @@ #include #include +/** + * crc32_le() - Compute least-significant-bit-first IEEE CRC-32 + * @crc: Initial CRC value. ~0 (recommended) or 0 for a new CRC computation, or + * the previous CRC value if computing incrementally. + * @p: Pointer to the data buffer + * @len: Length of data in bytes + * + * This implements the CRC variant that is often known as the IEEE CRC-32, or + * simply CRC-32, and is widely used in Ethernet and other applications: + * + * - Polynomial: x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + + * x^7 + x^5 + x^4 + x^2 + x^1 + x^0 + * - Bit order: Least-significant-bit-first + * - Polynomial in integer form: 0xedb88320 + * + * This does *not* invert the CRC at the beginning or end. The caller is + * expected to do that if it needs to. Inverting at both ends is recommended. + * + * For new applications, prefer to use CRC-32C instead. See crc32c(). + * + * Context: Any context + * Return: The new CRC value + */ u32 crc32_le(u32 crc, const void *p, size_t len); + +/** + * crc32_be() - Compute most-significant-bit-first IEEE CRC-32 + * @crc: Initial CRC value. ~0 (recommended) or 0 for a new CRC computation, or + * the previous CRC value if computing incrementally. + * @p: Pointer to the data buffer + * @len: Length of data in bytes + * + * crc32_be() is the same as crc32_le() except that crc32_be() computes the + * *most-significant-bit-first* variant of the CRC. I.e., within each byte, the + * most significant bit is processed first (treated as highest order polynomial + * coefficient). The same bit order is also used for the CRC value itself: + * + * - Polynomial: x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + + * x^7 + x^5 + x^4 + x^2 + x^1 + x^0 + * - Bit order: Most-significant-bit-first + * - Polynomial in integer form: 0x04c11db7 + * + * Context: Any context + * Return: The new CRC value + */ u32 crc32_be(u32 crc, const void *p, size_t len); + +/** + * crc32c() - Compute CRC-32C + * @crc: Initial CRC value. ~0 (recommended) or 0 for a new CRC computation, or + * the previous CRC value if computing incrementally. + * @p: Pointer to the data buffer + * @len: Length of data in bytes + * + * This implements CRC-32C, i.e. the Castagnoli CRC. This is the recommended + * CRC variant to use in new applications that want a 32-bit CRC. + * + * - Polynomial: x^32 + x^28 + x^27 + x^26 + x^25 + x^23 + x^22 + x^20 + x^19 + + * x^18 + x^14 + x^13 + x^11 + x^10 + x^9 + x^8 + x^6 + x^0 + * - Bit order: Least-significant-bit-first + * - Polynomial in integer form: 0x82f63b78 + * + * This does *not* invert the CRC at the beginning or end. The caller is + * expected to do that if it needs to. Inverting at both ends is recommended. + * + * Context: Any context + * Return: The new CRC value + */ u32 crc32c(u32 crc, const void *p, size_t len); /* diff --git a/include/linux/crc32poly.h b/include/linux/crc32poly.h index 62c4b7790a28..ccab711295fa 100644 --- a/include/linux/crc32poly.h +++ b/include/linux/crc32poly.h @@ -2,19 +2,13 @@ #ifndef _LINUX_CRC32_POLY_H #define _LINUX_CRC32_POLY_H -/* - * There are multiple 16-bit CRC polynomials in common use, but this is - * *the* standard CRC-32 polynomial, first popularized by Ethernet. - * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 - */ +/* The polynomial used by crc32_le(), in integer form. See crc32_le(). */ #define CRC32_POLY_LE 0xedb88320 + +/* The polynomial used by crc32_be(), in integer form. See crc32_be(). */ #define CRC32_POLY_BE 0x04c11db7 -/* - * This is the CRC32c polynomial, as outlined by Castagnoli. - * x^32+x^28+x^27+x^26+x^25+x^23+x^22+x^20+x^19+x^18+x^14+x^13+x^11+x^10+x^9+ - * x^8+x^6+x^0 - */ -#define CRC32C_POLY_LE 0x82F63B78 +/* The polynomial used by crc32c(), in integer form. See crc32c(). */ +#define CRC32C_POLY_LE 0x82f63b78 #endif /* _LINUX_CRC32_POLY_H */ -- cgit v1.2.3 From 0b5a58c078167f73d10711734cdc8ea592561ca9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 19 Jun 2025 11:34:13 -0700 Subject: lib/crc: crc32: Change crc32() from macro to inline function and remove cast There's no need for crc32() to be a macro. Make it an inline function instead. Also, remove the cast of the data pointer to 'unsigned char const *', which is no longer necessary now that the type used in the function prototype is 'const void *'. Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250619183414.100082-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/crc32.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/crc32.h b/include/linux/crc32.h index f9c173206d4d..da78b215ff2e 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -30,6 +30,12 @@ */ u32 crc32_le(u32 crc, const void *p, size_t len); +/* This is just an alias for crc32_le(). */ +static inline u32 crc32(u32 crc, const void *p, size_t len) +{ + return crc32_le(crc, p, len); +} + /** * crc32_be() - Compute most-significant-bit-first IEEE CRC-32 * @crc: Initial CRC value. ~0 (recommended) or 0 for a new CRC computation, or @@ -90,8 +96,6 @@ u32 crc32_optimizations(void); static inline u32 crc32_optimizations(void) { return 0; } #endif -#define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length) - /* * Helpers for hash table generation of ethernet nics: * -- cgit v1.2.3 From 94b2030968be70b33fed9a5514a5967c7f20aebc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 30 Jun 2025 11:36:54 -0600 Subject: io_uring: remove errant ';' from IORING_CQE_F_TSTAMP_HW definition An errant ';' slipped into that definition, which will cause some compilers to complain when it's used in an application: timestamp.c:257:45: error: empty expression statement has no effect; remove unnecessary ';' to silence this warning [-Werror,-Wextra-semi-stmt] 257 | hwts = cqe->flags & IORING_CQE_F_TSTAMP_HW; | ^ Fixes: 9e4ed359b8ef ("io_uring/netcmd: add tx timestamping cmd support") Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 85600ad0ac08..b6be063693c8 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -980,7 +980,7 @@ enum io_uring_socket_op { /* The cqe->flags bit from which the timestamp type is stored */ #define IORING_TIMESTAMP_TYPE_SHIFT (IORING_TIMESTAMP_HW_SHIFT + 1) /* The cqe->flags flag signifying whether it's a hardware timestamp */ -#define IORING_CQE_F_TSTAMP_HW ((__u32)1 << IORING_TIMESTAMP_HW_SHIFT); +#define IORING_CQE_F_TSTAMP_HW ((__u32)1 << IORING_TIMESTAMP_HW_SHIFT) struct io_timespec { __u64 tv_sec; -- cgit v1.2.3 From a70e9f647f501e36a6a092888b1ea7386b7c5664 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Tue, 24 Jun 2025 20:35:56 +0200 Subject: entry: Split generic entry into generic exception and syscall entry Currently CONFIG_GENERIC_ENTRY enables both the generic exception entry logic and the generic syscall entry logic, which are otherwise loosely coupled. Introduce separate config options for these so that architectures can select the two independently. This will make it easier for architectures to migrate to generic entry code. Suggested-by: Mark Rutland Signed-off-by: Jinjie Ruan Signed-off-by: Linus Walleij Signed-off-by: Thomas Gleixner Acked-by: Linus Walleij Link: https://lore.kernel.org/20250213130007.1418890-2-ruanjinjie@huawei.com Link: https://lore.kernel.org/all/20250624-generic-entry-split-v1-1-53d5ef4f94df@linaro.org [Linus Walleij: rebase onto v6.16-rc1] --- include/linux/entry-common.h | 382 +------------------------------------- include/linux/irq-entry-common.h | 389 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 390 insertions(+), 381 deletions(-) create mode 100644 include/linux/irq-entry-common.h (limited to 'include') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index f94f3fdf15fc..7177436f0f9e 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -2,28 +2,16 @@ #ifndef __LINUX_ENTRYCOMMON_H #define __LINUX_ENTRYCOMMON_H -#include +#include #include -#include #include #include -#include #include #include -#include -#include #include #include -/* - * Define dummy _TIF work flags if not defined by the architecture or for - * disabled functionality. - */ -#ifndef _TIF_PATCH_PENDING -# define _TIF_PATCH_PENDING (0) -#endif - #ifndef _TIF_UPROBE # define _TIF_UPROBE (0) #endif @@ -56,69 +44,6 @@ SYSCALL_WORK_SYSCALL_EXIT_TRAP | \ ARCH_SYSCALL_WORK_EXIT) -/* - * TIF flags handled in exit_to_user_mode_loop() - */ -#ifndef ARCH_EXIT_TO_USER_MODE_WORK -# define ARCH_EXIT_TO_USER_MODE_WORK (0) -#endif - -#define EXIT_TO_USER_MODE_WORK \ - (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ - _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ - ARCH_EXIT_TO_USER_MODE_WORK) - -/** - * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs - * @regs: Pointer to currents pt_regs - * - * Defaults to an empty implementation. Can be replaced by architecture - * specific code. - * - * Invoked from syscall_enter_from_user_mode() in the non-instrumentable - * section. Use __always_inline so the compiler cannot push it out of line - * and make it instrumentable. - */ -static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs); - -#ifndef arch_enter_from_user_mode -static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {} -#endif - -/** - * enter_from_user_mode - Establish state when coming from user mode - * - * Syscall/interrupt entry disables interrupts, but user mode is traced as - * interrupts enabled. Also with NO_HZ_FULL RCU might be idle. - * - * 1) Tell lockdep that interrupts are disabled - * 2) Invoke context tracking if enabled to reactivate RCU - * 3) Trace interrupts off state - * - * Invoked from architecture specific syscall entry code with interrupts - * disabled. The calling code has to be non-instrumentable. When the - * function returns all state is correct and interrupts are still - * disabled. The subsequent functions can be instrumented. - * - * This is invoked when there is architecture specific functionality to be - * done between establishing state and enabling interrupts. The caller must - * enable interrupts before invoking syscall_enter_from_user_mode_work(). - */ -static __always_inline void enter_from_user_mode(struct pt_regs *regs) -{ - arch_enter_from_user_mode(regs); - lockdep_hardirqs_off(CALLER_ADDR0); - - CT_WARN_ON(__ct_state() != CT_STATE_USER); - user_exit_irqoff(); - - instrumentation_begin(); - kmsan_unpoison_entry_regs(regs); - trace_hardirqs_off_finish(); - instrumentation_end(); -} - /** * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts * @regs: Pointer to currents pt_regs @@ -203,170 +128,6 @@ static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, l return ret; } -/** - * local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable() - * @ti_work: Cached TIF flags gathered with interrupts disabled - * - * Defaults to local_irq_enable(). Can be supplied by architecture specific - * code. - */ -static inline void local_irq_enable_exit_to_user(unsigned long ti_work); - -#ifndef local_irq_enable_exit_to_user -static inline void local_irq_enable_exit_to_user(unsigned long ti_work) -{ - local_irq_enable(); -} -#endif - -/** - * local_irq_disable_exit_to_user - Exit to user variant of local_irq_disable() - * - * Defaults to local_irq_disable(). Can be supplied by architecture specific - * code. - */ -static inline void local_irq_disable_exit_to_user(void); - -#ifndef local_irq_disable_exit_to_user -static inline void local_irq_disable_exit_to_user(void) -{ - local_irq_disable(); -} -#endif - -/** - * arch_exit_to_user_mode_work - Architecture specific TIF work for exit - * to user mode. - * @regs: Pointer to currents pt_regs - * @ti_work: Cached TIF flags gathered with interrupts disabled - * - * Invoked from exit_to_user_mode_loop() with interrupt enabled - * - * Defaults to NOOP. Can be supplied by architecture specific code. - */ -static inline void arch_exit_to_user_mode_work(struct pt_regs *regs, - unsigned long ti_work); - -#ifndef arch_exit_to_user_mode_work -static inline void arch_exit_to_user_mode_work(struct pt_regs *regs, - unsigned long ti_work) -{ -} -#endif - -/** - * arch_exit_to_user_mode_prepare - Architecture specific preparation for - * exit to user mode. - * @regs: Pointer to currents pt_regs - * @ti_work: Cached TIF flags gathered with interrupts disabled - * - * Invoked from exit_to_user_mode_prepare() with interrupt disabled as the last - * function before return. Defaults to NOOP. - */ -static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, - unsigned long ti_work); - -#ifndef arch_exit_to_user_mode_prepare -static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, - unsigned long ti_work) -{ -} -#endif - -/** - * arch_exit_to_user_mode - Architecture specific final work before - * exit to user mode. - * - * Invoked from exit_to_user_mode() with interrupt disabled as the last - * function before return. Defaults to NOOP. - * - * This needs to be __always_inline because it is non-instrumentable code - * invoked after context tracking switched to user mode. - * - * An architecture implementation must not do anything complex, no locking - * etc. The main purpose is for speculation mitigations. - */ -static __always_inline void arch_exit_to_user_mode(void); - -#ifndef arch_exit_to_user_mode -static __always_inline void arch_exit_to_user_mode(void) { } -#endif - -/** - * arch_do_signal_or_restart - Architecture specific signal delivery function - * @regs: Pointer to currents pt_regs - * - * Invoked from exit_to_user_mode_loop(). - */ -void arch_do_signal_or_restart(struct pt_regs *regs); - -/** - * exit_to_user_mode_loop - do any pending work before leaving to user space - */ -unsigned long exit_to_user_mode_loop(struct pt_regs *regs, - unsigned long ti_work); - -/** - * exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required - * @regs: Pointer to pt_regs on entry stack - * - * 1) check that interrupts are disabled - * 2) call tick_nohz_user_enter_prepare() - * 3) call exit_to_user_mode_loop() if any flags from - * EXIT_TO_USER_MODE_WORK are set - * 4) check that interrupts are still disabled - */ -static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs) -{ - unsigned long ti_work; - - lockdep_assert_irqs_disabled(); - - /* Flush pending rcuog wakeup before the last need_resched() check */ - tick_nohz_user_enter_prepare(); - - ti_work = read_thread_flags(); - if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK)) - ti_work = exit_to_user_mode_loop(regs, ti_work); - - arch_exit_to_user_mode_prepare(regs, ti_work); - - /* Ensure that kernel state is sane for a return to userspace */ - kmap_assert_nomap(); - lockdep_assert_irqs_disabled(); - lockdep_sys_exit(); -} - -/** - * exit_to_user_mode - Fixup state when exiting to user mode - * - * Syscall/interrupt exit enables interrupts, but the kernel state is - * interrupts disabled when this is invoked. Also tell RCU about it. - * - * 1) Trace interrupts on state - * 2) Invoke context tracking if enabled to adjust RCU state - * 3) Invoke architecture specific last minute exit code, e.g. speculation - * mitigations, etc.: arch_exit_to_user_mode() - * 4) Tell lockdep that interrupts are enabled - * - * Invoked from architecture specific code when syscall_exit_to_user_mode() - * is not suitable as the last step before returning to userspace. Must be - * invoked with interrupts disabled and the caller must be - * non-instrumentable. - * The caller has to invoke syscall_exit_to_user_mode_work() before this. - */ -static __always_inline void exit_to_user_mode(void) -{ - instrumentation_begin(); - trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(); - instrumentation_end(); - - user_enter_irqoff(); - arch_exit_to_user_mode(); - lockdep_hardirqs_on(CALLER_ADDR0); -} - /** * syscall_exit_work - Handle work before returning to user mode * @regs: Pointer to current pt_regs @@ -451,145 +212,4 @@ static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs) exit_to_user_mode(); } -/** - * irqentry_enter_from_user_mode - Establish state before invoking the irq handler - * @regs: Pointer to currents pt_regs - * - * Invoked from architecture specific entry code with interrupts disabled. - * Can only be called when the interrupt entry came from user mode. The - * calling code must be non-instrumentable. When the function returns all - * state is correct and the subsequent functions can be instrumented. - * - * The function establishes state (lockdep, RCU (context tracking), tracing) - */ -void irqentry_enter_from_user_mode(struct pt_regs *regs); - -/** - * irqentry_exit_to_user_mode - Interrupt exit work - * @regs: Pointer to current's pt_regs - * - * Invoked with interrupts disabled and fully valid regs. Returns with all - * work handled, interrupts disabled such that the caller can immediately - * switch to user mode. Called from architecture specific interrupt - * handling code. - * - * The call order is #2 and #3 as described in syscall_exit_to_user_mode(). - * Interrupt exit is not invoking #1 which is the syscall specific one time - * work. - */ -void irqentry_exit_to_user_mode(struct pt_regs *regs); - -#ifndef irqentry_state -/** - * struct irqentry_state - Opaque object for exception state storage - * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the - * exit path has to invoke ct_irq_exit(). - * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that - * lockdep state is restored correctly on exit from nmi. - * - * This opaque object is filled in by the irqentry_*_enter() functions and - * must be passed back into the corresponding irqentry_*_exit() functions - * when the exception is complete. - * - * Callers of irqentry_*_[enter|exit]() must consider this structure opaque - * and all members private. Descriptions of the members are provided to aid in - * the maintenance of the irqentry_*() functions. - */ -typedef struct irqentry_state { - union { - bool exit_rcu; - bool lockdep; - }; -} irqentry_state_t; -#endif - -/** - * irqentry_enter - Handle state tracking on ordinary interrupt entries - * @regs: Pointer to pt_regs of interrupted context - * - * Invokes: - * - lockdep irqflag state tracking as low level ASM entry disabled - * interrupts. - * - * - Context tracking if the exception hit user mode. - * - * - The hardirq tracer to keep the state consistent as low level ASM - * entry disabled interrupts. - * - * As a precondition, this requires that the entry came from user mode, - * idle, or a kernel context in which RCU is watching. - * - * For kernel mode entries RCU handling is done conditional. If RCU is - * watching then the only RCU requirement is to check whether the tick has - * to be restarted. If RCU is not watching then ct_irq_enter() has to be - * invoked on entry and ct_irq_exit() on exit. - * - * Avoiding the ct_irq_enter/exit() calls is an optimization but also - * solves the problem of kernel mode pagefaults which can schedule, which - * is not possible after invoking ct_irq_enter() without undoing it. - * - * For user mode entries irqentry_enter_from_user_mode() is invoked to - * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit - * would not be possible. - * - * Returns: An opaque object that must be passed to idtentry_exit() - */ -irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs); - -/** - * irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt - * - * Conditional reschedule with additional sanity checks. - */ -void raw_irqentry_exit_cond_resched(void); -#ifdef CONFIG_PREEMPT_DYNAMIC -#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) -#define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched -#define irqentry_exit_cond_resched_dynamic_disabled NULL -DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched); -#define irqentry_exit_cond_resched() static_call(irqentry_exit_cond_resched)() -#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) -DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); -void dynamic_irqentry_exit_cond_resched(void); -#define irqentry_exit_cond_resched() dynamic_irqentry_exit_cond_resched() -#endif -#else /* CONFIG_PREEMPT_DYNAMIC */ -#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched() -#endif /* CONFIG_PREEMPT_DYNAMIC */ - -/** - * irqentry_exit - Handle return from exception that used irqentry_enter() - * @regs: Pointer to pt_regs (exception entry regs) - * @state: Return value from matching call to irqentry_enter() - * - * Depending on the return target (kernel/user) this runs the necessary - * preemption and work checks if possible and required and returns to - * the caller with interrupts disabled and no further work pending. - * - * This is the last action before returning to the low level ASM code which - * just needs to return to the appropriate context. - * - * Counterpart to irqentry_enter(). - */ -void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state); - -/** - * irqentry_nmi_enter - Handle NMI entry - * @regs: Pointer to currents pt_regs - * - * Similar to irqentry_enter() but taking care of the NMI constraints. - */ -irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs); - -/** - * irqentry_nmi_exit - Handle return from NMI handling - * @regs: Pointer to pt_regs (NMI entry regs) - * @irq_state: Return value from matching call to irqentry_nmi_enter() - * - * Last action before returning to the low level assembly code. - * - * Counterpart to irqentry_nmi_enter(). - */ -void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state); - #endif diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h new file mode 100644 index 000000000000..8af374331900 --- /dev/null +++ b/include/linux/irq-entry-common.h @@ -0,0 +1,389 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_IRQENTRYCOMMON_H +#define __LINUX_IRQENTRYCOMMON_H + +#include +#include +#include +#include +#include + +#include + +/* + * Define dummy _TIF work flags if not defined by the architecture or for + * disabled functionality. + */ +#ifndef _TIF_PATCH_PENDING +# define _TIF_PATCH_PENDING (0) +#endif + +/* + * TIF flags handled in exit_to_user_mode_loop() + */ +#ifndef ARCH_EXIT_TO_USER_MODE_WORK +# define ARCH_EXIT_TO_USER_MODE_WORK (0) +#endif + +#define EXIT_TO_USER_MODE_WORK \ + (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ + _TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ + _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ + ARCH_EXIT_TO_USER_MODE_WORK) + +/** + * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs + * @regs: Pointer to currents pt_regs + * + * Defaults to an empty implementation. Can be replaced by architecture + * specific code. + * + * Invoked from syscall_enter_from_user_mode() in the non-instrumentable + * section. Use __always_inline so the compiler cannot push it out of line + * and make it instrumentable. + */ +static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs); + +#ifndef arch_enter_from_user_mode +static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {} +#endif + +/** + * enter_from_user_mode - Establish state when coming from user mode + * + * Syscall/interrupt entry disables interrupts, but user mode is traced as + * interrupts enabled. Also with NO_HZ_FULL RCU might be idle. + * + * 1) Tell lockdep that interrupts are disabled + * 2) Invoke context tracking if enabled to reactivate RCU + * 3) Trace interrupts off state + * + * Invoked from architecture specific syscall entry code with interrupts + * disabled. The calling code has to be non-instrumentable. When the + * function returns all state is correct and interrupts are still + * disabled. The subsequent functions can be instrumented. + * + * This is invoked when there is architecture specific functionality to be + * done between establishing state and enabling interrupts. The caller must + * enable interrupts before invoking syscall_enter_from_user_mode_work(). + */ +static __always_inline void enter_from_user_mode(struct pt_regs *regs) +{ + arch_enter_from_user_mode(regs); + lockdep_hardirqs_off(CALLER_ADDR0); + + CT_WARN_ON(__ct_state() != CT_STATE_USER); + user_exit_irqoff(); + + instrumentation_begin(); + kmsan_unpoison_entry_regs(regs); + trace_hardirqs_off_finish(); + instrumentation_end(); +} + +/** + * local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable() + * @ti_work: Cached TIF flags gathered with interrupts disabled + * + * Defaults to local_irq_enable(). Can be supplied by architecture specific + * code. + */ +static inline void local_irq_enable_exit_to_user(unsigned long ti_work); + +#ifndef local_irq_enable_exit_to_user +static inline void local_irq_enable_exit_to_user(unsigned long ti_work) +{ + local_irq_enable(); +} +#endif + +/** + * local_irq_disable_exit_to_user - Exit to user variant of local_irq_disable() + * + * Defaults to local_irq_disable(). Can be supplied by architecture specific + * code. + */ +static inline void local_irq_disable_exit_to_user(void); + +#ifndef local_irq_disable_exit_to_user +static inline void local_irq_disable_exit_to_user(void) +{ + local_irq_disable(); +} +#endif + +/** + * arch_exit_to_user_mode_work - Architecture specific TIF work for exit + * to user mode. + * @regs: Pointer to currents pt_regs + * @ti_work: Cached TIF flags gathered with interrupts disabled + * + * Invoked from exit_to_user_mode_loop() with interrupt enabled + * + * Defaults to NOOP. Can be supplied by architecture specific code. + */ +static inline void arch_exit_to_user_mode_work(struct pt_regs *regs, + unsigned long ti_work); + +#ifndef arch_exit_to_user_mode_work +static inline void arch_exit_to_user_mode_work(struct pt_regs *regs, + unsigned long ti_work) +{ +} +#endif + +/** + * arch_exit_to_user_mode_prepare - Architecture specific preparation for + * exit to user mode. + * @regs: Pointer to currents pt_regs + * @ti_work: Cached TIF flags gathered with interrupts disabled + * + * Invoked from exit_to_user_mode_prepare() with interrupt disabled as the last + * function before return. Defaults to NOOP. + */ +static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, + unsigned long ti_work); + +#ifndef arch_exit_to_user_mode_prepare +static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, + unsigned long ti_work) +{ +} +#endif + +/** + * arch_exit_to_user_mode - Architecture specific final work before + * exit to user mode. + * + * Invoked from exit_to_user_mode() with interrupt disabled as the last + * function before return. Defaults to NOOP. + * + * This needs to be __always_inline because it is non-instrumentable code + * invoked after context tracking switched to user mode. + * + * An architecture implementation must not do anything complex, no locking + * etc. The main purpose is for speculation mitigations. + */ +static __always_inline void arch_exit_to_user_mode(void); + +#ifndef arch_exit_to_user_mode +static __always_inline void arch_exit_to_user_mode(void) { } +#endif + +/** + * arch_do_signal_or_restart - Architecture specific signal delivery function + * @regs: Pointer to currents pt_regs + * + * Invoked from exit_to_user_mode_loop(). + */ +void arch_do_signal_or_restart(struct pt_regs *regs); + +/** + * exit_to_user_mode_loop - do any pending work before leaving to user space + */ +unsigned long exit_to_user_mode_loop(struct pt_regs *regs, + unsigned long ti_work); + +/** + * exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required + * @regs: Pointer to pt_regs on entry stack + * + * 1) check that interrupts are disabled + * 2) call tick_nohz_user_enter_prepare() + * 3) call exit_to_user_mode_loop() if any flags from + * EXIT_TO_USER_MODE_WORK are set + * 4) check that interrupts are still disabled + */ +static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs) +{ + unsigned long ti_work; + + lockdep_assert_irqs_disabled(); + + /* Flush pending rcuog wakeup before the last need_resched() check */ + tick_nohz_user_enter_prepare(); + + ti_work = read_thread_flags(); + if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK)) + ti_work = exit_to_user_mode_loop(regs, ti_work); + + arch_exit_to_user_mode_prepare(regs, ti_work); + + /* Ensure that kernel state is sane for a return to userspace */ + kmap_assert_nomap(); + lockdep_assert_irqs_disabled(); + lockdep_sys_exit(); +} + +/** + * exit_to_user_mode - Fixup state when exiting to user mode + * + * Syscall/interrupt exit enables interrupts, but the kernel state is + * interrupts disabled when this is invoked. Also tell RCU about it. + * + * 1) Trace interrupts on state + * 2) Invoke context tracking if enabled to adjust RCU state + * 3) Invoke architecture specific last minute exit code, e.g. speculation + * mitigations, etc.: arch_exit_to_user_mode() + * 4) Tell lockdep that interrupts are enabled + * + * Invoked from architecture specific code when syscall_exit_to_user_mode() + * is not suitable as the last step before returning to userspace. Must be + * invoked with interrupts disabled and the caller must be + * non-instrumentable. + * The caller has to invoke syscall_exit_to_user_mode_work() before this. + */ +static __always_inline void exit_to_user_mode(void) +{ + instrumentation_begin(); + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(); + instrumentation_end(); + + user_enter_irqoff(); + arch_exit_to_user_mode(); + lockdep_hardirqs_on(CALLER_ADDR0); +} + +/** + * irqentry_enter_from_user_mode - Establish state before invoking the irq handler + * @regs: Pointer to currents pt_regs + * + * Invoked from architecture specific entry code with interrupts disabled. + * Can only be called when the interrupt entry came from user mode. The + * calling code must be non-instrumentable. When the function returns all + * state is correct and the subsequent functions can be instrumented. + * + * The function establishes state (lockdep, RCU (context tracking), tracing) + */ +void irqentry_enter_from_user_mode(struct pt_regs *regs); + +/** + * irqentry_exit_to_user_mode - Interrupt exit work + * @regs: Pointer to current's pt_regs + * + * Invoked with interrupts disabled and fully valid regs. Returns with all + * work handled, interrupts disabled such that the caller can immediately + * switch to user mode. Called from architecture specific interrupt + * handling code. + * + * The call order is #2 and #3 as described in syscall_exit_to_user_mode(). + * Interrupt exit is not invoking #1 which is the syscall specific one time + * work. + */ +void irqentry_exit_to_user_mode(struct pt_regs *regs); + +#ifndef irqentry_state +/** + * struct irqentry_state - Opaque object for exception state storage + * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the + * exit path has to invoke ct_irq_exit(). + * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that + * lockdep state is restored correctly on exit from nmi. + * + * This opaque object is filled in by the irqentry_*_enter() functions and + * must be passed back into the corresponding irqentry_*_exit() functions + * when the exception is complete. + * + * Callers of irqentry_*_[enter|exit]() must consider this structure opaque + * and all members private. Descriptions of the members are provided to aid in + * the maintenance of the irqentry_*() functions. + */ +typedef struct irqentry_state { + union { + bool exit_rcu; + bool lockdep; + }; +} irqentry_state_t; +#endif + +/** + * irqentry_enter - Handle state tracking on ordinary interrupt entries + * @regs: Pointer to pt_regs of interrupted context + * + * Invokes: + * - lockdep irqflag state tracking as low level ASM entry disabled + * interrupts. + * + * - Context tracking if the exception hit user mode. + * + * - The hardirq tracer to keep the state consistent as low level ASM + * entry disabled interrupts. + * + * As a precondition, this requires that the entry came from user mode, + * idle, or a kernel context in which RCU is watching. + * + * For kernel mode entries RCU handling is done conditional. If RCU is + * watching then the only RCU requirement is to check whether the tick has + * to be restarted. If RCU is not watching then ct_irq_enter() has to be + * invoked on entry and ct_irq_exit() on exit. + * + * Avoiding the ct_irq_enter/exit() calls is an optimization but also + * solves the problem of kernel mode pagefaults which can schedule, which + * is not possible after invoking ct_irq_enter() without undoing it. + * + * For user mode entries irqentry_enter_from_user_mode() is invoked to + * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit + * would not be possible. + * + * Returns: An opaque object that must be passed to idtentry_exit() + */ +irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs); + +/** + * irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt + * + * Conditional reschedule with additional sanity checks. + */ +void raw_irqentry_exit_cond_resched(void); +#ifdef CONFIG_PREEMPT_DYNAMIC +#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) +#define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched +#define irqentry_exit_cond_resched_dynamic_disabled NULL +DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched); +#define irqentry_exit_cond_resched() static_call(irqentry_exit_cond_resched)() +#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) +DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); +void dynamic_irqentry_exit_cond_resched(void); +#define irqentry_exit_cond_resched() dynamic_irqentry_exit_cond_resched() +#endif +#else /* CONFIG_PREEMPT_DYNAMIC */ +#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched() +#endif /* CONFIG_PREEMPT_DYNAMIC */ + +/** + * irqentry_exit - Handle return from exception that used irqentry_enter() + * @regs: Pointer to pt_regs (exception entry regs) + * @state: Return value from matching call to irqentry_enter() + * + * Depending on the return target (kernel/user) this runs the necessary + * preemption and work checks if possible and required and returns to + * the caller with interrupts disabled and no further work pending. + * + * This is the last action before returning to the low level ASM code which + * just needs to return to the appropriate context. + * + * Counterpart to irqentry_enter(). + */ +void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state); + +/** + * irqentry_nmi_enter - Handle NMI entry + * @regs: Pointer to currents pt_regs + * + * Similar to irqentry_enter() but taking care of the NMI constraints. + */ +irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs); + +/** + * irqentry_nmi_exit - Handle return from NMI handling + * @regs: Pointer to pt_regs (NMI entry regs) + * @irq_state: Return value from matching call to irqentry_nmi_enter() + * + * Last action before returning to the low level assembly code. + * + * Counterpart to irqentry_nmi_enter(). + */ +void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state); + +#endif -- cgit v1.2.3 From 9b8b84879d4adc506b0d3944e20b28d9f3f6994b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 18 Jun 2025 15:00:45 +0900 Subject: block: Increase BLK_DEF_MAX_SECTORS_CAP Back in 2015, commit d2be537c3ba3 ("block: bump BLK_DEF_MAX_SECTORS to 2560") increased the default maximum size of a block device I/O to 2560 sectors (1280 KiB) to "accommodate a 10-data-disk stripe write with chunk size 128k". This choice is rather arbitrary and since then, improvements to the block layer have software RAID drivers correctly advertize their stripe width through chunk_sectors and abuses of BLK_DEF_MAX_SECTORS_CAP by drivers (to set the HW limit rather than the default user controlled maximum I/O size) have been fixed. Since many block devices can benefit from a larger value of BLK_DEF_MAX_SECTORS_CAP, and in particular HDDs, increase this value to be 4MiB, or 8192 sectors. And given that BLK_DEF_MAX_SECTORS_CAP is only used in the block layer and should not be used by drivers directly, move this macro definition to the block layer internal header file block/blk.h. Suggested-by: Martin K . Petersen Signed-off-by: Damien Le Moal Reviewed-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20250618060045.37593-1-dlemoal@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a59880c809c7..5c626d23cbb2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1220,15 +1220,6 @@ enum blk_default_limits { BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, }; -/* - * Default upper limit for the software max_sectors limit used for - * regular file system I/O. This can be increased through sysfs. - * - * Not to be confused with the max_hw_sector limit that is entirely - * controlled by the driver, usually based on hardware limits. - */ -#define BLK_DEF_MAX_SECTORS_CAP 2560u - static inline struct queue_limits *bdev_limits(struct block_device *bdev) { return &bdev_get_queue(bdev)->limits; -- cgit v1.2.3 From 3f66ccbaaef3a0c5bd844eab04e3207b4061c546 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 25 Jun 2025 18:33:23 +0900 Subject: block: Make REQ_OP_ZONE_FINISH a write operation REQ_OP_ZONE_FINISH is defined as "12", which makes op_is_write(REQ_OP_ZONE_FINISH) return false, despite the fact that a zone finish operation is an operation that modifies a zone (transition it to full) and so should be considered as a write operation (albeit one that does not transfer any data to the device). Fix this by redefining REQ_OP_ZONE_FINISH to be an odd number (13), and redefine REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL using sequential odd numbers from that new value. Fixes: 6c1b1da58f8c ("block: add zone open, close and finish operations") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250625093327.548866-2-dlemoal@kernel.org Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 3d1577f07c1c..930daff207df 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -350,11 +350,11 @@ enum req_op { /* Close a zone */ REQ_OP_ZONE_CLOSE = (__force blk_opf_t)11, /* Transition a zone to full */ - REQ_OP_ZONE_FINISH = (__force blk_opf_t)12, + REQ_OP_ZONE_FINISH = (__force blk_opf_t)13, /* reset a zone write pointer */ - REQ_OP_ZONE_RESET = (__force blk_opf_t)13, + REQ_OP_ZONE_RESET = (__force blk_opf_t)15, /* reset all the zone present on the device */ - REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)15, + REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)17, /* Driver private requests */ REQ_OP_DRV_IN = (__force blk_opf_t)34, -- cgit v1.2.3 From f70291411ba20d50008db90a6f0731efac27872c Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 25 Jun 2025 18:33:24 +0900 Subject: block: Introduce bio_needs_zone_write_plugging() In preparation for fixing device mapper zone write handling, introduce the inline helper function bio_needs_zone_write_plugging() to test if a BIO requires handling through zone write plugging using the function blk_zone_plug_bio(). This function returns true for any write (op_is_write(bio) == true) operation directed at a zoned block device using zone write plugging, that is, a block device with a disk that has a zone write plug hash table. This helper allows simplifying the check on entry to blk_zone_plug_bio() and used in to protect calls to it for blk-mq devices and DM devices. Fixes: f211268ed1f9 ("dm: Use the block layer zone append emulation") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250625093327.548866-3-dlemoal@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5c626d23cbb2..a51f92b6c340 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -837,6 +837,55 @@ static inline unsigned int disk_nr_zones(struct gendisk *disk) { return disk->nr_zones; } + +/** + * bio_needs_zone_write_plugging - Check if a BIO needs to be handled with zone + * write plugging + * @bio: The BIO being submitted + * + * Return true whenever @bio execution needs to be handled through zone + * write plugging (using blk_zone_plug_bio()). Return false otherwise. + */ +static inline bool bio_needs_zone_write_plugging(struct bio *bio) +{ + enum req_op op = bio_op(bio); + + /* + * Only zoned block devices have a zone write plug hash table. But not + * all of them have one (e.g. DM devices may not need one). + */ + if (!bio->bi_bdev->bd_disk->zone_wplugs_hash) + return false; + + /* Only write operations need zone write plugging. */ + if (!op_is_write(op)) + return false; + + /* Ignore empty flush */ + if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) + return false; + + /* Ignore BIOs that already have been handled by zone write plugging. */ + if (bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING)) + return false; + + /* + * All zone write operations must be handled through zone write plugging + * using blk_zone_plug_bio(). + */ + switch (op) { + case REQ_OP_ZONE_APPEND: + case REQ_OP_WRITE: + case REQ_OP_WRITE_ZEROES: + case REQ_OP_ZONE_FINISH: + case REQ_OP_ZONE_RESET: + case REQ_OP_ZONE_RESET_ALL: + return true; + default: + return false; + } +} + bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); /** @@ -866,6 +915,12 @@ static inline unsigned int disk_nr_zones(struct gendisk *disk) { return 0; } + +static inline bool bio_needs_zone_write_plugging(struct bio *bio) +{ + return false; +} + static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) { return false; -- cgit v1.2.3 From 38446014648c9f7b2843f87517c8f2b73906bb40 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Jun 2025 13:34:58 +0200 Subject: block: don't merge different kinds of P2P transfers in a single bio To get out of the DMA mapping helpers having to check every segment for it's P2P status, ensure that bios either contain P2P transfers or non-P2P transfers, and that a P2P bio only contains ranges from a single device. This means we do the page zone access in the bio add path where it should be still page hot, and will only have do the fairly expensive P2P topology lookup once per bio down in the DMA mapping path, and only for already marked bios. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Logan Gunthorpe Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20250625113531.522027-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 930daff207df..09b99d52fd36 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -386,6 +386,7 @@ enum req_flag_bits { __REQ_DRV, /* for driver use */ __REQ_FS_PRIVATE, /* for file system (submitter) use */ __REQ_ATOMIC, /* for atomic write operations */ + __REQ_P2PDMA, /* contains P2P DMA pages */ /* * Command specific flags, keep last: */ @@ -418,6 +419,7 @@ enum req_flag_bits { #define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV) #define REQ_FS_PRIVATE (__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE) #define REQ_ATOMIC (__force blk_opf_t)(1ULL << __REQ_ATOMIC) +#define REQ_P2PDMA (__force blk_opf_t)(1ULL << __REQ_P2PDMA) #define REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP) -- cgit v1.2.3 From 858299dc61603670823f8c1d62bf3fc7af44b18b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Jun 2025 13:34:59 +0200 Subject: block: add scatterlist-less DMA mapping helpers Add a new blk_rq_dma_map / blk_rq_dma_unmap pair that does away with the wasteful scatterlist structure. Instead it uses the mapping iterator to either add segments to the IOVA for IOMMU operations, or just maps them one by one for the direct mapping. For the IOMMU case instead of a scatterlist with an entry for each segment, only a single [dma_addr,len] pair needs to be stored for processing a request, and for the direct mapping the per-segment allocation shrinks from [page,offset,len,dma_addr,dma_len] to just [dma_addr,len]. One big difference to the scatterlist API, which could be considered downside, is that the IOVA collapsing only works when the driver sets a virt_boundary that matches the IOMMU granule. For NVMe this is done already so it works perfectly. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20250625113531.522027-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq-dma.h | 63 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 include/linux/blk-mq-dma.h (limited to 'include') diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h new file mode 100644 index 000000000000..c26a01aeae00 --- /dev/null +++ b/include/linux/blk-mq-dma.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef BLK_MQ_DMA_H +#define BLK_MQ_DMA_H + +#include +#include + +struct blk_dma_iter { + /* Output address range for this iteration */ + dma_addr_t addr; + u32 len; + + /* Status code. Only valid when blk_rq_dma_map_iter_* returned false */ + blk_status_t status; + + /* Internal to blk_rq_dma_map_iter_* */ + struct req_iterator iter; + struct pci_p2pdma_map_state p2pdma; +}; + +bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev, + struct dma_iova_state *state, struct blk_dma_iter *iter); +bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev, + struct dma_iova_state *state, struct blk_dma_iter *iter); + +/** + * blk_rq_dma_map_coalesce - were all segments coalesced? + * @state: DMA state to check + * + * Returns true if blk_rq_dma_map_iter_start coalesced all segments into a + * single DMA range. + */ +static inline bool blk_rq_dma_map_coalesce(struct dma_iova_state *state) +{ + return dma_use_iova(state); +} + +/** + * blk_rq_dma_unmap - try to DMA unmap a request + * @req: request to unmap + * @dma_dev: device to unmap from + * @state: DMA IOVA state + * @mapped_len: number of bytes to unmap + * + * Returns %false if the callers need to manually unmap every DMA segment + * mapped using @iter or %true if no work is left to be done. + */ +static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev, + struct dma_iova_state *state, size_t mapped_len) +{ + if (req->cmd_flags & REQ_P2PDMA) + return true; + + if (dma_use_iova(state)) { + dma_iova_destroy(dma_dev, state, mapped_len, rq_dma_dir(req), + 0); + return true; + } + + return !dma_need_unmap(dma_dev); +} + +#endif /* BLK_MQ_DMA_H */ -- cgit v1.2.3 From 2ad26b7bedcd4941e6dafa1851e2054b369b9d25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Mon, 19 May 2025 16:41:08 +0100 Subject: include: linux: move adi-axi-common.h out of fpga MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The adi-axi-common.h header has some common defines used in various ADI IPs. However they are not specific for any fpga manager so it's questionable for the header to live under include/linux/fpga. Hence let's just move one directory up and update all users. Suggested-by: Xu Yilun Acked-by: Xu Yilun Acked-by: Jonathan Cameron # for IIO Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250519-dev-axi-clkgen-limits-v6-3-bc4b3b61d1d4@analog.com Acked-by: Mark Brown Acked-by: Uwe Kleine-König Reviewed-by: David Lechner Signed-off-by: Stephen Boyd --- include/linux/adi-axi-common.h | 23 +++++++++++++++++++++++ include/linux/fpga/adi-axi-common.h | 23 ----------------------- 2 files changed, 23 insertions(+), 23 deletions(-) create mode 100644 include/linux/adi-axi-common.h delete mode 100644 include/linux/fpga/adi-axi-common.h (limited to 'include') diff --git a/include/linux/adi-axi-common.h b/include/linux/adi-axi-common.h new file mode 100644 index 000000000000..141ac3f251e6 --- /dev/null +++ b/include/linux/adi-axi-common.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Analog Devices AXI common registers & definitions + * + * Copyright 2019 Analog Devices Inc. + * + * https://wiki.analog.com/resources/fpga/docs/axi_ip + * https://wiki.analog.com/resources/fpga/docs/hdl/regmap + */ + +#ifndef ADI_AXI_COMMON_H_ +#define ADI_AXI_COMMON_H_ + +#define ADI_AXI_REG_VERSION 0x0000 + +#define ADI_AXI_PCORE_VER(major, minor, patch) \ + (((major) << 16) | ((minor) << 8) | (patch)) + +#define ADI_AXI_PCORE_VER_MAJOR(version) (((version) >> 16) & 0xff) +#define ADI_AXI_PCORE_VER_MINOR(version) (((version) >> 8) & 0xff) +#define ADI_AXI_PCORE_VER_PATCH(version) ((version) & 0xff) + +#endif /* ADI_AXI_COMMON_H_ */ diff --git a/include/linux/fpga/adi-axi-common.h b/include/linux/fpga/adi-axi-common.h deleted file mode 100644 index 141ac3f251e6..000000000000 --- a/include/linux/fpga/adi-axi-common.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Analog Devices AXI common registers & definitions - * - * Copyright 2019 Analog Devices Inc. - * - * https://wiki.analog.com/resources/fpga/docs/axi_ip - * https://wiki.analog.com/resources/fpga/docs/hdl/regmap - */ - -#ifndef ADI_AXI_COMMON_H_ -#define ADI_AXI_COMMON_H_ - -#define ADI_AXI_REG_VERSION 0x0000 - -#define ADI_AXI_PCORE_VER(major, minor, patch) \ - (((major) << 16) | ((minor) << 8) | (patch)) - -#define ADI_AXI_PCORE_VER_MAJOR(version) (((version) >> 16) & 0xff) -#define ADI_AXI_PCORE_VER_MINOR(version) (((version) >> 8) & 0xff) -#define ADI_AXI_PCORE_VER_PATCH(version) ((version) & 0xff) - -#endif /* ADI_AXI_COMMON_H_ */ -- cgit v1.2.3 From 6fc942f777b1a94fd2dacd4836aaf7e3b440baf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Mon, 19 May 2025 16:41:09 +0100 Subject: include: adi-axi-common: add new helper macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add new helper macros and enums to help identifying the platform and some characteristics of it at runtime. Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250519-dev-axi-clkgen-limits-v6-4-bc4b3b61d1d4@analog.com Reviewed-by: David Lechner Signed-off-by: Stephen Boyd --- include/linux/adi-axi-common.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include') diff --git a/include/linux/adi-axi-common.h b/include/linux/adi-axi-common.h index 141ac3f251e6..f64f4ad4beda 100644 --- a/include/linux/adi-axi-common.h +++ b/include/linux/adi-axi-common.h @@ -12,6 +12,7 @@ #define ADI_AXI_COMMON_H_ #define ADI_AXI_REG_VERSION 0x0000 +#define ADI_AXI_REG_FPGA_INFO 0x001C #define ADI_AXI_PCORE_VER(major, minor, patch) \ (((major) << 16) | ((minor) << 8) | (patch)) @@ -20,4 +21,36 @@ #define ADI_AXI_PCORE_VER_MINOR(version) (((version) >> 8) & 0xff) #define ADI_AXI_PCORE_VER_PATCH(version) ((version) & 0xff) +#define ADI_AXI_INFO_FPGA_TECH(info) (((info) >> 24) & 0xff) +#define ADI_AXI_INFO_FPGA_FAMILY(info) (((info) >> 16) & 0xff) +#define ADI_AXI_INFO_FPGA_SPEED_GRADE(info) (((info) >> 8) & 0xff) + +enum adi_axi_fpga_technology { + ADI_AXI_FPGA_TECH_UNKNOWN = 0, + ADI_AXI_FPGA_TECH_SERIES7, + ADI_AXI_FPGA_TECH_ULTRASCALE, + ADI_AXI_FPGA_TECH_ULTRASCALE_PLUS, +}; + +enum adi_axi_fpga_family { + ADI_AXI_FPGA_FAMILY_UNKNOWN = 0, + ADI_AXI_FPGA_FAMILY_ARTIX, + ADI_AXI_FPGA_FAMILY_KINTEX, + ADI_AXI_FPGA_FAMILY_VIRTEX, + ADI_AXI_FPGA_FAMILY_ZYNQ, +}; + +enum adi_axi_fpga_speed_grade { + ADI_AXI_FPGA_SPEED_UNKNOWN = 0, + ADI_AXI_FPGA_SPEED_1 = 10, + ADI_AXI_FPGA_SPEED_1L = 11, + ADI_AXI_FPGA_SPEED_1H = 12, + ADI_AXI_FPGA_SPEED_1HV = 13, + ADI_AXI_FPGA_SPEED_1LV = 14, + ADI_AXI_FPGA_SPEED_2 = 20, + ADI_AXI_FPGA_SPEED_2L = 21, + ADI_AXI_FPGA_SPEED_2LV = 22, + ADI_AXI_FPGA_SPEED_3 = 30, +}; + #endif /* ADI_AXI_COMMON_H_ */ -- cgit v1.2.3 From 03dc03fa0432a9160c4fcbdb86f274e6b4587972 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 26 Jun 2025 10:31:10 +0300 Subject: neighbor: Add NTF_EXT_VALIDATED flag for externally validated entries tl;dr ===== Add a new neighbor flag ("extern_valid") that can be used to indicate to the kernel that a neighbor entry was learned and determined to be valid externally. The kernel will not try to remove or invalidate such an entry, leaving these decisions to the user space control plane. This is needed for EVPN multi-homing where a neighbor entry for a multi-homed host needs to be synced across all the VTEPs among which the host is multi-homed. Background ========== In a typical EVPN multi-homing setup each host is multi-homed using a set of links called ES (Ethernet Segment, i.e., LAG) to multiple leaf switches (VTEPs). VTEPs that are connected to the same ES are called ES peers. When a neighbor entry is learned on a VTEP, it is distributed to both ES peers and remote VTEPs using EVPN MAC/IP advertisement routes. ES peers use the neighbor entry when routing traffic towards the multi-homed host and remote VTEPs use it for ARP/NS suppression. Motivation ========== If the ES link between a host and the VTEP on which the neighbor entry was locally learned goes down, the EVPN MAC/IP advertisement route will be withdrawn and the neighbor entries will be removed from both ES peers and remote VTEPs. Routing towards the multi-homed host and ARP/NS suppression can fail until another ES peer locally learns the neighbor entry and distributes it via an EVPN MAC/IP advertisement route. "draft-rbickhart-evpn-ip-mac-proxy-adv-03" [1] suggests avoiding these intermittent failures by having the ES peers install the neighbor entries as before, but also injecting EVPN MAC/IP advertisement routes with a proxy indication. When the previously mentioned ES link goes down and the original EVPN MAC/IP advertisement route is withdrawn, the ES peers will not withdraw their neighbor entries, but instead start aging timers for the proxy indication. If an ES peer locally learns the neighbor entry (i.e., it becomes "reachable"), it will restart its aging timer for the entry and emit an EVPN MAC/IP advertisement route without a proxy indication. An ES peer will stop its aging timer for the proxy indication if it observes the removal of the proxy indication from at least one of the ES peers advertising the entry. In the event that the aging timer for the proxy indication expired, an ES peer will withdraw its EVPN MAC/IP advertisement route. If the timer expired on all ES peers and they all withdrew their proxy advertisements, the neighbor entry will be completely removed from the EVPN fabric. Implementation ============== In the above scheme, when the control plane (e.g., FRR) advertises a neighbor entry with a proxy indication, it expects the corresponding entry in the data plane (i.e., the kernel) to remain valid and not be removed due to garbage collection or loss of carrier. The control plane also expects the kernel to notify it if the entry was learned locally (i.e., became "reachable") so that it will remove the proxy indication from the EVPN MAC/IP advertisement route. That is why these entries cannot be programmed with dummy states such as "permanent" or "noarp". Instead, add a new neighbor flag ("extern_valid") which indicates that the entry was learned and determined to be valid externally and should not be removed or invalidated by the kernel. The kernel can probe the entry and notify user space when it becomes "reachable" (it is initially installed as "stale"). However, if the kernel does not receive a confirmation, have it return the entry to the "stale" state instead of the "failed" state. In other words, an entry marked with the "extern_valid" flag behaves like any other dynamically learned entry other than the fact that the kernel cannot remove or invalidate it. One can argue that the "extern_valid" flag should not prevent garbage collection and that instead a neighbor entry should be programmed with both the "extern_valid" and "extern_learn" flags. There are two reasons for not doing that: 1. Unclear why a control plane would like to program an entry that the kernel cannot invalidate but can completely remove. 2. The "extern_learn" flag is used by FRR for neighbor entries learned on remote VTEPs (for ARP/NS suppression) whereas here we are concerned with local entries. This distinction is currently irrelevant for the kernel, but might be relevant in the future. Given that the flag only makes sense when the neighbor has a valid state, reject attempts to add a neighbor with an invalid state and with this flag set. For example: # ip neigh add 192.0.2.1 nud none dev br0.10 extern_valid Error: Cannot create externally validated neighbor with an invalid state. # ip neigh add 192.0.2.1 lladdr 00:11:22:33:44:55 nud stale dev br0.10 extern_valid # ip neigh replace 192.0.2.1 nud failed dev br0.10 extern_valid Error: Cannot mark neighbor as externally validated with an invalid state. The above means that a neighbor cannot be created with the "extern_valid" flag and flags such as "use" or "managed" as they result in a neighbor being created with an invalid state ("none") and immediately getting probed: # ip neigh add 192.0.2.1 lladdr 00:11:22:33:44:55 nud stale dev br0.10 extern_valid use Error: Cannot create externally validated neighbor with an invalid state. However, these flags can be used together with "extern_valid" after the neighbor was created with a valid state: # ip neigh add 192.0.2.1 lladdr 00:11:22:33:44:55 nud stale dev br0.10 extern_valid # ip neigh replace 192.0.2.1 lladdr 00:11:22:33:44:55 nud stale dev br0.10 extern_valid use One consequence of preventing the kernel from invalidating a neighbor entry is that by default it will only try to determine reachability using unicast probes. This can be changed using the "mcast_resolicit" sysctl: # sysctl net.ipv4.neigh.br0/10.mcast_resolicit 0 # tcpdump -nn -e -i br0.10 -Q out arp & # ip neigh replace 192.0.2.1 lladdr 00:11:22:33:44:55 nud stale dev br0.10 extern_valid use 62:50:1d:11:93:6f > 00:11:22:33:44:55, ethertype ARP (0x0806), length 42: Request who-has 192.0.2.1 tell 192.0.2.2, length 28 62:50:1d:11:93:6f > 00:11:22:33:44:55, ethertype ARP (0x0806), length 42: Request who-has 192.0.2.1 tell 192.0.2.2, length 28 62:50:1d:11:93:6f > 00:11:22:33:44:55, ethertype ARP (0x0806), length 42: Request who-has 192.0.2.1 tell 192.0.2.2, length 28 # sysctl -wq net.ipv4.neigh.br0/10.mcast_resolicit=3 # ip neigh replace 192.0.2.1 lladdr 00:11:22:33:44:55 nud stale dev br0.10 extern_valid use 62:50:1d:11:93:6f > 00:11:22:33:44:55, ethertype ARP (0x0806), length 42: Request who-has 192.0.2.1 tell 192.0.2.2, length 28 62:50:1d:11:93:6f > 00:11:22:33:44:55, ethertype ARP (0x0806), length 42: Request who-has 192.0.2.1 tell 192.0.2.2, length 28 62:50:1d:11:93:6f > 00:11:22:33:44:55, ethertype ARP (0x0806), length 42: Request who-has 192.0.2.1 tell 192.0.2.2, length 28 62:50:1d:11:93:6f > ff:ff:ff:ff:ff:ff, ethertype ARP (0x0806), length 42: Request who-has 192.0.2.1 tell 192.0.2.2, length 28 62:50:1d:11:93:6f > ff:ff:ff:ff:ff:ff, ethertype ARP (0x0806), length 42: Request who-has 192.0.2.1 tell 192.0.2.2, length 28 62:50:1d:11:93:6f > ff:ff:ff:ff:ff:ff, ethertype ARP (0x0806), length 42: Request who-has 192.0.2.1 tell 192.0.2.2, length 28 iproute2 patches can be found here [2]. [1] https://datatracker.ietf.org/doc/html/draft-rbickhart-evpn-ip-mac-proxy-adv-03 [2] https://github.com/idosch/iproute2/tree/submit/extern_valid_v1 Signed-off-by: Ido Schimmel Acked-by: Daniel Borkmann Link: https://patch.msgid.link/20250626073111.244534-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 4 +++- include/uapi/linux/neighbour.h | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index c7ce5ec7be23..7e865b14749d 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -261,13 +261,15 @@ static inline void *neighbour_priv(const struct neighbour *n) #define NEIGH_UPDATE_F_EXT_LEARNED BIT(5) #define NEIGH_UPDATE_F_ISROUTER BIT(6) #define NEIGH_UPDATE_F_ADMIN BIT(7) +#define NEIGH_UPDATE_F_EXT_VALIDATED BIT(8) /* In-kernel representation for NDA_FLAGS_EXT flags: */ #define NTF_OLD_MASK 0xff #define NTF_EXT_SHIFT 8 -#define NTF_EXT_MASK (NTF_EXT_MANAGED) +#define NTF_EXT_MASK (NTF_EXT_MANAGED | NTF_EXT_EXT_VALIDATED) #define NTF_MANAGED (NTF_EXT_MANAGED << NTF_EXT_SHIFT) +#define NTF_EXT_VALIDATED (NTF_EXT_EXT_VALIDATED << NTF_EXT_SHIFT) extern const struct nla_policy nda_policy[]; diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index b851c36ad25d..c34a81245f87 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -54,6 +54,7 @@ enum { /* Extended flags under NDA_FLAGS_EXT: */ #define NTF_EXT_MANAGED (1 << 0) #define NTF_EXT_LOCKED (1 << 1) +#define NTF_EXT_EXT_VALIDATED (1 << 2) /* * Neighbor Cache Entry States. @@ -92,6 +93,10 @@ enum { * bridge in response to a host trying to communicate via a locked bridge port * with MAB enabled. Their purpose is to notify user space that a host requires * authentication. + * + * NTF_EXT_EXT_VALIDATED flagged neighbor entries were externally validated by + * a user space control plane. The kernel will not remove or invalidate them, + * but it can probe them and notify user space when they become reachable. */ struct nda_cacheinfo { -- cgit v1.2.3 From 763ff02ce287c2e5c8a012d40bd2f3dab99ae5d5 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 20 Jun 2025 09:10:03 -0600 Subject: ublk: allow UBLK_IO_(UN)REGISTER_IO_BUF on any task Currently, UBLK_IO_REGISTER_IO_BUF and UBLK_IO_UNREGISTER_IO_BUF are only permitted on the ublk_io's daemon task. But this restriction is unnecessary. ublk_register_io_buf() calls __ublk_check_and_get_req() to look up the request from the tagset and atomically take a reference on the request without accessing the ublk_io. ublk_unregister_io_buf() doesn't use the q_id or tag at all. So allow these opcodes even on tasks other than io->task. Handle UBLK_IO_UNREGISTER_IO_BUF before obtaining the ubq and io since the buffer index being unregistered is not necessarily related to the specified q_id and tag. Add a feature flag UBLK_F_BUF_REG_OFF_DAEMON that userspace can use to determine whether the kernel supports off-daemon buffer registration. Suggested-by: Ming Lei Signed-off-by: Caleb Sander Mateos Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250620151008.3976463-10-csander@purestorage.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index c9751bdfd937..ec77dabba45b 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -301,6 +301,16 @@ */ #define UBLK_F_PER_IO_DAEMON (1ULL << 13) +/* + * If this feature is set, UBLK_U_IO_REGISTER_IO_BUF/UBLK_U_IO_UNREGISTER_IO_BUF + * can be issued for an I/O on any task. q_id and tag are also ignored in + * UBLK_U_IO_UNREGISTER_IO_BUF's ublksrv_io_cmd. + * If it is unset, zero-copy buffers can only be registered and unregistered by + * the I/O's daemon task. The q_id and tag of the registered buffer are required + * in UBLK_U_IO_UNREGISTER_IO_BUF's ublksrv_io_cmd. + */ +#define UBLK_F_BUF_REG_OFF_DAEMON (1ULL << 14) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 -- cgit v1.2.3 From 12853b279100adc7074b51db23bf62a41cb84cb8 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 27 Jun 2025 13:45:41 +0200 Subject: drm/mipi-dsi: Drop MIPI_DSI_MODE_VSYNC_FLUSH flag Drop the unused MIPI_DSI_MODE_VSYNC_FLUSH flag. Whether or not a display FIFO flush on vsync is required to avoid sending garbage to the panel is not a property of the DSI link, but of the integration between display controller and DSI host bridge. Acked-by: Marek Szyprowski Acked-by: Neil Armstrong Link: https://lore.kernel.org/r/20250627-dsi-vsync-flush-v2-4-4066899a5608@pengutronix.de Signed-off-by: Philipp Zabel --- include/drm/drm_mipi_dsi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index b37860f4a895..369b0d8830c3 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -130,8 +130,6 @@ struct mipi_dsi_host *of_find_mipi_dsi_host_by_node(struct device_node *node); #define MIPI_DSI_MODE_VIDEO_NO_HBP BIT(6) /* disable hsync-active area */ #define MIPI_DSI_MODE_VIDEO_NO_HSA BIT(7) -/* flush display FIFO on vsync pulse */ -#define MIPI_DSI_MODE_VSYNC_FLUSH BIT(8) /* disable EoT packets in HS mode */ #define MIPI_DSI_MODE_NO_EOT_PACKET BIT(9) /* device supports non-continuous clock behavior (DSI spec 5.6.1) */ -- cgit v1.2.3 From f458ccd2aa2c5a6f0129a9b1548f2825071fdc6b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 26 Jun 2025 21:58:04 +0300 Subject: RDMA/uverbs: Check CAP_NET_RAW in user namespace for flow create Currently, the capability check is done in the default init_user_ns user namespace. When a process runs in a non default user namespace, such check fails. Due to this when a process is running using Podman, it fails to create the flow resource. Since the RDMA device is a resource within a network namespace, use the network namespace associated with the RDMA device to determine its owning user namespace. Fixes: 436f2ad05a0b ("IB/core: Export ib_create/destroy_flow through uverbs") Signed-off-by: Parav Pandit Suggested-by: Eric W. Biederman Link: https://patch.msgid.link/6df6f2f24627874c4f6d041c19dc1f6f29f68f84.1750963874.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7da27f01eeb6..010594dc755b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4817,6 +4817,8 @@ static inline int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) } #endif +bool rdma_uattrs_has_raw_cap(const struct uverbs_attr_bundle *attrs); + struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, @@ -4871,6 +4873,7 @@ static inline int ibdev_to_node(struct ib_device *ibdev) bool rdma_dev_access_netns(const struct ib_device *device, const struct net *net); +bool rdma_dev_has_raw_cap(const struct ib_device *dev); static inline struct net *rdma_dev_net(struct ib_device *device) { return read_pnet(&device->coredev.rdma_net); -- cgit v1.2.3 From 12c409aa1ec2592280a2ddcc66ff8f3c7f7bb171 Mon Sep 17 00:00:00 2001 From: Denis OSTERLAND-HEIM Date: Wed, 28 May 2025 12:57:50 +0200 Subject: pps: fix poll support Because pps_cdev_poll() returns unconditionally EPOLLIN, a user space program that calls select/poll get always an immediate data ready-to-read response. As a result the intended use to wait until next data becomes ready does not work. User space snippet: struct pollfd pollfd = { .fd = open("/dev/pps0", O_RDONLY), .events = POLLIN|POLLERR, .revents = 0 }; while(1) { poll(&pollfd, 1, 2000/*ms*/); // returns immediate, but should wait if(revents & EPOLLIN) { // always true struct pps_fdata fdata; memset(&fdata, 0, sizeof(memdata)); ioctl(PPS_FETCH, &fdata); // currently fetches data at max speed } } Lets remember the last fetch event counter and compare this value in pps_cdev_poll() with most recent event counter and return 0 if they are equal. Signed-off-by: Denis OSTERLAND-HEIM Co-developed-by: Rodolfo Giometti Signed-off-by: Rodolfo Giometti Fixes: eae9d2ba0cfc ("LinuxPPS: core support") Link: https://lore.kernel.org/all/f6bed779-6d59-4f0f-8a59-b6312bd83b4e@enneenne.com/ Acked-by: Rodolfo Giometti Link: https://lore.kernel.org/r/c3c50ad1eb19ef553eca8a57c17f4c006413ab70.camel@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/pps_kernel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h index c7abce28ed29..aab0aebb529e 100644 --- a/include/linux/pps_kernel.h +++ b/include/linux/pps_kernel.h @@ -52,6 +52,7 @@ struct pps_device { int current_mode; /* PPS mode at event time */ unsigned int last_ev; /* last PPS event id */ + unsigned int last_fetched_ev; /* last fetched PPS event id */ wait_queue_head_t queue; /* PPS event queue */ unsigned int id; /* PPS source unique ID */ -- cgit v1.2.3 From b75e1f0619bd707e027812e262af3fbce445e71a Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 9 Jun 2025 10:26:47 +0300 Subject: device property: Use tidy for_each_named_* macros Implementing if-conditions inside for_each_x() macros requires some thinking to avoid side effects in the calling code. Resulting code may look somewhat awkward, and there are couple of different ways it is usually done. Standardizing this to one way can help making it more obvious for a code reader and writer. The newly added for_each_if() is a way to achieve this. Use for_each_if() to make these macros look like many others which should in the long run help reading the code. Signed-off-by: Matti Vaittinen Acked-by: Sakari Ailus Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/c98b39a7195006fdd24590b8d11bb271a72a0c8a.1749453752.git.mazziesaccount@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/property.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/property.h b/include/linux/property.h index f718dd4789e5..82f0cb3abd1e 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -17,6 +17,7 @@ #include #include #include +#include struct device; @@ -169,7 +170,7 @@ struct fwnode_handle *fwnode_get_next_available_child_node( #define fwnode_for_each_named_child_node(fwnode, child, name) \ fwnode_for_each_child_node(fwnode, child) \ - if (!fwnode_name_eq(child, name)) { } else + for_each_if(fwnode_name_eq(child, name)) #define fwnode_for_each_available_child_node(fwnode, child) \ for (child = fwnode_get_next_available_child_node(fwnode, NULL); child;\ @@ -184,7 +185,7 @@ struct fwnode_handle *device_get_next_child_node(const struct device *dev, #define device_for_each_named_child_node(dev, child, name) \ device_for_each_child_node(dev, child) \ - if (!fwnode_name_eq(child, name)) { } else + for_each_if(fwnode_name_eq(child, name)) #define device_for_each_child_node_scoped(dev, child) \ for (struct fwnode_handle *child __free(fwnode_handle) = \ @@ -193,7 +194,7 @@ struct fwnode_handle *device_get_next_child_node(const struct device *dev, #define device_for_each_named_child_node_scoped(dev, child, name) \ device_for_each_child_node_scoped(dev, child) \ - if (!fwnode_name_eq(child, name)) { } else + for_each_if(fwnode_name_eq(child, name)) struct fwnode_handle *fwnode_get_named_child_node(const struct fwnode_handle *fwnode, const char *childname); -- cgit v1.2.3 From c6603b1d6556cc02d0169f74508ab0e3e3e4bd76 Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Mon, 30 Jun 2025 14:35:45 +0530 Subject: block: rename tuple_size field in blk_integrity to metadata_size The tuple_size field in blk_integrity currently represents the total size of metadata associated with each data interval. To make the meaning more explicit, rename tuple_size to metadata_size. This is a purely mechanical rename with no functional changes. Suggested-by: Martin K. Petersen Signed-off-by: Anuj Gupta Link: https://lore.kernel.org/20250630090548.3317-2-anuj20.g@samsung.com Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Christian Brauner --- include/linux/blk-integrity.h | 4 ++-- include/linux/blkdev.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index c7eae0bfb013..d27730da47f3 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -33,7 +33,7 @@ int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, static inline bool blk_integrity_queue_supports_integrity(struct request_queue *q) { - return q->limits.integrity.tuple_size; + return q->limits.integrity.metadata_size; } static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) @@ -74,7 +74,7 @@ static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, unsigned int sectors) { - return bio_integrity_intervals(bi, sectors) * bi->tuple_size; + return bio_integrity_intervals(bi, sectors) * bi->metadata_size; } static inline bool blk_integrity_rq(struct request *rq) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a59880c809c7..ccda87d06a38 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -116,7 +116,7 @@ enum blk_integrity_checksum { struct blk_integrity { unsigned char flags; enum blk_integrity_checksum csum_type; - unsigned char tuple_size; + unsigned char metadata_size; unsigned char pi_offset; unsigned char interval_exp; unsigned char tag_size; -- cgit v1.2.3 From 76e45252a4cefa205439eb6610a244771e7d88da Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Mon, 30 Jun 2025 14:35:46 +0530 Subject: block: introduce pi_tuple_size field in blk_integrity Introduce a new pi_tuple_size field in struct blk_integrity to explicitly represent the size (in bytes) of the protection information (PI) tuple. This is a prep patch. Add validation in blk_validate_integrity_limits() to ensure that pi size matches the expected size for known checksum types and never exceeds the pi_tuple_size. Suggested-by: Christoph Hellwig Signed-off-by: Anuj Gupta Link: https://lore.kernel.org/20250630090548.3317-3-anuj20.g@samsung.com Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Christian Brauner --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ccda87d06a38..0d4011dcfed3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -120,6 +120,7 @@ struct blk_integrity { unsigned char pi_offset; unsigned char interval_exp; unsigned char tag_size; + unsigned char pi_tuple_size; }; typedef unsigned int __bitwise blk_mode_t; -- cgit v1.2.3 From 9eb22f7fedfc9eb1b7f431a5359abd4d15b0b0cd Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Mon, 30 Jun 2025 14:35:48 +0530 Subject: fs: add ioctl to query metadata and protection info capabilities Add a new ioctl, FS_IOC_GETLBMD_CAP, to query metadata and protection info (PI) capabilities. This ioctl returns information about the files integrity profile. This is useful for userspace applications to understand a files end-to-end data protection support and configure the I/O accordingly. For now this interface is only supported by block devices. However the design and placement of this ioctl in generic FS ioctl space allows us to extend it to work over files as well. This maybe useful when filesystems start supporting PI-aware layouts. A new structure struct logical_block_metadata_cap is introduced, which contains the following fields: 1. lbmd_flags: bitmask of logical block metadata capability flags 2. lbmd_interval: the amount of data described by each unit of logical block metadata 3. lbmd_size: size in bytes of the logical block metadata associated with each interval 4. lbmd_opaque_size: size in bytes of the opaque block tag associated with each interval 5. lbmd_opaque_offset: offset in bytes of the opaque block tag within the logical block metadata 6. lbmd_pi_size: size in bytes of the T10 PI tuple associated with each interval 7. lbmd_pi_offset: offset in bytes of T10 PI tuple within the logical block metadata 8. lbmd_pi_guard_tag_type: T10 PI guard tag type 9. lbmd_pi_app_tag_size: size in bytes of the T10 PI application tag 10. lbmd_pi_ref_tag_size: size in bytes of the T10 PI reference tag 11. lbmd_pi_storage_tag_size: size in bytes of the T10 PI storage tag The internal logic to fetch the capability is encapsulated in a helper function blk_get_meta_cap(), which uses the blk_integrity profile associated with the device. The ioctl returns -EOPNOTSUPP, if CONFIG_BLK_DEV_INTEGRITY is not enabled. Suggested-by: Martin K. Petersen Signed-off-by: Anuj Gupta Signed-off-by: Kanchan Joshi Link: https://lore.kernel.org/20250630090548.3317-5-anuj20.g@samsung.com Reviewed-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- include/linux/blk-integrity.h | 7 +++++ include/uapi/linux/fs.h | 59 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) (limited to 'include') diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index d27730da47f3..e04c6e5bf1c6 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -29,6 +29,8 @@ int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, ssize_t bytes); +int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd, + struct logical_block_metadata_cap __user *argp); static inline bool blk_integrity_queue_supports_integrity(struct request_queue *q) @@ -92,6 +94,11 @@ static inline struct bio_vec rq_integrity_vec(struct request *rq) rq->bio->bi_integrity->bip_iter); } #else /* CONFIG_BLK_DEV_INTEGRITY */ +static inline int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd, + struct logical_block_metadata_cap __user *argp) +{ + return -EOPNOTSUPP; +} static inline int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *b) { diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 0098b0ce8ccb..83720a2fd20d 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -91,6 +91,63 @@ struct fs_sysfs_path { __u8 name[128]; }; +/* Protection info capability flags */ +#define LBMD_PI_CAP_INTEGRITY (1 << 0) +#define LBMD_PI_CAP_REFTAG (1 << 1) + +/* Checksum types for Protection Information */ +#define LBMD_PI_CSUM_NONE 0 +#define LBMD_PI_CSUM_IP 1 +#define LBMD_PI_CSUM_CRC16_T10DIF 2 +#define LBMD_PI_CSUM_CRC64_NVME 4 + +/* sizeof first published struct */ +#define LBMD_SIZE_VER0 16 + +/* + * Logical block metadata capability descriptor + * If the device does not support metadata, all the fields will be zero. + * Applications must check lbmd_flags to determine whether metadata is + * supported or not. + */ +struct logical_block_metadata_cap { + /* Bitmask of logical block metadata capability flags */ + __u32 lbmd_flags; + /* + * The amount of data described by each unit of logical block + * metadata + */ + __u16 lbmd_interval; + /* + * Size in bytes of the logical block metadata associated with each + * interval + */ + __u8 lbmd_size; + /* + * Size in bytes of the opaque block tag associated with each + * interval + */ + __u8 lbmd_opaque_size; + /* + * Offset in bytes of the opaque block tag within the logical block + * metadata + */ + __u8 lbmd_opaque_offset; + /* Size in bytes of the T10 PI tuple associated with each interval */ + __u8 lbmd_pi_size; + /* Offset in bytes of T10 PI tuple within the logical block metadata */ + __u8 lbmd_pi_offset; + /* T10 PI guard tag type */ + __u8 lbmd_guard_tag_type; + /* Size in bytes of the T10 PI application tag */ + __u8 lbmd_app_tag_size; + /* Size in bytes of the T10 PI reference tag */ + __u8 lbmd_ref_tag_size; + /* Size in bytes of the T10 PI storage tag */ + __u8 lbmd_storage_tag_size; + __u8 pad; +}; + /* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */ #define FILE_DEDUPE_RANGE_SAME 0 #define FILE_DEDUPE_RANGE_DIFFERS 1 @@ -247,6 +304,8 @@ struct fsxattr { * also /sys/kernel/debug/ for filesystems with debugfs exports */ #define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path) +/* Get logical block metadata capability details */ +#define FS_IOC_GETLBMD_CAP _IOWR(0x15, 2, struct logical_block_metadata_cap) /* * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) -- cgit v1.2.3 From e78f70bad29c5ae1e1076698b690b15794e9b81e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 1 Jul 2025 14:32:25 +0200 Subject: time/timecounter: Fix the lie that struct cyclecounter is const In both the read callback for struct cyclecounter, and in struct timecounter, struct cyclecounter is declared as a const pointer. Unfortunatly, a number of users of this pointer treat it as a non-const pointer as it is burried in a larger structure that is heavily modified by the callback function when accessed. This lie had been hidden by the fact that container_of() "casts away" a const attribute of a pointer without any compiler warning happening at all. Fix this all up by removing the const attribute in the needed places so that everyone can see that the structure really isn't const, but can, and is, modified by the users of it. Signed-off-by: Greg Kroah-Hartman Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/2025070124-backyard-hurt-783a@gregkh --- include/linux/timecounter.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h index 0982d1d52b24..dce03a5cafb7 100644 --- a/include/linux/timecounter.h +++ b/include/linux/timecounter.h @@ -28,7 +28,7 @@ * @shift: cycle to nanosecond divisor (power of two) */ struct cyclecounter { - u64 (*read)(const struct cyclecounter *cc); + u64 (*read)(struct cyclecounter *cc); u64 mask; u32 mult; u32 shift; @@ -53,7 +53,7 @@ struct cyclecounter { * @frac: accumulated fractional nanoseconds */ struct timecounter { - const struct cyclecounter *cc; + struct cyclecounter *cc; u64 cycle_last; u64 nsec; u64 mask; @@ -100,7 +100,7 @@ static inline void timecounter_adjtime(struct timecounter *tc, s64 delta) * the time stamp counter by the number of elapsed nanoseconds. */ extern void timecounter_init(struct timecounter *tc, - const struct cyclecounter *cc, + struct cyclecounter *cc, u64 start_tstamp); /** -- cgit v1.2.3 From b9f58d3572a8e1ef707b941eae58ec4014b9269d Mon Sep 17 00:00:00 2001 From: Li Chen Date: Fri, 20 Jun 2025 21:13:07 +0800 Subject: ACPI: Return -ENODEV from acpi_parse_spcr() when SPCR support is disabled If CONFIG_ACPI_SPCR_TABLE is disabled, acpi_parse_spcr() currently returns 0, which may incorrectly suggest that SPCR parsing was successful. This patch changes the behavior to return -ENODEV to clearly indicate that SPCR support is not available. This prepares the codebase for future changes that depend on acpi_parse_spcr() failure detection, such as suppressing misleading console messages. Signed-off-by: Li Chen Acked-by: Hanjun Guo Link: https://lore.kernel.org/r/20250620131309.126555-2-me@linux.beauty Signed-off-by: Catalin Marinas --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index f102c0fe3431..71e692f95290 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1503,7 +1503,7 @@ int acpi_parse_spcr(bool enable_earlycon, bool enable_console); #else static inline int acpi_parse_spcr(bool enable_earlycon, bool enable_console) { - return 0; + return -ENODEV; } #endif -- cgit v1.2.3 From 536f5941adde41c99a18a0ba03b457adc9702ab8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 27 Jun 2025 17:19:23 +0300 Subject: libnvdimm: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Note that kernel.h is discouraged to be included as it's written at the top of that file. While doing that, sort headers alphabetically. Reviewed-by: Dave Jiang Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20250627142001.994860-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ira Weiny --- include/linux/libnvdimm.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index e772aae71843..28f086c4a187 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -6,12 +6,12 @@ */ #ifndef __LIBNVDIMM_H__ #define __LIBNVDIMM_H__ -#include + +#include #include +#include #include #include -#include -#include struct badrange_entry { u64 start; @@ -80,7 +80,9 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc); +struct attribute_group; struct device_node; +struct module; struct nvdimm_bus_descriptor { const struct attribute_group **attr_groups; unsigned long cmd_mask; @@ -121,6 +123,8 @@ struct nd_mapping_desc { int position; }; +struct bio; +struct resource; struct nd_region; struct nd_region_desc { struct resource *res; @@ -147,8 +151,6 @@ static inline void __iomem *devm_nvdimm_ioremap(struct device *dev, return (void __iomem *) devm_nvdimm_memremap(dev, offset, size, 0); } -struct nvdimm_bus; - /* * Note that separate bits for locked + unlocked are defined so that * 'flags == 0' corresponds to an error / not-supported state. @@ -238,6 +240,9 @@ struct nvdimm_fw_ops { int (*arm)(struct nvdimm *nvdimm, enum nvdimm_fwa_trigger arg); }; +struct kobject; +struct nvdimm_bus; + void badrange_init(struct badrange *badrange); int badrange_add(struct badrange *badrange, u64 addr, u64 length); void badrange_forget(struct badrange *badrange, phys_addr_t start, -- cgit v1.2.3 From b6139a6abf673029008f80d42abd3848d80a9108 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Tue, 17 Jun 2025 15:43:23 +0200 Subject: lib/group_cpus: Let group_cpu_evenly() return the number of initialized masks group_cpu_evenly() might have allocated less groups then requested: group_cpu_evenly() __group_cpus_evenly() alloc_nodes_groups() # allocated total groups may be less than numgrps when # active total CPU number is less then numgrps In this case, the caller will do an out of bound access because the caller assumes the masks returned has numgrps. Return the number of groups created so the caller can limit the access range accordingly. Acked-by: Thomas Gleixner Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Signed-off-by: Daniel Wagner Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250617-isolcpus-queue-counters-v1-1-13923686b54b@kernel.org Signed-off-by: Jens Axboe --- include/linux/group_cpus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/group_cpus.h b/include/linux/group_cpus.h index e42807ec61f6..9d4e5ab6c314 100644 --- a/include/linux/group_cpus.h +++ b/include/linux/group_cpus.h @@ -9,6 +9,6 @@ #include #include -struct cpumask *group_cpus_evenly(unsigned int numgrps); +struct cpumask *group_cpus_evenly(unsigned int numgrps, unsigned int *nummasks); #endif -- cgit v1.2.3 From 3f27c1de5df265f9d8edf0cc5d75dc92e328484a Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Tue, 17 Jun 2025 15:43:24 +0200 Subject: blk-mq: add number of queue calc helper Add two variants of helper functions that calculate the correct number of queues to use. Two variants are needed because some drivers base their maximum number of queues on the possible CPU mask, while others use the online CPU mask. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Signed-off-by: Daniel Wagner Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20250617-isolcpus-queue-counters-v1-2-13923686b54b@kernel.org Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index de8c85a03bb7..2a5a828f19a0 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -947,6 +947,8 @@ int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, void blk_mq_unfreeze_queue_non_owner(struct request_queue *q); void blk_freeze_queue_start_non_owner(struct request_queue *q); +unsigned int blk_mq_num_possible_queues(unsigned int max_queues); +unsigned int blk_mq_num_online_queues(unsigned int max_queues); void blk_mq_map_queues(struct blk_mq_queue_map *qmap); void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap, struct device *dev, unsigned int offset); -- cgit v1.2.3 From fc9a099567813a9fef0da07b94ecb8dee64703c4 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Fri, 27 Jun 2025 12:10:11 +0200 Subject: docs: dma-api: add a kernel-doc comment for dma_pool_zalloc() Document the dma_pool_zalloc() wrapper. Signed-off-by: Petr Tesarik Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Acked-by: Marek Szyprowski [jc: fixed up dma_pool_alloc() reference in dmapool.h] Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250627101015.1600042-5-ptesarik@suse.com --- include/linux/dmapool.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h index 06c4de602b2f..7d40b51933d1 100644 --- a/include/linux/dmapool.h +++ b/include/linux/dmapool.h @@ -60,6 +60,14 @@ static inline struct dma_pool *dma_pool_create(const char *name, NUMA_NO_NODE); } +/** + * dma_pool_zalloc - Get a zero-initialized block of DMA coherent memory. + * @pool: dma pool that will produce the block + * @mem_flags: GFP_* bitmask + * @handle: pointer to dma address of block + * + * Same as dma_pool_alloc(), but the returned memory is zeroed. + */ static inline void *dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle) { -- cgit v1.2.3 From 9df7b5ebead649b00bf9a53a798e4bf83a1318fd Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Jul 2025 17:38:37 +0100 Subject: netfs: Fix double put of request If a netfs request finishes during the pause loop, it will have the ref that belongs to the IN_PROGRESS flag removed at that point - however, if it then goes to the final wait loop, that will *also* put the ref because it sees that the IN_PROGRESS flag is clear and incorrectly assumes that this happened when it called the collector. In fact, since IN_PROGRESS is clear, we shouldn't call the collector again since it's done all the cleanup, such as calling ->ki_complete(). Fix this by making netfs_collect_in_app() just return, indicating that we're done if IN_PROGRESS is removed. Fixes: 2b1424cd131c ("netfs: Fix wait/wake to be consistent about the waitqueue used") Signed-off-by: David Howells Link: https://lore.kernel.org/20250701163852.2171681-3-dhowells@redhat.com Tested-by: Steve French Reviewed-by: Paulo Alcantara cc: Steve French cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-cifs@vger.kernel.org Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 333d2e38dd2c..ba35dc66e986 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -56,6 +56,7 @@ EM(netfs_rreq_trace_dirty, "DIRTY ") \ EM(netfs_rreq_trace_done, "DONE ") \ EM(netfs_rreq_trace_free, "FREE ") \ + EM(netfs_rreq_trace_recollect, "RECLLCT") \ EM(netfs_rreq_trace_redirty, "REDIRTY") \ EM(netfs_rreq_trace_resubmit, "RESUBMT") \ EM(netfs_rreq_trace_set_abandon, "S-ABNDN") \ -- cgit v1.2.3 From 5e1e6ec2e346c0850f304c31abdef4158007474e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Jul 2025 17:38:46 +0100 Subject: netfs: Merge i_size update functions Netfslib has two functions for updating the i_size after a write: one for buffered writes into the pagecache and one for direct/unbuffered writes. However, what needs to be done is much the same in both cases, so merge them together. This does raise one question, though: should updating the i_size after a direct write do the same estimated update of i_blocks as is done for buffered writes. Also get rid of the cleanup function pointer from netfs_io_request as it's only used for direct write to update i_size; instead do the i_size setting directly from write collection. Signed-off-by: David Howells Link: https://lore.kernel.org/20250701163852.2171681-12-dhowells@redhat.com cc: Steve French cc: Paulo Alcantara cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/netfs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 065c17385e53..d8186b90fb38 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -279,7 +279,6 @@ struct netfs_io_request { #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; - void (*cleanup)(struct netfs_io_request *req); }; /* -- cgit v1.2.3 From 4e32541076833f5ce2e23523c9faa25f7b2cc96f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Jul 2025 17:38:47 +0100 Subject: netfs: Renumber the NETFS_RREQ_* flags to make traces easier to read Renumber the NETFS_RREQ_* flags to put the most useful status bits in the bottom nibble - and therefore the last hex digit in the trace output - making it easier to grasp the state at a glance. In particular, put the IN_PROGRESS flag in bit 0 and ALL_QUEUED at bit 1. Also make the flags field in /proc/fs/netfs/requests larger to accommodate all the flags. Also make the flags field in the netfs_sreq tracepoint larger to accommodate all the NETFS_SREQ_* flags. Signed-off-by: David Howells Link: https://lore.kernel.org/20250701163852.2171681-13-dhowells@redhat.com Reviewed-by: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/netfs.h | 20 ++++++++++---------- include/trace/events/netfs.h | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index d8186b90fb38..f43f075852c0 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -265,17 +265,17 @@ struct netfs_io_request { bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ refcount_t ref; unsigned long flags; -#define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */ -#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ -#define NETFS_RREQ_FAILED 4 /* The request failed */ -#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes (has ref) */ -#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */ -#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ -#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ +#define NETFS_RREQ_IN_PROGRESS 0 /* Unlocked when the request completes (has ref) */ +#define NETFS_RREQ_ALL_QUEUED 1 /* All subreqs are now queued */ +#define NETFS_RREQ_PAUSE 2 /* Pause subrequest generation */ +#define NETFS_RREQ_FAILED 3 /* The request failed */ +#define NETFS_RREQ_RETRYING 4 /* Set if we're in the retry path */ +#define NETFS_RREQ_SHORT_TRANSFER 5 /* Set if we have a short transfer */ +#define NETFS_RREQ_OFFLOAD_COLLECTION 8 /* Offload collection to workqueue */ +#define NETFS_RREQ_NO_UNLOCK_FOLIO 9 /* Don't unlock no_unlock_folio on completion */ +#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 10 /* Copy current folio to cache from read */ +#define NETFS_RREQ_UPLOAD_TO_SERVER 11 /* Need to write to the server */ #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ -#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ -#define NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */ -#define NETFS_RREQ_SHORT_TRANSFER 15 /* Set if we have a short transfer */ #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index ba35dc66e986..c2d581429a7b 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -367,7 +367,7 @@ TRACE_EVENT(netfs_sreq, __entry->slot = sreq->io_iter.folioq_slot; ), - TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx s=%u e=%d", + TP_printk("R=%08x[%x] %s %s f=%03x s=%llx %zx/%zx s=%u e=%d", __entry->rreq, __entry->index, __print_symbolic(__entry->source, netfs_sreq_sources), __print_symbolic(__entry->what, netfs_sreq_traces), -- cgit v1.2.3 From 90b3ccf514578ca3a6ac25db51a29a48e34e0f1b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Jul 2025 17:38:48 +0100 Subject: netfs: Update tracepoints in a number of ways Make a number of updates to the netfs tracepoints: (1) Remove a duplicate trace from netfs_unbuffered_write_iter_locked(). (2) Move the trace in netfs_wake_rreq_flag() to after the flag is cleared so that the change appears in the trace. (3) Differentiate the use of netfs_rreq_trace_wait/woke_queue symbols. (4) Don't do so many trace emissions in the wait functions as some of them are redundant. (5) In netfs_collect_read_results(), differentiate a subreq that's being abandoned vs one that has been consumed in a regular way. (6) Add a tracepoint to indicate the call to ->ki_complete(). (7) Don't double-increment the subreq_counter when retrying a write. (8) Move the netfs_sreq_trace_io_progress tracepoint within cifs code to just MID_RESPONSE_RECEIVED and add different tracepoints for other MID states and note check failure. Signed-off-by: David Howells Co-developed-by: Paulo Alcantara Signed-off-by: Paulo Alcantara Link: https://lore.kernel.org/20250701163852.2171681-14-dhowells@redhat.com cc: Steve French cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-cifs@vger.kernel.org Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index c2d581429a7b..73e96ccbe830 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -50,12 +50,13 @@ #define netfs_rreq_traces \ EM(netfs_rreq_trace_assess, "ASSESS ") \ - EM(netfs_rreq_trace_copy, "COPY ") \ EM(netfs_rreq_trace_collect, "COLLECT") \ EM(netfs_rreq_trace_complete, "COMPLET") \ + EM(netfs_rreq_trace_copy, "COPY ") \ EM(netfs_rreq_trace_dirty, "DIRTY ") \ EM(netfs_rreq_trace_done, "DONE ") \ EM(netfs_rreq_trace_free, "FREE ") \ + EM(netfs_rreq_trace_ki_complete, "KI-CMPL") \ EM(netfs_rreq_trace_recollect, "RECLLCT") \ EM(netfs_rreq_trace_redirty, "REDIRTY") \ EM(netfs_rreq_trace_resubmit, "RESUBMT") \ @@ -64,13 +65,15 @@ EM(netfs_rreq_trace_unlock, "UNLOCK ") \ EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \ EM(netfs_rreq_trace_unmark, "UNMARK ") \ + EM(netfs_rreq_trace_unpause, "UNPAUSE") \ EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \ - EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \ - EM(netfs_rreq_trace_wait_queue, "WAIT-Q ") \ + EM(netfs_rreq_trace_wait_pause, "--PAUSED--") \ + EM(netfs_rreq_trace_wait_quiesce, "WAIT-QUIESCE") \ + EM(netfs_rreq_trace_waited_ip, "DONE-IP") \ + EM(netfs_rreq_trace_waited_pause, "--UNPAUSED--") \ + EM(netfs_rreq_trace_waited_quiesce, "DONE-QUIESCE") \ EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \ EM(netfs_rreq_trace_wake_queue, "WAKE-Q ") \ - EM(netfs_rreq_trace_woke_queue, "WOKE-Q ") \ - EM(netfs_rreq_trace_unpause, "UNPAUSE") \ E_(netfs_rreq_trace_write_done, "WR-DONE") #define netfs_sreq_sources \ @@ -83,6 +86,7 @@ E_(NETFS_WRITE_TO_CACHE, "WRIT") #define netfs_sreq_traces \ + EM(netfs_sreq_trace_abandoned, "ABNDN") \ EM(netfs_sreq_trace_add_donations, "+DON ") \ EM(netfs_sreq_trace_added, "ADD ") \ EM(netfs_sreq_trace_cache_nowrite, "CA-NW") \ @@ -90,6 +94,7 @@ EM(netfs_sreq_trace_cache_write, "CA-WR") \ EM(netfs_sreq_trace_cancel, "CANCL") \ EM(netfs_sreq_trace_clear, "CLEAR") \ + EM(netfs_sreq_trace_consumed, "CONSM") \ EM(netfs_sreq_trace_discard, "DSCRD") \ EM(netfs_sreq_trace_donate_to_prev, "DON-P") \ EM(netfs_sreq_trace_donate_to_next, "DON-N") \ @@ -97,7 +102,12 @@ EM(netfs_sreq_trace_fail, "FAIL ") \ EM(netfs_sreq_trace_free, "FREE ") \ EM(netfs_sreq_trace_hit_eof, "EOF ") \ - EM(netfs_sreq_trace_io_progress, "IO ") \ + EM(netfs_sreq_trace_io_bad, "I-BAD") \ + EM(netfs_sreq_trace_io_malformed, "I-MLF") \ + EM(netfs_sreq_trace_io_unknown, "I-UNK") \ + EM(netfs_sreq_trace_io_progress, "I-OK ") \ + EM(netfs_sreq_trace_io_req_submitted, "I-RSB") \ + EM(netfs_sreq_trace_io_retry_needed, "I-RTR") \ EM(netfs_sreq_trace_limited, "LIMIT") \ EM(netfs_sreq_trace_need_clear, "N-CLR") \ EM(netfs_sreq_trace_partial_read, "PARTR") \ @@ -143,8 +153,8 @@ #define netfs_sreq_ref_traces \ EM(netfs_sreq_trace_get_copy_to_cache, "GET COPY2C ") \ - EM(netfs_sreq_trace_get_resubmit, "GET RESUBMIT") \ - EM(netfs_sreq_trace_get_submit, "GET SUBMIT") \ + EM(netfs_sreq_trace_get_resubmit, "GET RESUBMT") \ + EM(netfs_sreq_trace_get_submit, "GET SUBMIT ") \ EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \ EM(netfs_sreq_trace_new, "NEW ") \ EM(netfs_sreq_trace_put_abandon, "PUT ABANDON") \ -- cgit v1.2.3 From 2f952c9e8fe13c6ee15c05189f1f87c1a70b866c Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Mon, 30 Jun 2025 18:20:11 +0200 Subject: fs: split fileattr related helpers into separate file This patch moves function related to file extended attributes manipulations to separate file. Refactoring only. Signed-off-by: Andrey Albershteyn Link: https://lore.kernel.org/20250630-xattrat-syscall-v6-1-c4e3bc35227b@kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Darrick J. Wong Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fileattr.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/fileattr.h b/include/linux/fileattr.h index 47c05a9851d0..6030d0bf7ad3 100644 --- a/include/linux/fileattr.h +++ b/include/linux/fileattr.h @@ -55,5 +55,9 @@ static inline bool fileattr_has_fsx(const struct fileattr *fa) int vfs_fileattr_get(struct dentry *dentry, struct fileattr *fa); int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); +int ioctl_getflags(struct file *file, unsigned int __user *argp); +int ioctl_setflags(struct file *file, unsigned int __user *argp); +int ioctl_fsgetxattr(struct file *file, void __user *argp); +int ioctl_fssetxattr(struct file *file, void __user *argp); #endif /* _LINUX_FILEATTR_H */ -- cgit v1.2.3 From defdd02d783c6fa22d0005bdc238ccd9174faf20 Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Mon, 30 Jun 2025 18:20:12 +0200 Subject: lsm: introduce new hooks for setting/getting inode fsxattr Introduce new hooks for setting and getting filesystem extended attributes on inode (FS_IOC_FSGETXATTR). Cc: selinux@vger.kernel.org Cc: Paul Moore Acked-by: Paul Moore Signed-off-by: Andrey Albershteyn Link: https://lore.kernel.org/20250630-xattrat-syscall-v6-2-c4e3bc35227b@kernel.org Signed-off-by: Christian Brauner --- include/linux/lsm_hook_defs.h | 2 ++ include/linux/security.h | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index bf3bbac4e02a..9600a4350e79 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -157,6 +157,8 @@ LSM_HOOK(int, 0, inode_removexattr, struct mnt_idmap *idmap, struct dentry *dentry, const char *name) LSM_HOOK(void, LSM_RET_VOID, inode_post_removexattr, struct dentry *dentry, const char *name) +LSM_HOOK(int, 0, inode_file_setattr, struct dentry *dentry, struct fileattr *fa) +LSM_HOOK(int, 0, inode_file_getattr, struct dentry *dentry, struct fileattr *fa) LSM_HOOK(int, 0, inode_set_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) LSM_HOOK(void, LSM_RET_VOID, inode_post_set_acl, struct dentry *dentry, diff --git a/include/linux/security.h b/include/linux/security.h index dba349629229..9ed0d0e0c81f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -451,6 +451,10 @@ int security_inode_listxattr(struct dentry *dentry); int security_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name); void security_inode_post_removexattr(struct dentry *dentry, const char *name); +int security_inode_file_setattr(struct dentry *dentry, + struct fileattr *fa); +int security_inode_file_getattr(struct dentry *dentry, + struct fileattr *fa); int security_inode_need_killpriv(struct dentry *dentry); int security_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry); int security_inode_getsecurity(struct mnt_idmap *idmap, @@ -1052,6 +1056,18 @@ static inline void security_inode_post_removexattr(struct dentry *dentry, const char *name) { } +static inline int security_inode_file_setattr(struct dentry *dentry, + struct fileattr *fa) +{ + return 0; +} + +static inline int security_inode_file_getattr(struct dentry *dentry, + struct fileattr *fa) +{ + return 0; +} + static inline int security_inode_need_killpriv(struct dentry *dentry) { return cap_inode_need_killpriv(dentry); -- cgit v1.2.3 From 1a4eabf662543c62ae1e71a26d1c8e6643c66388 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 1 Jul 2025 15:32:01 +0100 Subject: mfd: adp5585: Refactor how regmap defaults are handled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only thing changing between variants is the regmap default registers. Hence, instead of having a regmap configuration for every variant (duplicating lots of fields), add a chip info type of structure with a regmap ID to identify which defaults to use and populate regmap_config at runtime given a template plus the id. Also note that between variants, the defaults can be the same which means the chip info structure can be used in more than one compatible. This will also make it simpler adding new chips with more variants. Also note that the chip info structures are deliberately not const as they will also contain lots of members that are the same between the different devices variants and so we will fill those at runtime. Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-6-b1fcfe9e9826@analog.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h index 016033cd68e4..c56af8d8d76c 100644 --- a/include/linux/mfd/adp5585.h +++ b/include/linux/mfd/adp5585.h @@ -119,8 +119,19 @@ struct regmap; +enum adp5585_variant { + ADP5585_00 = 1, + ADP5585_01, + ADP5585_02, + ADP5585_03, + ADP5585_04, + ADP5585_MAX +}; + struct adp5585_dev { + struct device *dev; struct regmap *regmap; + enum adp5585_variant variant; }; #endif -- cgit v1.2.3 From 0190a72f28ee0995c546fd4fcf80ed25a0fc4b28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 1 Jul 2025 15:32:02 +0100 Subject: mfd: adp5585: Add support for adp5589 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ADP5589 is a 19 I/O port expander with built-in keypad matrix decoder, programmable logic, reset generator, and PWM generator. Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-7-b1fcfe9e9826@analog.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h index c56af8d8d76c..70e58122a36a 100644 --- a/include/linux/mfd/adp5585.h +++ b/include/linux/mfd/adp5585.h @@ -117,6 +117,12 @@ #define ADP5585_BANK(n) ((n) >= 6 ? 1 : 0) #define ADP5585_BIT(n) ((n) >= 6 ? BIT((n) - 6) : BIT(n)) +/* ADP5589 */ +#define ADP5589_MAN_ID_VALUE 0x10 +#define ADP5589_GPI_STATUS_C 0x18 +#define ADP5589_INT_EN 0x4e +#define ADP5589_MAX_REG ADP5589_INT_EN + struct regmap; enum adp5585_variant { @@ -125,6 +131,9 @@ enum adp5585_variant { ADP5585_02, ADP5585_03, ADP5585_04, + ADP5589_00, + ADP5589_01, + ADP5589_02, ADP5585_MAX }; @@ -132,6 +141,7 @@ struct adp5585_dev { struct device *dev; struct regmap *regmap; enum adp5585_variant variant; + unsigned int id; }; #endif -- cgit v1.2.3 From 7077fb501b95360c7fe35553f2bdb1ccf34edd16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 1 Jul 2025 15:32:03 +0100 Subject: mfd: adp5585: Add a per chip reg struture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are some differences in the register map between the devices. Hence, add a register structure per device. This will be needed in following patches. On top of that adp5585_fill_regmap_config() is renamed and reworked so that the current struct adp5585_info act as template (they indeed contain all the different data between variants) which can then be complemented depending on the device (as identified by the id register). This is done like this since a lot of the data is pretty much the same between variants of the same device. Reviewed-by: Lee Jones Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-8-b1fcfe9e9826@analog.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h index 70e58122a36a..6ecb90a6276c 100644 --- a/include/linux/mfd/adp5585.h +++ b/include/linux/mfd/adp5585.h @@ -120,6 +120,7 @@ /* ADP5589 */ #define ADP5589_MAN_ID_VALUE 0x10 #define ADP5589_GPI_STATUS_C 0x18 +#define ADP5589_PIN_CONFIG_D 0x4C #define ADP5589_INT_EN 0x4e #define ADP5589_MAX_REG ADP5589_INT_EN @@ -137,9 +138,14 @@ enum adp5585_variant { ADP5585_MAX }; +struct adp5585_regs { + unsigned int ext_cfg; +}; + struct adp5585_dev { struct device *dev; struct regmap *regmap; + const struct adp5585_regs *regs; enum adp5585_variant variant; unsigned int id; }; -- cgit v1.2.3 From 9f425bf713b511b1078e0fea5a88c497e13dbb64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 1 Jul 2025 15:32:04 +0100 Subject: gpio: adp5585: add support for the adp5589 expander MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support the adp5589 I/O expander which supports up to 19 pins. We need to add a chip_info based struct since accessing register "banks" and "bits" differs between devices. Also some register addresses are different. While at it move ADP558X_GPIO_MAX defines to the main header file and rename them. That information will be needed by the top level device in a following change. Acked-by: Linus Walleij Acked-by: Bartosz Golaszewski Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-9-b1fcfe9e9826@analog.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h index 6ecb90a6276c..d26f722cf31a 100644 --- a/include/linux/mfd/adp5585.h +++ b/include/linux/mfd/adp5585.h @@ -107,23 +107,23 @@ #define ADP5585_MAX_REG ADP5585_INT_EN -/* - * Bank 0 covers pins "GPIO 1/R0" to "GPIO 6/R5", numbered 0 to 5 by the - * driver, and bank 1 covers pins "GPIO 7/C0" to "GPIO 11/C4", numbered 6 to - * 10. Some variants of the ADP5585 don't support "GPIO 6/R5". As the driver - * uses identical GPIO numbering for all variants to avoid confusion, GPIO 5 is - * marked as reserved in the device tree for variants that don't support it. - */ -#define ADP5585_BANK(n) ((n) >= 6 ? 1 : 0) -#define ADP5585_BIT(n) ((n) >= 6 ? BIT((n) - 6) : BIT(n)) +#define ADP5585_PIN_MAX 11 /* ADP5589 */ #define ADP5589_MAN_ID_VALUE 0x10 +#define ADP5589_GPI_STATUS_A 0x16 #define ADP5589_GPI_STATUS_C 0x18 +#define ADP5589_RPULL_CONFIG_A 0x19 +#define ADP5589_DEBOUNCE_DIS_A 0x27 +#define ADP5589_GPO_DATA_OUT_A 0x2a +#define ADP5589_GPO_OUT_MODE_A 0x2d +#define ADP5589_GPIO_DIRECTION_A 0x30 #define ADP5589_PIN_CONFIG_D 0x4C #define ADP5589_INT_EN 0x4e #define ADP5589_MAX_REG ADP5589_INT_EN +#define ADP5589_PIN_MAX 19 + struct regmap; enum adp5585_variant { -- cgit v1.2.3 From 75024f97e82e63d02b0743500efb1e264a1c2dd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 1 Jul 2025 15:32:05 +0100 Subject: pwm: adp5585: add support for adp5589 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for the adp5589 I/O expander. From a PWM point of view it is pretty similar to adp5585. Main difference is the address of registers meaningful for configuring the PWM. Acked-by: Uwe Kleine-König Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-10-b1fcfe9e9826@analog.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h index d26f722cf31a..77f7c74f084d 100644 --- a/include/linux/mfd/adp5585.h +++ b/include/linux/mfd/adp5585.h @@ -118,6 +118,9 @@ #define ADP5589_GPO_DATA_OUT_A 0x2a #define ADP5589_GPO_OUT_MODE_A 0x2d #define ADP5589_GPIO_DIRECTION_A 0x30 +#define ADP5589_PWM_OFFT_LOW 0x3e +#define ADP5589_PWM_ONT_LOW 0x40 +#define ADP5589_PWM_CFG 0x42 #define ADP5589_PIN_CONFIG_D 0x4C #define ADP5589_INT_EN 0x4e #define ADP5589_MAX_REG ADP5589_INT_EN -- cgit v1.2.3 From 47a1f759b776ec9287f675f5d4fbf60b94cc566d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 1 Jul 2025 15:32:07 +0100 Subject: mfd: adp5585: Add support for event handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These devices are capable of generate FIFO based events based on KEY or GPI presses. Add support for handling these events. This is in preparation of adding full support for keymap and gpis based events. Reviewed-by: Lee Jones Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-12-b1fcfe9e9826@analog.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h index 77f7c74f084d..43a33a3d3f5a 100644 --- a/include/linux/mfd/adp5585.h +++ b/include/linux/mfd/adp5585.h @@ -10,13 +10,20 @@ #define __MFD_ADP5585_H_ #include +#include #define ADP5585_ID 0x00 #define ADP5585_MAN_ID_VALUE 0x20 #define ADP5585_MAN_ID_MASK GENMASK(7, 4) +#define ADP5585_REV_ID_MASK GENMASK(3, 0) #define ADP5585_INT_STATUS 0x01 +#define ADP5585_OVRFLOW_INT BIT(2) +#define ADP5585_EVENT_INT BIT(0) #define ADP5585_STATUS 0x02 +#define ADP5585_EC_MASK GENMASK(4, 0) #define ADP5585_FIFO_1 0x03 +#define ADP5585_KEV_EV_PRESS_MASK BIT(7) +#define ADP5585_KEY_EVENT_MASK GENMASK(6, 0) #define ADP5585_FIFO_2 0x04 #define ADP5585_FIFO_3 0x05 #define ADP5585_FIFO_4 0x06 @@ -32,6 +39,7 @@ #define ADP5585_FIFO_14 0x10 #define ADP5585_FIFO_15 0x11 #define ADP5585_FIFO_16 0x12 +#define ADP5585_EV_MAX (ADP5585_FIFO_16 - ADP5585_FIFO_1 + 1) #define ADP5585_GPI_INT_STAT_A 0x13 #define ADP5585_GPI_INT_STAT_B 0x14 #define ADP5585_GPI_STATUS_A 0x15 @@ -104,6 +112,8 @@ #define ADP5585_INT_CFG BIT(1) #define ADP5585_RST_CFG BIT(0) #define ADP5585_INT_EN 0x3c +#define ADP5585_OVRFLOW_IEN BIT(2) +#define ADP5585_EVENT_IEN BIT(0) #define ADP5585_MAX_REG ADP5585_INT_EN @@ -121,7 +131,9 @@ #define ADP5589_PWM_OFFT_LOW 0x3e #define ADP5589_PWM_ONT_LOW 0x40 #define ADP5589_PWM_CFG 0x42 +#define ADP5589_POLL_PTIME_CFG 0x48 #define ADP5589_PIN_CONFIG_D 0x4C +#define ADP5589_GENERAL_CFG 0x4d #define ADP5589_INT_EN 0x4e #define ADP5589_MAX_REG ADP5589_INT_EN @@ -142,15 +154,21 @@ enum adp5585_variant { }; struct adp5585_regs { + unsigned int gen_cfg; unsigned int ext_cfg; + unsigned int int_en; + unsigned int poll_ptime_cfg; }; struct adp5585_dev { struct device *dev; struct regmap *regmap; const struct adp5585_regs *regs; + struct blocking_notifier_head event_notifier; enum adp5585_variant variant; unsigned int id; + int irq; + unsigned int ev_poll_time; }; #endif -- cgit v1.2.3 From 333812da70d5f71bf5e176f6d55a5f716301b5fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 1 Jul 2025 15:32:08 +0100 Subject: mfd: adp5585: Support reset and unlock events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ADP558x family of devices can be programmed to respond to some especial events, In case of the unlock events, one can lock the keypad and use KEYS or GPIs events to unlock it. For the reset events, one can again use a combinations of GPIs/KEYs in order to generate an event that will trigger the device to generate an output reset pulse. Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-13-b1fcfe9e9826@analog.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h index 43a33a3d3f5a..db483ef9693a 100644 --- a/include/linux/mfd/adp5585.h +++ b/include/linux/mfd/adp5585.h @@ -68,6 +68,7 @@ #define ADP5585_GPIO_DIRECTION_A 0x27 #define ADP5585_GPIO_DIRECTION_B 0x28 #define ADP5585_RESET1_EVENT_A 0x29 +#define ADP5585_RESET_EV_PRESS BIT(7) #define ADP5585_RESET1_EVENT_B 0x2a #define ADP5585_RESET1_EVENT_C 0x2b #define ADP5585_RESET2_EVENT_A 0x2c @@ -118,6 +119,13 @@ #define ADP5585_MAX_REG ADP5585_INT_EN #define ADP5585_PIN_MAX 11 +#define ADP5585_MAX_UNLOCK_TIME_SEC 7 +#define ADP5585_KEY_EVENT_START 1 +#define ADP5585_KEY_EVENT_END 25 +#define ADP5585_GPI_EVENT_START 37 +#define ADP5585_GPI_EVENT_END 47 +#define ADP5585_ROW5_KEY_EVENT_START 1 +#define ADP5585_ROW5_KEY_EVENT_END 30 /* ADP5589 */ #define ADP5589_MAN_ID_VALUE 0x10 @@ -128,6 +136,20 @@ #define ADP5589_GPO_DATA_OUT_A 0x2a #define ADP5589_GPO_OUT_MODE_A 0x2d #define ADP5589_GPIO_DIRECTION_A 0x30 +#define ADP5589_UNLOCK1 0x33 +#define ADP5589_UNLOCK_EV_PRESS BIT(7) +#define ADP5589_UNLOCK_TIMERS 0x36 +#define ADP5589_UNLOCK_TIMER GENMASK(2, 0) +#define ADP5589_LOCK_CFG 0x37 +#define ADP5589_LOCK_EN BIT(0) +#define ADP5589_RESET1_EVENT_A 0x38 +#define ADP5589_RESET2_EVENT_A 0x3B +#define ADP5589_RESET_CFG 0x3D +#define ADP5585_RESET2_POL BIT(7) +#define ADP5585_RESET1_POL BIT(6) +#define ADP5585_RST_PASSTHRU_EN BIT(5) +#define ADP5585_RESET_TRIG_TIME GENMASK(4, 2) +#define ADP5585_PULSE_WIDTH GENMASK(1, 0) #define ADP5589_PWM_OFFT_LOW 0x3e #define ADP5589_PWM_ONT_LOW 0x40 #define ADP5589_PWM_CFG 0x42 @@ -138,6 +160,11 @@ #define ADP5589_MAX_REG ADP5589_INT_EN #define ADP5589_PIN_MAX 19 +#define ADP5589_KEY_EVENT_START 1 +#define ADP5589_KEY_EVENT_END 88 +#define ADP5589_GPI_EVENT_START 97 +#define ADP5589_GPI_EVENT_END 115 +#define ADP5589_UNLOCK_WILDCARD 127 struct regmap; @@ -158,6 +185,9 @@ struct adp5585_regs { unsigned int ext_cfg; unsigned int int_en; unsigned int poll_ptime_cfg; + unsigned int reset_cfg; + unsigned int reset1_event_a; + unsigned int reset2_event_a; }; struct adp5585_dev { @@ -167,8 +197,18 @@ struct adp5585_dev { struct blocking_notifier_head event_notifier; enum adp5585_variant variant; unsigned int id; + bool has_unlock; + bool has_pin6; int irq; unsigned int ev_poll_time; + unsigned int unlock_time; + unsigned int unlock_keys[2]; + unsigned int nkeys_unlock; + unsigned int reset1_keys[3]; + unsigned int nkeys_reset1; + unsigned int reset2_keys[2]; + unsigned int nkeys_reset2; + u8 reset_cfg; }; #endif -- cgit v1.2.3 From bd113a13e1fa51789f55987369b80e1d8bc19389 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 1 Jul 2025 15:32:09 +0100 Subject: mfd: adp5585: Add support for input devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ADP558x family supports a built in keypad matrix decoder which can be added as an Input device. In order to both support the Input and the GPIO device, we need to create a bitmap of the supported pins and track their usage since they can either be used as GPIOs (GPIs) or as part of the keymap. We also need to mark special pins busy in case some features are being used (ex: pwm or reset events). Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-14-b1fcfe9e9826@analog.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h index db483ef9693a..41c5d2e1cc7c 100644 --- a/include/linux/mfd/adp5585.h +++ b/include/linux/mfd/adp5585.h @@ -126,6 +126,10 @@ #define ADP5585_GPI_EVENT_END 47 #define ADP5585_ROW5_KEY_EVENT_START 1 #define ADP5585_ROW5_KEY_EVENT_END 30 +#define ADP5585_PWM_OUT 3 +#define ADP5585_RESET1_OUT 4 +#define ADP5585_RESET2_OUT 9 +#define ADP5585_ROW5 5 /* ADP5589 */ #define ADP5589_MAN_ID_VALUE 0x10 @@ -154,6 +158,7 @@ #define ADP5589_PWM_ONT_LOW 0x40 #define ADP5589_PWM_CFG 0x42 #define ADP5589_POLL_PTIME_CFG 0x48 +#define ADP5589_PIN_CONFIG_A 0x49 #define ADP5589_PIN_CONFIG_D 0x4C #define ADP5589_GENERAL_CFG 0x4d #define ADP5589_INT_EN 0x4e @@ -165,6 +170,7 @@ #define ADP5589_GPI_EVENT_START 97 #define ADP5589_GPI_EVENT_END 115 #define ADP5589_UNLOCK_WILDCARD 127 +#define ADP5589_RESET2_OUT 12 struct regmap; @@ -188,6 +194,7 @@ struct adp5585_regs { unsigned int reset_cfg; unsigned int reset1_event_a; unsigned int reset2_event_a; + unsigned int pin_cfg_a; }; struct adp5585_dev { @@ -195,6 +202,9 @@ struct adp5585_dev { struct regmap *regmap; const struct adp5585_regs *regs; struct blocking_notifier_head event_notifier; + unsigned long *pin_usage; + unsigned int n_pins; + unsigned int reset2_out; enum adp5585_variant variant; unsigned int id; bool has_unlock; -- cgit v1.2.3 From 988b28a83b658137e58123f4dafc3a1588e1cb2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 1 Jul 2025 15:32:10 +0100 Subject: gpio: adp5585: support gpi events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for adding GPIs to the event FIFO. This is done by adding irq_chip support. Like this, one can use the input gpio_keys driver as a "frontend" device and input handler. As part of this change, we now implement .request() and .free() as we can't blindly consume all available pins as GPIOs (example: some pins can be used for forming a keymap matrix). Also note that the number of pins can now be obtained from the parent, top level device. Hence the 'max_gpio' variable can be removed. Reviewed-by: Linus Walleij Acked-by: Bartosz Golaszewski Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-15-b1fcfe9e9826@analog.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h index 41c5d2e1cc7c..5237da6b4a9f 100644 --- a/include/linux/mfd/adp5585.h +++ b/include/linux/mfd/adp5585.h @@ -136,6 +136,8 @@ #define ADP5589_GPI_STATUS_A 0x16 #define ADP5589_GPI_STATUS_C 0x18 #define ADP5589_RPULL_CONFIG_A 0x19 +#define ADP5589_GPI_INT_LEVEL_A 0x1e +#define ADP5589_GPI_EVENT_EN_A 0x21 #define ADP5589_DEBOUNCE_DIS_A 0x27 #define ADP5589_GPO_DATA_OUT_A 0x2a #define ADP5589_GPO_OUT_MODE_A 0x2d -- cgit v1.2.3 From 8af39ec5cf2be522c8eb43a3d8005ed59e4daaee Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 13 Jun 2025 14:20:13 -0700 Subject: drm/sched: Increment job count before swapping tail spsc queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A small race exists between spsc_queue_push and the run-job worker, in which spsc_queue_push may return not-first while the run-job worker has already idled due to the job count being zero. If this race occurs, job scheduling stops, leading to hangs while waiting on the job’s DMA fences. Seal this race by incrementing the job count before appending to the SPSC queue. This race was observed on a drm-tip 6.16-rc1 build with the Xe driver in an SVM test case. Fixes: 1b1f42d8fde4 ("drm: move amd_gpu_scheduler into common location") Fixes: 27105db6c63a ("drm/amdgpu: Add SPSC queue to scheduler.") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://lore.kernel.org/r/20250613212013.719312-1-matthew.brost@intel.com --- include/drm/spsc_queue.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/spsc_queue.h b/include/drm/spsc_queue.h index 125f096c88cb..ee9df8cc67b7 100644 --- a/include/drm/spsc_queue.h +++ b/include/drm/spsc_queue.h @@ -70,9 +70,11 @@ static inline bool spsc_queue_push(struct spsc_queue *queue, struct spsc_node *n preempt_disable(); + atomic_inc(&queue->job_count); + smp_mb__after_atomic(); + tail = (struct spsc_node **)atomic_long_xchg(&queue->tail, (long)&node->next); WRITE_ONCE(*tail, node); - atomic_inc(&queue->job_count); /* * In case of first element verify new node will be visible to the consumer -- cgit v1.2.3 From e3d6e1b9a34c745b635f122ac471a198867cd0ec Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 1 Apr 2025 00:36:02 +0900 Subject: tracing: tprobe-events: Support multiple tprobes on the same tracepoint Allow user to set multiple tracepoint-probe events on the same tracepoint. After the last tprobe-event is removed, the tracepoint callback is unregistered. Link: https://lore.kernel.org/all/174343536245.843280.6548776576601537671.stgit@devnote2/ Signed-off-by: Masami Hiramatsu (Google) --- include/linux/module.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 5faa1fb1f4b4..2cb5be5cc7e6 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -1019,4 +1020,7 @@ static inline unsigned long find_kallsyms_symbol_value(struct module *mod, #endif /* CONFIG_MODULES && CONFIG_KALLSYMS */ +/* Define __free(module_put) macro for struct module *. */ +DEFINE_FREE(module_put, struct module *, if (_T) module_put(_T)) + #endif /* _LINUX_MODULE_H */ -- cgit v1.2.3 From 2db832ec9090d3b5f726f49ad4d0322d6b68a490 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 1 Apr 2025 00:36:11 +0900 Subject: tracing: fprobe-events: Register fprobe-events only when it is enabled Currently fprobe events are registered when it is defined. Thus it will give some overhead even if it is disabled. This changes it to register the fprobe only when it is enabled. Link: https://lore.kernel.org/all/174343537128.843280.16131300052837035043.stgit@devnote2/ Suggested-by: Steven Rostedt Signed-off-by: Masami Hiramatsu (Google) --- include/linux/fprobe.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h index 702099f08929..7964db96e41a 100644 --- a/include/linux/fprobe.h +++ b/include/linux/fprobe.h @@ -94,6 +94,7 @@ int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num); int register_fprobe_syms(struct fprobe *fp, const char **syms, int num); int unregister_fprobe(struct fprobe *fp); bool fprobe_is_registered(struct fprobe *fp); +int fprobe_count_ips_from_filter(const char *filter, const char *notfilter); #else static inline int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter) { @@ -115,6 +116,10 @@ static inline bool fprobe_is_registered(struct fprobe *fp) { return false; } +static inline int fprobe_count_ips_from_filter(const char *filter, const char *notfilter) +{ + return -EOPNOTSUPP; +} #endif /** -- cgit v1.2.3 From fbe346ce9d626680a4dd0f079e17c7b5dd32ffad Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 27 Jun 2025 13:26:23 -0700 Subject: net: mana: Handle Reset Request from MANA NIC Upon receiving the Reset Request, pause the connection and clean up queues, wait for the specified period, then resume the NIC. In the cleanup phase, the HWC is no longer responding, so set hwc_timeout to zero to skip waiting on the response. Signed-off-by: Haiyang Zhang Link: https://patch.msgid.link/1751055983-29760-1-git-send-email-haiyangz@linux.microsoft.com Signed-off-by: Jakub Kicinski --- include/net/mana/gdma.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 92ab85061df0..57df78cfbf82 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -62,6 +62,7 @@ enum gdma_eqe_type { GDMA_EQE_HWC_FPGA_RECONFIG = 132, GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133, GDMA_EQE_HWC_SOC_SERVICE = 134, + GDMA_EQE_HWC_RESET_REQUEST = 135, GDMA_EQE_RNIC_QP_FATAL = 176, }; @@ -584,6 +585,9 @@ enum { /* Driver supports dynamic MSI-X vector allocation */ #define GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT BIT(13) +/* Driver can self reset on EQE notification */ +#define GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE BIT(14) + /* Driver can self reset on FPGA Reconfig EQE notification */ #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17) @@ -594,6 +598,7 @@ enum { GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \ GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \ GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \ + GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \ GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE) #define GDMA_DRV_CAP_FLAGS2 0 @@ -921,4 +926,9 @@ void mana_unregister_debugfs(void); int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event); +int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state); +int mana_gd_resume(struct pci_dev *pdev); + +bool mana_need_log(struct gdma_context *gc, int err); + #endif /* _GDMA_H */ -- cgit v1.2.3 From b4cd18f485687a2061ee7a0ce6833851fc4438da Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Tue, 1 Jul 2025 14:20:54 +0530 Subject: drm/dp: Add documentation for luminance_set Documentation for luminance_set for struct drm_edp_backlight_info was missed which causes warnings. Fixes: 2af612ad4290 ("drm/dp: Introduce new member in drm_backlight_info") Signed-off-by: Suraj Kandpal Reviewed-by: Arun R Murthy Link: https://lore.kernel.org/r/20250701085054.746408-1-suraj.kandpal@intel.com --- include/drm/display/drm_dp_helper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index c6c2ba68fb16..e7689cebe1c5 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -837,6 +837,7 @@ drm_dp_has_quirk(const struct drm_dp_desc *desc, enum drm_dp_quirk quirk) * @lsb_reg_used: Do we also write values to the DP_EDP_BACKLIGHT_BRIGHTNESS_LSB register? * @aux_enable: Does the panel support the AUX enable cap? * @aux_set: Does the panel support setting the brightness through AUX? + * @luminance_set: Does the panel support setting the brightness through AUX using luminance values? * * This structure contains various data about an eDP backlight, which can be populated by using * drm_edp_backlight_init(). -- cgit v1.2.3 From 002d90627cdbf7efd16f0846a814889409e42f07 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 26 Jun 2025 20:59:26 +0300 Subject: firmware/nvram: bcm47xx: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Note that kernel.h is discouraged to be included as it's written at the top of that file. Signed-off-by: Andy Shevchenko Reviewed-by: Florian Fainelli Signed-off-by: Thomas Bogendoerfer --- include/linux/bcm47xx_nvram.h | 1 - include/linux/bcm47xx_sprom.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bcm47xx_nvram.h b/include/linux/bcm47xx_nvram.h index 7615f8d7b1ed..e4b6ce953ddb 100644 --- a/include/linux/bcm47xx_nvram.h +++ b/include/linux/bcm47xx_nvram.h @@ -7,7 +7,6 @@ #include #include -#include #include #ifdef CONFIG_BCM47XX_NVRAM diff --git a/include/linux/bcm47xx_sprom.h b/include/linux/bcm47xx_sprom.h index f8254fd53e15..40a7da3ef50e 100644 --- a/include/linux/bcm47xx_sprom.h +++ b/include/linux/bcm47xx_sprom.h @@ -5,8 +5,8 @@ #ifndef __BCM47XX_SPROM_H #define __BCM47XX_SPROM_H +#include #include -#include #include struct ssb_sprom; -- cgit v1.2.3 From 276e136bff7edcdecc6e206c81594ef06aa40743 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 30 Jun 2025 18:20:15 +0200 Subject: fs: prepare for extending file_get/setattr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We intend to add support for more xflags to selective filesystems and We cannot rely on copy_struct_from_user() to detect this extension. In preparation of extending the API, do not allow setting xflags unknown by this kernel version. Also do not pass the read-only flags and read-only field fsx_nextents to filesystem. These changes should not affect existing chattr programs that use the ioctl to get fsxattr before setting the new values. Link: https://lore.kernel.org/linux-fsdevel/20250216164029.20673-4-pali@kernel.org/ Cc: Pali Rohár Cc: Andrey Albershteyn Signed-off-by: Amir Goldstein Signed-off-by: Andrey Albershteyn Link: https://lore.kernel.org/20250630-xattrat-syscall-v6-5-c4e3bc35227b@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fileattr.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/fileattr.h b/include/linux/fileattr.h index 6030d0bf7ad3..e2a2f4ae242d 100644 --- a/include/linux/fileattr.h +++ b/include/linux/fileattr.h @@ -14,6 +14,26 @@ FS_XFLAG_NODUMP | FS_XFLAG_NOATIME | FS_XFLAG_DAX | \ FS_XFLAG_PROJINHERIT) +/* Read-only inode flags */ +#define FS_XFLAG_RDONLY_MASK \ + (FS_XFLAG_PREALLOC | FS_XFLAG_HASATTR) + +/* Flags to indicate valid value of fsx_ fields */ +#define FS_XFLAG_VALUES_MASK \ + (FS_XFLAG_EXTSIZE | FS_XFLAG_COWEXTSIZE) + +/* Flags for directories */ +#define FS_XFLAG_DIRONLY_MASK \ + (FS_XFLAG_RTINHERIT | FS_XFLAG_NOSYMLINKS | FS_XFLAG_EXTSZINHERIT) + +/* Misc settable flags */ +#define FS_XFLAG_MISC_MASK \ + (FS_XFLAG_REALTIME | FS_XFLAG_NODEFRAG | FS_XFLAG_FILESTREAM) + +#define FS_XFLAGS_MASK \ + (FS_XFLAG_COMMON | FS_XFLAG_RDONLY_MASK | FS_XFLAG_VALUES_MASK | \ + FS_XFLAG_DIRONLY_MASK | FS_XFLAG_MISC_MASK) + /* * Merged interface for miscellaneous file attributes. 'flags' originates from * ext* and 'fsx_flags' from xfs. There's some overlap between the two, which -- cgit v1.2.3 From 3a0ae385f69e9b2d87c9b017c4ffb5567c015197 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 30 Jun 2025 19:16:51 +0100 Subject: io_uring/mock: add basic infra for test mock files io_uring commands provide an ioctl style interface for files to implement file specific operations. io_uring provides many features and advanced api to commands, and it's getting hard to test as it requires specific files/devices. Add basic infrastucture for creating special mock files that will be implementing the cmd api and using various io_uring features we want to test. It'll also be useful to test some more obscure read/write/polling edge cases in the future. Suggested-by: chase xd Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/93f21b0af58c1367a2b22635d5a7d694ad0272fc.1750599274.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring/mock_file.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/uapi/linux/io_uring/mock_file.h (limited to 'include') diff --git a/include/uapi/linux/io_uring/mock_file.h b/include/uapi/linux/io_uring/mock_file.h new file mode 100644 index 000000000000..a44273fd526d --- /dev/null +++ b/include/uapi/linux/io_uring/mock_file.h @@ -0,0 +1,22 @@ +#ifndef LINUX_IO_URING_MOCK_FILE_H +#define LINUX_IO_URING_MOCK_FILE_H + +#include + +struct io_uring_mock_probe { + __u64 features; + __u64 __resv[9]; +}; + +struct io_uring_mock_create { + __u32 out_fd; + __u32 flags; + __u64 __resv[15]; +}; + +enum { + IORING_MOCK_MGR_CMD_PROBE, + IORING_MOCK_MGR_CMD_CREATE, +}; + +#endif -- cgit v1.2.3 From 4aac001f780388b252534396feaf49b250eae27f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 30 Jun 2025 19:16:52 +0100 Subject: io_uring/mock: add cmd using vectored regbufs There is a command api allowing to import vectored registered buffers, add a new mock command that uses the feature and simply copies the specified registered buffer into user space or vice versa. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/229a113fd7de6b27dbef9567f7c0bf4475c9017d.1750599274.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring/mock_file.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring/mock_file.h b/include/uapi/linux/io_uring/mock_file.h index a44273fd526d..73aca477d5c8 100644 --- a/include/uapi/linux/io_uring/mock_file.h +++ b/include/uapi/linux/io_uring/mock_file.h @@ -3,6 +3,12 @@ #include +enum { + IORING_MOCK_FEAT_CMD_COPY, + + IORING_MOCK_FEAT_END, +}; + struct io_uring_mock_probe { __u64 features; __u64 __resv[9]; @@ -19,4 +25,12 @@ enum { IORING_MOCK_MGR_CMD_CREATE, }; +enum { + IORING_MOCK_CMD_COPY_REGBUF, +}; + +enum { + IORING_MOCK_COPY_FROM = 1, +}; + #endif -- cgit v1.2.3 From d1aa0346571436203a24cc3fc0c80f14cabbd630 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 30 Jun 2025 19:16:53 +0100 Subject: io_uring/mock: add sync read/write Add support for synchronous zero read/write for mock files. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/571f3c9fe688e918256a06a722d3db6ced9ca3d5.1750599274.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring/mock_file.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring/mock_file.h b/include/uapi/linux/io_uring/mock_file.h index 73aca477d5c8..de27295bb365 100644 --- a/include/uapi/linux/io_uring/mock_file.h +++ b/include/uapi/linux/io_uring/mock_file.h @@ -5,6 +5,7 @@ enum { IORING_MOCK_FEAT_CMD_COPY, + IORING_MOCK_FEAT_RW_ZERO, IORING_MOCK_FEAT_END, }; @@ -17,7 +18,8 @@ struct io_uring_mock_probe { struct io_uring_mock_create { __u32 out_fd; __u32 flags; - __u64 __resv[15]; + __u64 file_size; + __u64 __resv[14]; }; enum { -- cgit v1.2.3 From 2f71d2386f4feed5bfb9ee7b3d2c0ad953db1fa5 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 30 Jun 2025 19:16:54 +0100 Subject: io_uring/mock: allow to choose FMODE_NOWAIT Add an option to choose whether the file supports FMODE_NOWAIT, that changes the execution path io_uring request takes. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/1e532565b05a05b23589d237c24ee1a3d90c2fd9.1750599274.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring/mock_file.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring/mock_file.h b/include/uapi/linux/io_uring/mock_file.h index de27295bb365..125949d2b5ce 100644 --- a/include/uapi/linux/io_uring/mock_file.h +++ b/include/uapi/linux/io_uring/mock_file.h @@ -6,6 +6,7 @@ enum { IORING_MOCK_FEAT_CMD_COPY, IORING_MOCK_FEAT_RW_ZERO, + IORING_MOCK_FEAT_RW_NOWAIT, IORING_MOCK_FEAT_END, }; @@ -15,6 +16,10 @@ struct io_uring_mock_probe { __u64 __resv[9]; }; +enum { + IORING_MOCK_CREATE_F_SUPPORT_NOWAIT = 1, +}; + struct io_uring_mock_create { __u32 out_fd; __u32 flags; -- cgit v1.2.3 From 0c98a44329c10bf904434524425cb42043513bd6 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 30 Jun 2025 19:16:55 +0100 Subject: io_uring/mock: support for async read/write Let the user to specify a delay to read/write request. io_uring will start a timer, return -EIOCBQUEUED and complete the request asynchronously after the delay pass. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/38f9d2e143fda8522c90a724b74630e68f9bbd16.1750599274.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring/mock_file.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring/mock_file.h b/include/uapi/linux/io_uring/mock_file.h index 125949d2b5ce..c8fa77e39c68 100644 --- a/include/uapi/linux/io_uring/mock_file.h +++ b/include/uapi/linux/io_uring/mock_file.h @@ -7,6 +7,7 @@ enum { IORING_MOCK_FEAT_CMD_COPY, IORING_MOCK_FEAT_RW_ZERO, IORING_MOCK_FEAT_RW_NOWAIT, + IORING_MOCK_FEAT_RW_ASYNC, IORING_MOCK_FEAT_END, }; @@ -24,7 +25,8 @@ struct io_uring_mock_create { __u32 out_fd; __u32 flags; __u64 file_size; - __u64 __resv[14]; + __u64 rw_delay_ns; + __u64 __resv[13]; }; enum { -- cgit v1.2.3 From e448d578264a9512d38deb8c418954d5f3e20712 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 30 Jun 2025 19:16:56 +0100 Subject: io_uring/mock: add trivial poll handler Add a flag that enables polling on the mock file. For now it's trivially says that there is always data available, it'll be extended in the future. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/f16de043ec4876d65fae294fc99ade57415fba0c.1750599274.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring/mock_file.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring/mock_file.h b/include/uapi/linux/io_uring/mock_file.h index c8fa77e39c68..debeee8e4527 100644 --- a/include/uapi/linux/io_uring/mock_file.h +++ b/include/uapi/linux/io_uring/mock_file.h @@ -8,6 +8,7 @@ enum { IORING_MOCK_FEAT_RW_ZERO, IORING_MOCK_FEAT_RW_NOWAIT, IORING_MOCK_FEAT_RW_ASYNC, + IORING_MOCK_FEAT_POLL, IORING_MOCK_FEAT_END, }; @@ -19,6 +20,7 @@ struct io_uring_mock_probe { enum { IORING_MOCK_CREATE_F_SUPPORT_NOWAIT = 1, + IORING_MOCK_CREATE_F_POLL = 2, }; struct io_uring_mock_create { -- cgit v1.2.3 From be7efb2d20d67f334a7de2aef77ae6c69367e646 Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Mon, 30 Jun 2025 18:20:16 +0200 Subject: fs: introduce file_getattr and file_setattr syscalls Introduce file_getattr() and file_setattr() syscalls to manipulate inode extended attributes. The syscalls takes pair of file descriptor and pathname. Then it operates on inode opened accroding to openat() semantics. The struct file_attr is passed to obtain/change extended attributes. This is an alternative to FS_IOC_FSSETXATTR ioctl with a difference that file don't need to be open as we can reference it with a path instead of fd. By having this we can manipulated inode extended attributes not only on regular files but also on special ones. This is not possible with FS_IOC_FSSETXATTR ioctl as with special files we can not call ioctl() directly on the filesystem inode using fd. This patch adds two new syscalls which allows userspace to get/set extended inode attributes on special files by using parent directory and a path - *at() like syscall. CC: linux-api@vger.kernel.org CC: linux-fsdevel@vger.kernel.org CC: linux-xfs@vger.kernel.org Signed-off-by: Andrey Albershteyn Link: https://lore.kernel.org/20250630-xattrat-syscall-v6-6-c4e3bc35227b@kernel.org Acked-by: Arnd Bergmann Signed-off-by: Christian Brauner --- include/linux/syscalls.h | 7 +++++++ include/uapi/asm-generic/unistd.h | 8 +++++++- include/uapi/linux/fs.h | 18 ++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e5603cc91963..77f45e5d4413 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -78,6 +78,7 @@ struct cachestat; struct statmount; struct mnt_id_req; struct xattr_args; +struct file_attr; #include #include @@ -371,6 +372,12 @@ asmlinkage long sys_removexattrat(int dfd, const char __user *path, asmlinkage long sys_lremovexattr(const char __user *path, const char __user *name); asmlinkage long sys_fremovexattr(int fd, const char __user *name); +asmlinkage long sys_file_getattr(int dfd, const char __user *filename, + struct file_attr __user *attr, size_t usize, + unsigned int at_flags); +asmlinkage long sys_file_setattr(int dfd, const char __user *filename, + struct file_attr __user *attr, size_t usize, + unsigned int at_flags); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); asmlinkage long sys_eventfd2(unsigned int count, int flags); asmlinkage long sys_epoll_create1(int flags); diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 2892a45023af..04e0077fb4c9 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -852,8 +852,14 @@ __SYSCALL(__NR_removexattrat, sys_removexattrat) #define __NR_open_tree_attr 467 __SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) +/* fs/inode.c */ +#define __NR_file_getattr 468 +__SYSCALL(__NR_file_getattr, sys_file_getattr) +#define __NR_file_setattr 469 +__SYSCALL(__NR_file_setattr, sys_file_setattr) + #undef __NR_syscalls -#define __NR_syscalls 468 +#define __NR_syscalls 470 /* * 32 bit systems traditionally used different diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 0098b0ce8ccb..9663dbdda181 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -148,6 +148,24 @@ struct fsxattr { unsigned char fsx_pad[8]; }; +/* + * Variable size structure for file_[sg]et_attr(). + * + * Note. This is alternative to the structure 'struct fileattr'/'struct fsxattr'. + * As this structure is passed to/from userspace with its size, this can + * be versioned based on the size. + */ +struct file_attr { + __u64 fa_xflags; /* xflags field value (get/set) */ + __u32 fa_extsize; /* extsize field value (get/set)*/ + __u32 fa_nextents; /* nextents field value (get) */ + __u32 fa_projid; /* project identifier (get/set) */ + __u32 fa_cowextsize; /* CoW extsize field value (get/set) */ +}; + +#define FILE_ATTR_SIZE_VER0 24 +#define FILE_ATTR_SIZE_LATEST FILE_ATTR_SIZE_VER0 + /* * Flags for the fsx_xflags field */ -- cgit v1.2.3 From 0dc7e656ddd54c3267b7cc18c1ac8ec1297ed02f Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Wed, 2 Jul 2025 14:35:23 +0200 Subject: mtd: nand: qpic-common: add defines for ECC_MODE values Add defines for the values of the ECC_MODE field of the NAND_DEV0_ECC_CFG register and change both the 'qcom-nandc' and 'spi-qpic-snand' drivers to use those instead of magic numbers. No functional changes. This is in preparation for adding 8 bit ECC strength support for the 'spi-qpic-snand' driver. Reviewed-by: Md Sadre Alam Signed-off-by: Gabor Juhos Acked-by: Miquel Raynal Link: https://patch.msgid.link/20250702-qpic-snand-8bit-ecc-v2-1-ae2c17a30bb7@gmail.com Signed-off-by: Mark Brown --- include/linux/mtd/nand-qpic-common.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/nand-qpic-common.h b/include/linux/mtd/nand-qpic-common.h index e8462deda6db..0d944db363cd 100644 --- a/include/linux/mtd/nand-qpic-common.h +++ b/include/linux/mtd/nand-qpic-common.h @@ -101,6 +101,8 @@ #define ECC_SW_RESET BIT(1) #define ECC_MODE 4 #define ECC_MODE_MASK GENMASK(5, 4) +#define ECC_MODE_4BIT 0 +#define ECC_MODE_8BIT 1 #define ECC_PARITY_SIZE_BYTES_BCH 8 #define ECC_PARITY_SIZE_BYTES_BCH_MASK GENMASK(12, 8) #define ECC_NUM_DATA_BYTES 16 -- cgit v1.2.3 From cb335325b1431152f696c53e32465ba192cd119a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 30 Jun 2025 12:26:39 +0300 Subject: leds: Unexport of_led_get() There are no users outside the module. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250630092639.1574860-1-andriy.shevchenko@linux.intel.com Signed-off-by: Lee Jones --- include/linux/leds.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/leds.h b/include/linux/leds.h index b3f0aa081064..b16b803cc1ac 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -294,7 +294,6 @@ void led_remove_lookup(struct led_lookup_data *led_lookup); struct led_classdev *__must_check led_get(struct device *dev, char *con_id); struct led_classdev *__must_check devm_led_get(struct device *dev, char *con_id); -extern struct led_classdev *of_led_get(struct device_node *np, int index); extern void led_put(struct led_classdev *led_cdev); struct led_classdev *__must_check devm_of_led_get(struct device *dev, int index); -- cgit v1.2.3 From b1fabef37bd504f378a203fd8b9227b8fa65b193 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 10:29:51 +0100 Subject: prctl: Introduce PR_MTE_STORE_ONLY PR_MTE_STORE_ONLY is used to restrict the MTE tag check for store opeartion only. Signed-off-by: Yeoreum Yun Reviewed-by: Mark Brown Tested-by: Mark Brown Link: https://lore.kernel.org/r/20250618092957.2069907-3-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- include/uapi/linux/prctl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 43dec6eed559..f6fb137c407f 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -244,6 +244,8 @@ struct prctl_mm_map { # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Unused; kept only for source compatibility */ # define PR_MTE_TCF_SHIFT 1 +/* MTE tag check store only */ +# define PR_MTE_STORE_ONLY (1UL << 19) /* RISC-V pointer masking tag length */ # define PR_PMLEN_SHIFT 24 # define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) -- cgit v1.2.3 From 5e4e8c1415c181ce311a0b5936ef301edd57c5d1 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Fri, 27 Jun 2025 21:42:32 +0100 Subject: dt-bindings: clock: renesas,r9a09g056/57-cpg: Add XSPI core clock Add XSPI core clock definitions to the clock bindings for the Renesas R9A09G056 and R9A09G057 SoCs. These clocks IDs are used to support XSPI interface. Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20250627204237.214635-2-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/renesas,r9a09g056-cpg.h | 1 + include/dt-bindings/clock/renesas,r9a09g057-cpg.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/renesas,r9a09g056-cpg.h b/include/dt-bindings/clock/renesas,r9a09g056-cpg.h index f4905b27f8d9..a9af5af9e3a1 100644 --- a/include/dt-bindings/clock/renesas,r9a09g056-cpg.h +++ b/include/dt-bindings/clock/renesas,r9a09g056-cpg.h @@ -20,5 +20,6 @@ #define R9A09G056_USB2_0_CLK_CORE0 9 #define R9A09G056_GBETH_0_CLK_PTP_REF_I 10 #define R9A09G056_GBETH_1_CLK_PTP_REF_I 11 +#define R9A09G056_SPI_CLK_SPI 12 #endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G056_CPG_H__ */ diff --git a/include/dt-bindings/clock/renesas,r9a09g057-cpg.h b/include/dt-bindings/clock/renesas,r9a09g057-cpg.h index 884dbeb1e139..5346a898ab60 100644 --- a/include/dt-bindings/clock/renesas,r9a09g057-cpg.h +++ b/include/dt-bindings/clock/renesas,r9a09g057-cpg.h @@ -21,5 +21,6 @@ #define R9A09G057_USB2_0_CLK_CORE1 10 #define R9A09G057_GBETH_0_CLK_PTP_REF_I 11 #define R9A09G057_GBETH_1_CLK_PTP_REF_I 12 +#define R9A09G057_SPI_CLK_SPI 13 #endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G057_CPG_H__ */ -- cgit v1.2.3 From 2a76193f7cc03de5b2745d069926ebc431dd5ba4 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Wed, 25 Jun 2025 15:17:03 +0100 Subject: dt-bindings: clock: renesas,r9a09g077/87: Add SDHI_CLKHS clock ID Add the SDHI high-speed clock (SDHI_CLKHS) definition for the Renesas RZ/T2H (R9A09G077) and RZ/N2H (R9A09G087) SoCs. SDHI_CLKHS is used as a core clock for the SDHI IP and operates at 800MHz. Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20250625141705.151383-2-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h | 1 + include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h b/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h index f6e5f62b07c4..7ecc4f0b235a 100644 --- a/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h +++ b/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h @@ -24,5 +24,6 @@ #define R9A09G077_CLK_PCLKH 12 #define R9A09G077_CLK_PCLKM 13 #define R9A09G077_CLK_PCLKL 14 +#define R9A09G077_SDHI_CLKHS 15 #endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G077_CPG_H__ */ diff --git a/include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h b/include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h index f28166d6015f..925e57703925 100644 --- a/include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h +++ b/include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h @@ -24,5 +24,6 @@ #define R9A09G087_CLK_PCLKH 12 #define R9A09G087_CLK_PCLKM 13 #define R9A09G087_CLK_PCLKL 14 +#define R9A09G087_SDHI_CLKHS 15 #endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G087_CPG_H__ */ -- cgit v1.2.3 From 3715b5df09b92168a4492b48bb7ea70d89f9d8f3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jun 2025 09:35:37 +0000 Subject: net: add struct net_aligned_data This structure will hold networking data that must consume a full cache line to avoid accidental false sharing. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250630093540.3052835-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/aligned_data.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/net/aligned_data.h (limited to 'include') diff --git a/include/net/aligned_data.h b/include/net/aligned_data.h new file mode 100644 index 000000000000..cf3329d7c227 --- /dev/null +++ b/include/net/aligned_data.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _NET_ALIGNED_DATA_H +#define _NET_ALIGNED_DATA_H + +#include + +/* Structure holding cacheline aligned fields on SMP builds. + * Each field or group should have an ____cacheline_aligned_in_smp + * attribute to ensure no accidental false sharing can happen. + */ +struct net_aligned_data { +}; + +extern struct net_aligned_data net_aligned_data; + +#endif /* _NET_ALIGNED_DATA_H */ -- cgit v1.2.3 From 998642e999d23324c5dbf38149606d09cec2c377 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jun 2025 09:35:38 +0000 Subject: net: move net_cookie into net_aligned_data Using per-cpu data for net->net_cookie generation is overkill, because even busy hosts do not create hundreds of netns per second. Make sure to put net_cookie in a private cache line to avoid potential false sharing. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250630093540.3052835-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/aligned_data.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/aligned_data.h b/include/net/aligned_data.h index cf3329d7c227..5c7badf71f04 100644 --- a/include/net/aligned_data.h +++ b/include/net/aligned_data.h @@ -2,6 +2,7 @@ #ifndef _NET_ALIGNED_DATA_H #define _NET_ALIGNED_DATA_H +#include #include /* Structure holding cacheline aligned fields on SMP builds. @@ -9,6 +10,7 @@ * attribute to ensure no accidental false sharing can happen. */ struct net_aligned_data { + atomic64_t net_cookie ____cacheline_aligned_in_smp; }; extern struct net_aligned_data net_aligned_data; -- cgit v1.2.3 From 83081337419cb692eca4ee475d936b1fdcfd49f6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jun 2025 09:35:39 +0000 Subject: tcp: move tcp_memory_allocated into net_aligned_data ____cacheline_aligned_in_smp attribute only makes sure to align a field to a cache line. It does not prevent the linker to use the remaining of the cache line for other variables, causing potential false sharing. Move tcp_memory_allocated into a dedicated cache line. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250630093540.3052835-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/aligned_data.h | 3 +++ include/net/tcp.h | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/aligned_data.h b/include/net/aligned_data.h index 5c7badf71f04..bedb4f86b0fe 100644 --- a/include/net/aligned_data.h +++ b/include/net/aligned_data.h @@ -11,6 +11,9 @@ */ struct net_aligned_data { atomic64_t net_cookie ____cacheline_aligned_in_smp; +#if defined(CONFIG_INET) + atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; +#endif }; extern struct net_aligned_data net_aligned_data; diff --git a/include/net/tcp.h b/include/net/tcp.h index 761c4a0ad386..bc08de49805c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -267,7 +267,6 @@ extern long sysctl_tcp_mem[3]; #define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */ #define TCP_RACK_NO_DUPTHRESH 0x4 /* Do not use DUPACK threshold in RACK */ -extern atomic_long_t tcp_memory_allocated; DECLARE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc); extern struct percpu_counter tcp_sockets_allocated; -- cgit v1.2.3 From e3d4825124bce0d1f72187fabcf972b7c0b6cb9b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jun 2025 09:35:40 +0000 Subject: udp: move udp_memory_allocated into net_aligned_data ____cacheline_aligned_in_smp attribute only makes sure to align a field to a cache line. It does not prevent the linker to use the remaining of the cache line for other variables, causing potential false sharing. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250630093540.3052835-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/aligned_data.h | 1 + include/net/udp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/aligned_data.h b/include/net/aligned_data.h index bedb4f86b0fe..e1a1c8aedc79 100644 --- a/include/net/aligned_data.h +++ b/include/net/aligned_data.h @@ -13,6 +13,7 @@ struct net_aligned_data { atomic64_t net_cookie ____cacheline_aligned_in_smp; #if defined(CONFIG_INET) atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; + atomic_long_t udp_memory_allocated ____cacheline_aligned_in_smp; #endif }; diff --git a/include/net/udp.h b/include/net/udp.h index a772510b2aa5..f8ae2c4ade14 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -205,7 +205,6 @@ static inline void udp_hash4_dec(struct udp_hslot *hslot2) extern struct proto udp_prot; -extern atomic_long_t udp_memory_allocated; DECLARE_PER_CPU(int, udp_memory_per_cpu_fw_alloc); /* sysctl variables for udp */ -- cgit v1.2.3 From 8a402bbe54760dea67f1b2980c727761b47994d7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jun 2025 12:19:25 +0000 Subject: net: dst: annotate data-races around dst->obsolete (dst_entry)->obsolete is read locklessly, add corresponding annotations. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250630121934.3399505-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/dst.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 78c78cdce0e9..76c30c3b22dd 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -476,7 +476,7 @@ INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, u32)); static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) { - if (dst->obsolete) + if (READ_ONCE(dst->obsolete)) dst = INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check, dst, cookie); return dst; -- cgit v1.2.3 From 36229b2caca2228b834c03fb83867022485a0563 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jun 2025 12:19:26 +0000 Subject: net: dst: annotate data-races around dst->expires (dst_entry)->expires is read and written locklessly, add corresponding annotations. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250630121934.3399505-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/dst.h | 8 +++++--- include/net/ip.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 76c30c3b22dd..1efe1e5d51a9 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -431,13 +431,15 @@ static inline void dst_link_failure(struct sk_buff *skb) static inline void dst_set_expires(struct dst_entry *dst, int timeout) { - unsigned long expires = jiffies + timeout; + unsigned long old, expires = jiffies + timeout; if (expires == 0) expires = 1; - if (dst->expires == 0 || time_before(expires, dst->expires)) - dst->expires = expires; + old = READ_ONCE(dst->expires); + + if (!old || time_before(expires, old)) + WRITE_ONCE(dst->expires, expires); } static inline unsigned int dst_dev_overhead(struct dst_entry *dst, diff --git a/include/net/ip.h b/include/net/ip.h index 375304bb99f6..391af454422e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -477,7 +477,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, ip_mtu_locked(dst) || !forwarding) { mtu = rt->rt_pmtu; - if (mtu && time_before(jiffies, rt->dst.expires)) + if (mtu && time_before(jiffies, READ_ONCE(rt->dst.expires))) goto out; } -- cgit v1.2.3 From 8f2b2282d04a5d5bcbec22f91572bb6803cfc771 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jun 2025 12:19:27 +0000 Subject: net: dst: annotate data-races around dst->lastuse (dst_entry)->lastuse is read and written locklessly, add corresponding annotations. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250630121934.3399505-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/dst.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 1efe1e5d51a9..bef2f41c7220 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -240,9 +240,9 @@ static inline void dst_hold(struct dst_entry *dst) static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) { - if (unlikely(time != dst->lastuse)) { + if (unlikely(time != READ_ONCE(dst->lastuse))) { dst->__use++; - dst->lastuse = time; + WRITE_ONCE(dst->lastuse, time); } } -- cgit v1.2.3 From f1c5fd34891a1c242885f48c2e4dc52df180f311 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jun 2025 12:19:28 +0000 Subject: net: dst: annotate data-races around dst->input dst_dev_put() can overwrite dst->input while other cpus might read this field (for instance from dst_input()) Add READ_ONCE()/WRITE_ONCE() annotations to suppress potential issues. We will likely need full RCU protection later. Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250630121934.3399505-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/dst.h | 2 +- include/net/lwtunnel.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index bef2f41c7220..c0f8b6d8e707 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -468,7 +468,7 @@ INDIRECT_CALLABLE_DECLARE(int ip_local_deliver(struct sk_buff *)); /* Input packet from network to transport. */ static inline int dst_input(struct sk_buff *skb) { - return INDIRECT_CALL_INET(skb_dst(skb)->input, + return INDIRECT_CALL_INET(READ_ONCE(skb_dst(skb)->input), ip6_input, ip_local_deliver, skb); } diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index c306ebe379a0..eaac07d50595 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -142,8 +142,8 @@ static inline void lwtunnel_set_redirect(struct dst_entry *dst) dst->output = lwtunnel_output; } if (lwtunnel_input_redirect(dst->lwtstate)) { - dst->lwtstate->orig_input = dst->input; - dst->input = lwtunnel_input; + dst->lwtstate->orig_input = READ_ONCE(dst->input); + WRITE_ONCE(dst->input, lwtunnel_input); } } #else -- cgit v1.2.3 From 2dce8c52a98995c4719def6f88629ab1581c0b82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jun 2025 12:19:29 +0000 Subject: net: dst: annotate data-races around dst->output dst_dev_put() can overwrite dst->output while other cpus might read this field (for instance from dst_output()) Add READ_ONCE()/WRITE_ONCE() annotations to suppress potential issues. We will likely need RCU protection in the future. Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250630121934.3399505-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/dst.h | 2 +- include/net/lwtunnel.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index c0f8b6d8e707..b6acfde7d587 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -458,7 +458,7 @@ INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *, /* Output packet to network from transport. */ static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - return INDIRECT_CALL_INET(skb_dst(skb)->output, + return INDIRECT_CALL_INET(READ_ONCE(skb_dst(skb)->output), ip6_output, ip_output, net, sk, skb); } diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index eaac07d50595..26232f603e33 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -138,8 +138,8 @@ int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, static inline void lwtunnel_set_redirect(struct dst_entry *dst) { if (lwtunnel_output_redirect(dst->lwtstate)) { - dst->lwtstate->orig_output = dst->output; - dst->output = lwtunnel_output; + dst->lwtstate->orig_output = READ_ONCE(dst->output); + WRITE_ONCE(dst->output, lwtunnel_output); } if (lwtunnel_input_redirect(dst->lwtstate)) { dst->lwtstate->orig_input = READ_ONCE(dst->input); -- cgit v1.2.3 From 88fe14253e181878c2ddb51a298ae8c468a63010 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jun 2025 12:19:30 +0000 Subject: net: dst: add four helpers to annotate data-races around dst->dev dst->dev is read locklessly in many contexts, and written in dst_dev_put(). Fixing all the races is going to need many changes. We probably will have to add full RCU protection. Add three helpers to ease this painful process. static inline struct net_device *dst_dev(const struct dst_entry *dst) { return READ_ONCE(dst->dev); } static inline struct net_device *skb_dst_dev(const struct sk_buff *skb) { return dst_dev(skb_dst(skb)); } static inline struct net *skb_dst_dev_net(const struct sk_buff *skb) { return dev_net(skb_dst_dev(skb)); } static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb) { return dev_net_rcu(skb_dst_dev(skb)); } Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250630121934.3399505-7-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/dst.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index b6acfde7d587..00467c1b5093 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -563,6 +563,26 @@ static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu) dst->ops->update_pmtu(dst, NULL, skb, mtu, false); } +static inline struct net_device *dst_dev(const struct dst_entry *dst) +{ + return READ_ONCE(dst->dev); +} + +static inline struct net_device *skb_dst_dev(const struct sk_buff *skb) +{ + return dst_dev(skb_dst(skb)); +} + +static inline struct net *skb_dst_dev_net(const struct sk_buff *skb) +{ + return dev_net(skb_dst_dev(skb)); +} + +static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb) +{ + return dev_net_rcu(skb_dst_dev(skb)); +} + struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie); void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, bool confirm_neigh); -- cgit v1.2.3 From a74fc62eec155ca5a6da8ff3856f3dc87fe24558 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jun 2025 12:19:31 +0000 Subject: ipv4: adopt dst_dev, skb_dst_dev and skb_dst_dev_net[_rcu] Use the new helpers as a first step to deal with potential dst->dev races. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250630121934.3399505-8-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_hashtables.h | 2 +- include/net/ip.h | 11 ++++++----- include/net/route.h | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ae09e91398a5..19dbd9081d5a 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -481,7 +481,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, const int sdif, bool *refcounted) { - struct net *net = dev_net_rcu(skb_dst(skb)->dev); + struct net *net = skb_dst_dev_net_rcu(skb); const struct iphdr *iph = ip_hdr(skb); struct sock *sk; diff --git a/include/net/ip.h b/include/net/ip.h index 391af454422e..befcba575129 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -472,7 +472,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, rcu_read_lock(); - net = dev_net_rcu(dst->dev); + net = dev_net_rcu(dst_dev(dst)); if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) { @@ -486,7 +486,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, if (mtu) goto out; - mtu = READ_ONCE(dst->dev->mtu); + mtu = READ_ONCE(dst_dev(dst)->mtu); if (unlikely(ip_mtu_locked(dst))) { if (rt->rt_uses_gateway && mtu > 576) @@ -506,16 +506,17 @@ out: static inline unsigned int ip_skb_dst_mtu(struct sock *sk, const struct sk_buff *skb) { + const struct dst_entry *dst = skb_dst(skb); unsigned int mtu; if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) { bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED; - return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding); + return ip_dst_mtu_maybe_forward(dst, forwarding); } - mtu = min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); - return mtu - lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); + mtu = min(READ_ONCE(dst_dev(dst)->mtu), IP_MAX_MTU); + return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } struct dst_metrics *ip_fib_metrics_init(struct nlattr *fc_mx, int fc_mx_len, diff --git a/include/net/route.h b/include/net/route.h index 3d3d6048ffca..7ea840daa775 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -390,7 +390,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) const struct net *net; rcu_read_lock(); - net = dev_net_rcu(dst->dev); + net = dev_net_rcu(dst_dev(dst)); hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); rcu_read_unlock(); } -- cgit v1.2.3 From 1caf27297215a5241f9bfc9c07336349d9034ee3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jun 2025 12:19:32 +0000 Subject: ipv6: adopt dst_dev() helper Use the new helper as a step to deal with potential dst->dev races. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250630121934.3399505-9-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip6_route.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 6dbdf60b342f..9255f21818ee 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -274,7 +274,7 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) unsigned int mtu; if (np && READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE) { - mtu = READ_ONCE(dst->dev->mtu); + mtu = READ_ONCE(dst_dev(dst)->mtu); mtu -= lwtunnel_headroom(dst->lwtstate, mtu); } else { mtu = dst_mtu(dst); @@ -337,7 +337,7 @@ static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst mtu = IPV6_MIN_MTU; rcu_read_lock(); - idev = __in6_dev_get(dst->dev); + idev = __in6_dev_get(dst_dev(dst)); if (idev) mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); -- cgit v1.2.3 From 93d1cff35adc522a5d21e722eee1071f3f7dc716 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jun 2025 12:19:33 +0000 Subject: ipv6: adopt skb_dst_dev() and skb_dst_dev_net[_rcu]() helpers Use the new helpers as a step to deal with potential dst->dev races. v2: fix typo in ipv6_rthdr_rcv() (kernel test robot ) Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250630121934.3399505-10-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inet6_hashtables.h | 2 +- include/net/ip6_tunnel.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index c32878c69179..ab3929a2a956 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -150,7 +150,7 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, int iif, int sdif, bool *refcounted) { - struct net *net = dev_net_rcu(skb_dst(skb)->dev); + struct net *net = skb_dst_dev_net_rcu(skb); const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct sock *sk; diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index dd163495f353..120db2865811 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -159,7 +159,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); IP6CB(skb)->flags = ip6cb_flags; pkt_len = skb->len - skb_inner_network_offset(skb); - err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); + err = ip6_local_out(skb_dst_dev_net(skb), sk, skb); if (dev) { if (unlikely(net_xmit_eval(err))) -- cgit v1.2.3 From 4d313f2bd22213caace3fe4fb02977b527f9c6c3 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 1 Jul 2025 09:03:51 +0800 Subject: tun: remove unnecessary tun_xdp_hdr structure With f95f0f95cfb7("net, xdp: Introduce xdp_init_buff utility routine"), buffer length could be stored as frame size so there's no need to have a dedicated tun_xdp_hdr structure. We can simply store virtio net header instead. Acked-by: Willem de Bruijn Signed-off-by: Jason Wang Link: https://patch.msgid.link/20250701010352.74515-1-jasowang@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/if_tun.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 043d442994b0..80166eb62f41 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -19,11 +19,6 @@ struct tun_msg_ctl { void *ptr; }; -struct tun_xdp_hdr { - int buflen; - struct virtio_net_hdr gso; -}; - #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) struct socket *tun_get_socket(struct file *); struct ptr_ring *tun_get_tx_ring(struct file *file); -- cgit v1.2.3 From 42401c42389622424f2973ec57414f033ae6be8f Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Sun, 29 Jun 2025 17:21:31 +0300 Subject: netlink: introduce type-checking attribute iteration for nlmsg Add the nlmsg_for_each_attr_type() macro to simplify iteration over attributes of a specific type in a Netlink message. Convert existing users in vxlan and nfsd to use the new macro. Suggested-by: Jakub Kicinski Signed-off-by: Carolina Jubran Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250629142138.361537-2-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/netlink.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index 90a560dc167a..1a8356ca4b78 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -68,6 +68,8 @@ * nlmsg_for_each_msg() loop over all messages * nlmsg_validate() validate netlink message incl. attrs * nlmsg_for_each_attr() loop over all attributes + * nlmsg_for_each_attr_type() loop over all attributes with the + * given type * * Misc: * nlmsg_report() report back to application? @@ -966,6 +968,18 @@ static inline u32 nlmsg_seq(const struct nlmsghdr *nlh) nla_for_each_attr(pos, nlmsg_attrdata(nlh, hdrlen), \ nlmsg_attrlen(nlh, hdrlen), rem) +/** + * nlmsg_for_each_attr_type - iterate over a stream of attributes + * @pos: loop counter, set to the current attribute + * @type: required attribute type for @pos + * @nlh: netlink message header + * @hdrlen: length of the family specific header + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nlmsg_for_each_attr_type(pos, type, nlh, hdrlen, rem) \ + nlmsg_for_each_attr(pos, nlh, hdrlen, rem) \ + if (nla_type(pos) == type) + /** * nlmsg_put - Add a new netlink message to an skb * @skb: socket buffer to store message in -- cgit v1.2.3 From 566e8f108fc7847f2a8676ec6a101d37b7dd0fb4 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Sun, 29 Jun 2025 17:21:32 +0300 Subject: devlink: Extend devlink rate API with traffic classes bandwidth management Introduce support for specifying relative bandwidth shares between traffic classes (TC) in the devlink-rate API. This new option allows users to allocate bandwidth across multiple traffic classes in a single command. This feature provides a more granular control over traffic management, especially for scenarios requiring Enhanced Transmission Selection. Users can now define a relative bandwidth share for each traffic class. For example, assigning share values of 20 to TC0 (TCP/UDP) and 80 to TC5 (RoCE) will result in TC0 receiving 20% and TC5 receiving 80% of the total bandwidth. The actual percentage each class receives depends on the ratio of its share value to the sum of all shares. Example: DEV=pci/0000:08:00.0 $ devlink port function rate add $DEV/vfs_group tx_share 10Gbit \ tx_max 50Gbit tc-bw 0:20 1:0 2:0 3:0 4:0 5:80 6:0 7:0 $ devlink port function rate set $DEV/vfs_group \ tc-bw 0:20 1:0 2:0 3:0 4:0 5:20 6:60 7:0 Example usage with ynl: ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/devlink.yaml \ --do rate-set --json '{ "bus-name": "pci", "dev-name": "0000:08:00.0", "port-index": 1, "rate-tc-bws": [ {"rate-tc-index": 0, "rate-tc-bw": 50}, {"rate-tc-index": 1, "rate-tc-bw": 50}, {"rate-tc-index": 2, "rate-tc-bw": 0}, {"rate-tc-index": 3, "rate-tc-bw": 0}, {"rate-tc-index": 4, "rate-tc-bw": 0}, {"rate-tc-index": 5, "rate-tc-bw": 0}, {"rate-tc-index": 6, "rate-tc-bw": 0}, {"rate-tc-index": 7, "rate-tc-bw": 0} ] }' ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/devlink.yaml \ --do rate-get --json '{ "bus-name": "pci", "dev-name": "0000:08:00.0", "port-index": 1 }' output for rate-get: {'bus-name': 'pci', 'dev-name': '0000:08:00.0', 'port-index': 1, 'rate-tc-bws': [{'rate-tc-bw': 50, 'rate-tc-index': 0}, {'rate-tc-bw': 50, 'rate-tc-index': 1}, {'rate-tc-bw': 0, 'rate-tc-index': 2}, {'rate-tc-bw': 0, 'rate-tc-index': 3}, {'rate-tc-bw': 0, 'rate-tc-index': 4}, {'rate-tc-bw': 0, 'rate-tc-index': 5}, {'rate-tc-bw': 0, 'rate-tc-index': 6}, {'rate-tc-bw': 0, 'rate-tc-index': 7}], 'rate-tx-max': 0, 'rate-tx-priority': 0, 'rate-tx-share': 0, 'rate-tx-weight': 0, 'rate-type': 'leaf'} Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Reviewed-by: Jiri Pirko Signed-off-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250629142138.361537-3-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 8 ++++++++ include/uapi/linux/devlink.h | 9 +++++++++ 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 63517646a497..d0ce5a7e984c 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -118,6 +118,8 @@ struct devlink_rate { u32 tx_priority; u32 tx_weight; + + u32 tc_bw[DEVLINK_RATE_TCS_MAX]; }; struct devlink_port { @@ -1486,6 +1488,9 @@ struct devlink_ops { u32 tx_priority, struct netlink_ext_ack *extack); int (*rate_leaf_tx_weight_set)(struct devlink_rate *devlink_rate, void *priv, u32 tx_weight, struct netlink_ext_ack *extack); + int (*rate_leaf_tc_bw_set)(struct devlink_rate *devlink_rate, + void *priv, u32 *tc_bw, + struct netlink_ext_ack *extack); int (*rate_node_tx_share_set)(struct devlink_rate *devlink_rate, void *priv, u64 tx_share, struct netlink_ext_ack *extack); int (*rate_node_tx_max_set)(struct devlink_rate *devlink_rate, void *priv, @@ -1494,6 +1499,9 @@ struct devlink_ops { u32 tx_priority, struct netlink_ext_ack *extack); int (*rate_node_tx_weight_set)(struct devlink_rate *devlink_rate, void *priv, u32 tx_weight, struct netlink_ext_ack *extack); + int (*rate_node_tc_bw_set)(struct devlink_rate *devlink_rate, + void *priv, u32 *tc_bw, + struct netlink_ext_ack *extack); int (*rate_node_new)(struct devlink_rate *rate_node, void **priv, struct netlink_ext_ack *extack); int (*rate_node_del)(struct devlink_rate *rate_node, void *priv, diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index a5ee0f13740a..e72bcc239afd 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -221,6 +221,11 @@ enum devlink_port_flavour { */ }; +/* IEEE 802.1Qaz standard supported values. */ + +#define DEVLINK_RATE_TCS_MAX 8 +#define DEVLINK_RATE_TC_INDEX_MAX (DEVLINK_RATE_TCS_MAX - 1) + enum devlink_rate_type { DEVLINK_RATE_TYPE_LEAF, DEVLINK_RATE_TYPE_NODE, @@ -629,6 +634,10 @@ enum devlink_attr { DEVLINK_ATTR_REGION_DIRECT, /* flag */ + DEVLINK_ATTR_RATE_TC_BWS, /* nested */ + DEVLINK_ATTR_RATE_TC_INDEX, /* u8 */ + DEVLINK_ATTR_RATE_TC_BW, /* u32 */ + /* Add new attributes above here, update the spec in * Documentation/netlink/specs/devlink.yaml and re-generate * net/devlink/netlink_gen.c. -- cgit v1.2.3 From cf73d9970ea4f8cace5d8f02d2565a2723003112 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 2 Jul 2025 21:31:54 +0100 Subject: io_uring: don't use int for ABI __kernel_rwf_t is defined as int, the actual size of which is implementation defined. It won't go well if some compiler / archs ever defines it as i64, so replace it with __u32, hoping that there is no one using i16 for it. Cc: stable@vger.kernel.org Fixes: 2b188cc1bb857 ("Add io_uring IO interface") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/47c666c4ee1df2018863af3a2028af18feef11ed.1751412511.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index b6be063693c8..b8a0e70ee2fd 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -50,7 +50,7 @@ struct io_uring_sqe { }; __u32 len; /* buffer size or number of iovecs */ union { - __kernel_rwf_t rw_flags; + __u32 rw_flags; __u32 fsync_flags; __u16 poll_events; /* compatibility */ __u32 poll32_events; /* word-reversed for BE */ -- cgit v1.2.3 From 9fd45235fdd2c2615a03c86ebe5a88b050dc5680 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 May 2024 16:32:51 -0400 Subject: add locked_recursive_removal() simple_recursive_removal() assumes that parent is not locked and locks it when it finally gets to removing the victim itself. Usually that's what we want, but there are places where the parent is *already* locked and we need it to stay that way. In those cases simple_recursive_removal() would, of course, deadlock, so we have to play racy games with unlocking/relocking the parent around the call or open-code the entire thing. A better solution is to provide a variant that expects to be called with the parent already locked by the caller. Parent should be locked with I_MUTEX_PARENT, to avoid false positives from lockdep. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 96c7925a6551..4f0c6bf8d652 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3595,6 +3595,8 @@ extern int simple_rename(struct mnt_idmap *, struct inode *, unsigned int); extern void simple_recursive_removal(struct dentry *, void (*callback)(struct dentry *)); +extern void locked_recursive_removal(struct dentry *, + void (*callback)(struct dentry *)); extern int noop_fsync(struct file *, loff_t, loff_t, int); extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter); extern int simple_empty(struct dentry *); -- cgit v1.2.3 From 59200f45267481582f4e42334a510f01d0b89449 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Mar 2025 21:06:11 -0400 Subject: new helper: simple_start_creating() Set the things up for kernel-initiated creation of object in a tree-in-dcache filesystem. With respect to locking it's an equivalent of filename_create() - we either get a negative dentry with locked parent, or ERR_PTR() and no locks taken. tracefs and debugfs had that open-coded as part of their object creation machinery; switched to calling new helper. Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 96c7925a6551..9f75f8836bbd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3619,6 +3619,7 @@ extern int simple_fill_super(struct super_block *, unsigned long, const struct tree_descr *); extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); extern void simple_release_fs(struct vfsmount **mount, int *count); +struct dentry *simple_start_creating(struct dentry *, const char *); extern ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available); -- cgit v1.2.3 From bccea4ed060f1f6476ac7a0649ffa73f77d6e94c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 20 Feb 2024 00:24:19 -0500 Subject: rpc_unlink(): saner calling conventions 1) pass it pipe instead of pipe->dentry 2) zero pipe->dentry afterwards 3) it always returns 0; why bother? Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/sunrpc/rpc_pipe_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 3b35b6f6533a..a8c0a500d55c 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -129,7 +129,7 @@ struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags); void rpc_destroy_pipe_data(struct rpc_pipe *pipe); extern struct dentry *rpc_mkpipe_dentry(struct dentry *, const char *, void *, struct rpc_pipe *); -extern int rpc_unlink(struct dentry *); +extern void rpc_unlink(struct rpc_pipe *); extern int register_rpc_pipefs(void); extern void unregister_rpc_pipefs(void); -- cgit v1.2.3 From 19a6314a997f6adde0c100ecf9224d1ab43c9603 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 20 Feb 2024 02:41:59 -0500 Subject: rpc_mkpipe_dentry(): saner calling conventions Instead of returning a dentry or ERR_PTR(-E...), return 0 and store dentry into pipe->dentry on success and return -E... on failure. Callers are happier that way... NOTE: dummy rpc_pipe is getting ->dentry set; we never access that, since we 1) never call rpc_unlink() for it (dentry is taken out by ->kill_sb()) 2) never call rpc_queue_upcall() for it (writing to that sucker fails; no downcalls are ever submitted, so no replies are going to arrive) IOW, having that ->dentry set (and left dangling) is harmless, if ugly; cleaner solution will take more massage. Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/sunrpc/rpc_pipe_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index a8c0a500d55c..8cc3a5df9801 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -127,7 +127,7 @@ extern void rpc_remove_cache_dir(struct dentry *); struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags); void rpc_destroy_pipe_data(struct rpc_pipe *pipe); -extern struct dentry *rpc_mkpipe_dentry(struct dentry *, const char *, void *, +extern int rpc_mkpipe_dentry(struct dentry *, const char *, void *, struct rpc_pipe *); extern void rpc_unlink(struct rpc_pipe *); extern int register_rpc_pipefs(void); -- cgit v1.2.3 From 350db61fbeb940502a16e74153ee5954d03622e9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 23 Mar 2025 00:51:10 -0400 Subject: rpc_create_client_dir(): return 0 or -E... Callers couldn't care less which dentry did we get - anything valid is treated as success. Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/sunrpc/rpc_pipe_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 8cc3a5df9801..2cb406f8ff4e 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -98,7 +98,7 @@ static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) { } struct rpc_clnt; -extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *); +extern int rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *); extern int rpc_remove_client_dir(struct rpc_clnt *); extern void rpc_init_pipe_dir_head(struct rpc_pipe_dir_head *pdh); -- cgit v1.2.3 From aa89281bbc0b61610c96074c6390aed44474ebd0 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Thu, 12 Jun 2025 16:58:21 +0200 Subject: media: pisp_be: Use clamp() and define max sizes Use the clamp() function from minmax.h and provide a define for the max sizes as they will be used in subsequent patches. Reviewed-by: Daniel Scally Reviewed-by: Stefan Klug Signed-off-by: Jacopo Mondi Signed-off-by: Hans Verkuil --- include/uapi/linux/media/raspberrypi/pisp_be_config.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/media/raspberrypi/pisp_be_config.h b/include/uapi/linux/media/raspberrypi/pisp_be_config.h index cbeb714f4d61..2ad3b90684d7 100644 --- a/include/uapi/linux/media/raspberrypi/pisp_be_config.h +++ b/include/uapi/linux/media/raspberrypi/pisp_be_config.h @@ -21,10 +21,11 @@ /* preferred byte alignment for outputs */ #define PISP_BACK_END_OUTPUT_MAX_ALIGN 64u -/* minimum allowed tile width anywhere in the pipeline */ -#define PISP_BACK_END_MIN_TILE_WIDTH 16u -/* minimum allowed tile width anywhere in the pipeline */ -#define PISP_BACK_END_MIN_TILE_HEIGHT 16u +/* minimum allowed tile sizes anywhere in the pipeline */ +#define PISP_BACK_END_MIN_TILE_WIDTH 16u +#define PISP_BACK_END_MIN_TILE_HEIGHT 16u +#define PISP_BACK_END_MAX_TILE_WIDTH 65536u +#define PISP_BACK_END_MAX_TILE_HEIGHT 65536u #define PISP_BACK_END_NUM_OUTPUTS 2 #define PISP_BACK_END_HOG_OUTPUT 1 -- cgit v1.2.3 From 78584431e2cea6b60909cfa23c90ac8b33ab4198 Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Mon, 30 Jun 2025 23:27:34 +0100 Subject: media: v4l2: Add Renesas Camera Receiver Unit pixel formats The Renesas Camera Receiver Unit in the RZ/V2H SoC can output RAW data captured from an image sensor without conversion to an RGB/YUV format. In that case the data are packed into 64-bit blocks, with a variable amount of padding in the most significant bits depending on the bitdepth of the data. Add new V4L2 pixel format codes for the new formats, along with documentation to describe them. Reviewed-by: Lad Prabhakar Reviewed-by: Laurent Pinchart Reviewed-by: Jacopo Mondi Signed-off-by: Daniel Scally Link: https://lore.kernel.org/r/20250630222734.2712390-1-dan.scally@ideasonboard.com Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 9e3b366d5fc7..6f7bd38dd5aa 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -840,6 +840,12 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_PISP_COMP2_BGGR v4l2_fourcc('P', 'C', '2', 'B') /* PiSP 8-bit mode 2 compressed BGGR bayer */ #define V4L2_PIX_FMT_PISP_COMP2_MONO v4l2_fourcc('P', 'C', '2', 'M') /* PiSP 8-bit mode 2 compressed monochrome */ +/* Renesas RZ/V2H CRU packed formats. 64-bit units with contiguous pixels */ +#define V4L2_PIX_FMT_RAW_CRU10 v4l2_fourcc('C', 'R', '1', '0') +#define V4L2_PIX_FMT_RAW_CRU12 v4l2_fourcc('C', 'R', '1', '2') +#define V4L2_PIX_FMT_RAW_CRU14 v4l2_fourcc('C', 'R', '1', '4') +#define V4L2_PIX_FMT_RAW_CRU20 v4l2_fourcc('C', 'R', '2', '0') + /* SDR formats - used only for Software Defined Radio devices */ #define V4L2_SDR_FMT_CU8 v4l2_fourcc('C', 'U', '0', '8') /* IQ u8 */ #define V4L2_SDR_FMT_CU16LE v4l2_fourcc('C', 'U', '1', '6') /* IQ u16le */ -- cgit v1.2.3 From 8a67d4b49bbdebcd255abde9e652092c3de3b657 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Wed, 2 Jul 2025 19:28:19 -0700 Subject: platform/x86/intel/vsec: Add device links to enforce dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New Intel VSEC features will have dependencies on other features, requiring certain supplier drivers to be probed before their consumers. To enforce this dependency ordering, introduce device links using device_link_add(), ensuring that suppliers are fully registered before consumers are probed. - Add device link tracking by storing supplier devices and tracking their state. - Implement intel_vsec_link_devices() to establish links between suppliers and consumers based on feature dependencies. - Add get_consumer_dependencies() to retrieve supplier-consumer relationships. - Modify feature registration logic: * Consumers now check that all required suppliers are registered before being initialized. * suppliers_ready() verifies that all required supplier devices are available. - Prevent potential null consumer name issue in sysfs: - Use dev_set_name() when creating auxiliary devices to ensure a unique, non-null consumer name. - Update intel_vsec_pci_probe() to loop up to the number of possible features or when all devices are registered, whichever comes first. - Introduce VSEC_CAP_UNUSED to prevent sub-features (registered via exported APIs) from being mistakenly linked. Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20250703022832.1302928-5-david.e.box@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/intel_vsec.h | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h index bc95821f1bfb..71067afaca99 100644 --- a/include/linux/intel_vsec.h +++ b/include/linux/intel_vsec.h @@ -5,11 +5,18 @@ #include #include -#define VSEC_CAP_TELEMETRY BIT(0) -#define VSEC_CAP_WATCHER BIT(1) -#define VSEC_CAP_CRASHLOG BIT(2) -#define VSEC_CAP_SDSI BIT(3) -#define VSEC_CAP_TPMI BIT(4) +/* + * VSEC_CAP_UNUSED is reserved. It exists to prevent zero initialized + * intel_vsec devices from being automatically set to a known + * capability with ID 0 + */ +#define VSEC_CAP_UNUSED BIT(0) +#define VSEC_CAP_TELEMETRY BIT(1) +#define VSEC_CAP_WATCHER BIT(2) +#define VSEC_CAP_CRASHLOG BIT(3) +#define VSEC_CAP_SDSI BIT(4) +#define VSEC_CAP_TPMI BIT(5) +#define VSEC_FEATURE_COUNT 6 /* Intel DVSEC offsets */ #define INTEL_DVSEC_ENTRIES 0xA @@ -81,22 +88,31 @@ struct pmt_callbacks { int (*read_telem)(struct pci_dev *pdev, u32 guid, u64 *data, loff_t off, u32 count); }; +struct vsec_feature_dependency { + unsigned long feature; + unsigned long supplier_bitmap; +}; + /** * struct intel_vsec_platform_info - Platform specific data * @parent: parent device in the auxbus chain * @headers: list of headers to define the PMT client devices to create + * @deps: array of feature dependencies * @priv_data: private data, usable by parent devices, currently a callback * @caps: bitmask of PMT capabilities for the given headers * @quirks: bitmask of VSEC device quirks * @base_addr: allow a base address to be specified (rather than derived) + * @num_deps: Count feature dependencies */ struct intel_vsec_platform_info { struct device *parent; struct intel_vsec_header **headers; + const struct vsec_feature_dependency *deps; void *priv_data; unsigned long caps; unsigned long quirks; u64 base_addr; + int num_deps; }; /** @@ -110,6 +126,7 @@ struct intel_vsec_platform_info { * @priv_data: any private data needed * @quirks: specified quirks * @base_addr: base address of entries (if specified) + * @cap_id: the enumerated id of the vsec feature */ struct intel_vsec_device { struct auxiliary_device auxdev; @@ -122,6 +139,7 @@ struct intel_vsec_device { size_t priv_data_size; unsigned long quirks; u64 base_addr; + unsigned long cap_id; }; int intel_vsec_add_aux(struct pci_dev *pdev, struct device *parent, -- cgit v1.2.3 From 10f32796e86c04f73b7f8580cc9483765ed19f49 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Wed, 2 Jul 2025 19:28:22 -0700 Subject: platform/x86/intel/vsec: Add new Discovery feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the PCIe VSEC ID for new Intel Platform Monitoring Technology Capability Discovery feature. Discovery provides detailed information for the various Intel VSEC features. Also make the driver a supplier for TPMI and Telemetry drivers which will use the information. Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20250703022832.1302928-8-david.e.box@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/intel_vsec.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h index 71067afaca99..a07796d7d43b 100644 --- a/include/linux/intel_vsec.h +++ b/include/linux/intel_vsec.h @@ -16,7 +16,8 @@ #define VSEC_CAP_CRASHLOG BIT(3) #define VSEC_CAP_SDSI BIT(4) #define VSEC_CAP_TPMI BIT(5) -#define VSEC_FEATURE_COUNT 6 +#define VSEC_CAP_DISCOVERY BIT(6) +#define VSEC_FEATURE_COUNT 7 /* Intel DVSEC offsets */ #define INTEL_DVSEC_ENTRIES 0xA @@ -33,6 +34,7 @@ enum intel_vsec_id { VSEC_ID_TELEMETRY = 2, VSEC_ID_WATCHER = 3, VSEC_ID_CRASHLOG = 4, + VSEC_ID_DISCOVERY = 12, VSEC_ID_SDSI = 65, VSEC_ID_TPMI = 66, }; -- cgit v1.2.3 From d9a0788093565c300f7c8dd034dbfa6ac4da9aa6 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Wed, 2 Jul 2025 19:28:23 -0700 Subject: platform/x86/intel/pmt: Add PMT Discovery driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces a new driver to enumerate and expose Intel Platform Monitoring Technology (PMT) capabilities via a simple discovery mechanism. The PMT Discovery driver parses hardware-provided discovery tables from Intel Out of Band Management Services Modules (OOBMSM) and extracts feature information for various providers (such as TPMI, Telemetry, Crash Log, etc). This unified interface simplifies the process of determining which manageability and telemetry features are supported by a given platform. This new feature is described in the Intel Platform Monitoring Technology 3.0 specification, section 6.6 Capability. Key changes and additions: New file drivers/platform/x86/intel/pmt/discovery.c: – Implements the discovery logic to map the discovery resource, read the feature discovery table, and validate feature parameters. New file drivers/platform/x86/intel/pmt/features.c: – Defines feature names, layouts, and associated capability masks. – Provides a mapping between raw hardware attributes and sysfs representations for easier integration with user-space tools. New header include/linux/intel_pmt_features.h: – Declares constants, masks, and feature identifiers used across the PMT framework. Sysfs integration: – Feature attributes are exposed under /sys/class/intel_pmt. – Each device is represented by a subfolder within the intel_pmt class, named using its DBDF (Domain:Bus:Device.Function), e.g.: features-0000:00:03.1 – Example directory layout for a device: /sys/class/intel_pmt/features-0000:00:03.1/ ├── accelerator_telemetry ├── crash_log ├── per_core_environment_telemetry ├── per_core_performance_telemetry ├── per_rmid_energy_telemetry ├── per_rmid_perf_telemetry ├── tpmi_control ├── tracing └── uncore_telemetry By exposing PMT feature details through sysfs and integrating with the existing PMT class, this driver paves the way for more streamlined integration of PMT-based manageability and telemetry tools. Link: https://www.intel.com/content/www/us/en/content-details/710389/intel-platform-monitoring-technology-intel-pmt-external-specification.html Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20250703022832.1302928-9-david.e.box@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/intel_pmt_features.h | 157 +++++++++++++++++++++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 include/linux/intel_pmt_features.h (limited to 'include') diff --git a/include/linux/intel_pmt_features.h b/include/linux/intel_pmt_features.h new file mode 100644 index 000000000000..53573a4a49b7 --- /dev/null +++ b/include/linux/intel_pmt_features.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _FEATURES_H +#define _FEATURES_H + +#include +#include + +/* Common masks */ +#define PMT_CAP_TELEM BIT(0) +#define PMT_CAP_WATCHER BIT(1) +#define PMT_CAP_CRASHLOG BIT(2) +#define PMT_CAP_STREAMING BIT(3) +#define PMT_CAP_THRESHOLD BIT(4) +#define PMT_CAP_WINDOW BIT(5) +#define PMT_CAP_CONFIG BIT(6) +#define PMT_CAP_TRACING BIT(7) +#define PMT_CAP_INBAND BIT(8) +#define PMT_CAP_OOB BIT(9) +#define PMT_CAP_SECURED_CHAN BIT(10) + +#define PMT_CAP_PMT_SP BIT(11) +#define PMT_CAP_PMT_SP_POLICY GENMASK(17, 12) + +/* Per Core Performance Telemetry (PCPT) specific masks */ +#define PMT_CAP_PCPT_CORE_PERF BIT(18) +#define PMT_CAP_PCPT_CORE_C0_RES BIT(19) +#define PMT_CAP_PCPT_CORE_ACTIVITY BIT(20) +#define PMT_CAP_PCPT_CACHE_PERF BIT(21) +#define PMT_CAP_PCPT_QUALITY_TELEM BIT(22) + +/* Per Core Environmental Telemetry (PCET) specific masks */ +#define PMT_CAP_PCET_WORKPOINT_HIST BIT(18) +#define PMT_CAP_PCET_CORE_CURR_TEMP BIT(19) +#define PMT_CAP_PCET_CORE_INST_RES BIT(20) +#define PMT_CAP_PCET_QUALITY_TELEM BIT(21) /* Same as PMT_CAP_PCPT */ +#define PMT_CAP_PCET_CORE_CDYN_LVL BIT(22) +#define PMT_CAP_PCET_CORE_STRESS_LVL BIT(23) +#define PMT_CAP_PCET_CORE_DAS BIT(24) +#define PMT_CAP_PCET_FIVR_HEALTH BIT(25) +#define PMT_CAP_PCET_ENERGY BIT(26) +#define PMT_CAP_PCET_PEM_STATUS BIT(27) +#define PMT_CAP_PCET_CORE_C_STATE BIT(28) + +/* Per RMID Performance Telemetry specific masks */ +#define PMT_CAP_RMID_CORES_PERF BIT(18) +#define PMT_CAP_RMID_CACHE_PERF BIT(19) +#define PMT_CAP_RMID_PERF_QUAL BIT(20) + +/* Accelerator Telemetry specific masks */ +#define PMT_CAP_ACCEL_CPM_TELEM BIT(18) +#define PMT_CAP_ACCEL_TIP_TELEM BIT(19) + +/* Uncore Telemetry specific masks */ +#define PMT_CAP_UNCORE_IO_CA_TELEM BIT(18) +#define PMT_CAP_UNCORE_RMID_TELEM BIT(19) +#define PMT_CAP_UNCORE_D2D_ULA_TELEM BIT(20) +#define PMT_CAP_UNCORE_PKGC_TELEM BIT(21) + +/* Crash Log specific masks */ +#define PMT_CAP_CRASHLOG_MAN_TRIG BIT(11) +#define PMT_CAP_CRASHLOG_CORE BIT(12) +#define PMT_CAP_CRASHLOG_UNCORE BIT(13) +#define PMT_CAP_CRASHLOG_TOR BIT(14) +#define PMT_CAP_CRASHLOG_S3M BIT(15) +#define PMT_CAP_CRASHLOG_PERSISTENCY BIT(16) +#define PMT_CAP_CRASHLOG_CLIP_GPIO BIT(17) +#define PMT_CAP_CRASHLOG_PRE_RESET BIT(18) +#define PMT_CAP_CRASHLOG_POST_RESET BIT(19) + +/* PeTe Log specific masks */ +#define PMT_CAP_PETE_MAN_TRIG BIT(11) +#define PMT_CAP_PETE_ENCRYPTION BIT(12) +#define PMT_CAP_PETE_PERSISTENCY BIT(13) +#define PMT_CAP_PETE_REQ_TOKENS BIT(14) +#define PMT_CAP_PETE_PROD_ENABLED BIT(15) +#define PMT_CAP_PETE_DEBUG_ENABLED BIT(16) + +/* TPMI control specific masks */ +#define PMT_CAP_TPMI_MAILBOX BIT(11) +#define PMT_CAP_TPMI_LOCK BIT(12) + +/* Tracing specific masks */ +#define PMT_CAP_TRACE_SRAR BIT(11) +#define PMT_CAP_TRACE_CORRECTABLE BIT(12) +#define PMT_CAP_TRACE_MCTP BIT(13) +#define PMT_CAP_TRACE_MRT BIT(14) + +/* Per RMID Energy Telemetry specific masks */ +#define PMT_CAP_RMID_ENERGY BIT(18) +#define PMT_CAP_RMID_ACTIVITY BIT(19) +#define PMT_CAP_RMID_ENERGY_QUAL BIT(20) + +enum pmt_feature_id { + FEATURE_INVALID = 0x0, + FEATURE_PER_CORE_PERF_TELEM = 0x1, + FEATURE_PER_CORE_ENV_TELEM = 0x2, + FEATURE_PER_RMID_PERF_TELEM = 0x3, + FEATURE_ACCEL_TELEM = 0x4, + FEATURE_UNCORE_TELEM = 0x5, + FEATURE_CRASH_LOG = 0x6, + FEATURE_PETE_LOG = 0x7, + FEATURE_TPMI_CTRL = 0x8, + FEATURE_RESERVED = 0x9, + FEATURE_TRACING = 0xA, + FEATURE_PER_RMID_ENERGY_TELEM = 0xB, + FEATURE_MAX = 0xB, +}; + +enum feature_layout { + LAYOUT_RMID, + LAYOUT_WATCHER, + LAYOUT_COMMAND, + LAYOUT_CAPS_ONLY, +}; + +struct pmt_cap { + u32 mask; + const char *name; +}; + +extern const char * const pmt_feature_names[]; +extern enum feature_layout feature_layout[]; +extern struct pmt_cap pmt_cap_common[]; +extern struct pmt_cap pmt_cap_pcpt[]; +extern struct pmt_cap *pmt_caps_pcpt[]; +extern struct pmt_cap pmt_cap_pcet[]; +extern struct pmt_cap *pmt_caps_pcet[]; +extern struct pmt_cap pmt_cap_rmid_perf[]; +extern struct pmt_cap *pmt_caps_rmid_perf[]; +extern struct pmt_cap pmt_cap_accel[]; +extern struct pmt_cap *pmt_caps_accel[]; +extern struct pmt_cap pmt_cap_uncore[]; +extern struct pmt_cap *pmt_caps_uncore[]; +extern struct pmt_cap pmt_cap_crashlog[]; +extern struct pmt_cap *pmt_caps_crashlog[]; +extern struct pmt_cap pmt_cap_pete[]; +extern struct pmt_cap *pmt_caps_pete[]; +extern struct pmt_cap pmt_cap_tpmi[]; +extern struct pmt_cap *pmt_caps_tpmi[]; +extern struct pmt_cap pmt_cap_s3m[]; +extern struct pmt_cap *pmt_caps_s3m[]; +extern struct pmt_cap pmt_cap_tracing[]; +extern struct pmt_cap *pmt_caps_tracing[]; +extern struct pmt_cap pmt_cap_rmid_energy[]; +extern struct pmt_cap *pmt_caps_rmid_energy[]; + +static inline bool pmt_feature_id_is_valid(enum pmt_feature_id id) +{ + if (id > FEATURE_MAX) + return false; + + if (id == FEATURE_INVALID || id == FEATURE_RESERVED) + return false; + + return true; +} +#endif -- cgit v1.2.3 From 934954df0f44de5e10afc1af84c06f78149f15fe Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Wed, 2 Jul 2025 19:28:25 -0700 Subject: platform/x86/intel/tpmi: Relocate platform info to intel_vsec.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TPMI platform information provides a mapping of OOBMSM PCI devices to logical CPUs. Since this mapping is consistent across all OOBMSM features (e.g., TPMI, PMT, SDSi), it can be leveraged by multiple drivers. To facilitate reuse, relocate the struct intel_tpmi_plat_info to intel_vsec.h, renaming it to struct oobmsm_plat_info, making it accessible to other features. While modifying headers, place them in alphabetical order. Signed-off-by: David E. Box Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20250703022832.1302928-11-david.e.box@linux.intel.com Signed-off-by: Ilpo Järvinen --- include/linux/intel_tpmi.h | 27 +++------------------------ include/linux/intel_vsec.h | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h index ff480b47ae64..94c06bf214fb 100644 --- a/include/linux/intel_tpmi.h +++ b/include/linux/intel_tpmi.h @@ -8,6 +8,8 @@ #include +struct oobmsm_plat_info; + #define TPMI_VERSION_INVALID 0xff #define TPMI_MINOR_VERSION(val) FIELD_GET(GENMASK(4, 0), val) #define TPMI_MAJOR_VERSION(val) FIELD_GET(GENMASK(7, 5), val) @@ -26,30 +28,7 @@ enum intel_tpmi_id { TPMI_INFO_ID = 0x81, /* Special ID for PCI BDF and Package ID information */ }; -/** - * struct intel_tpmi_plat_info - Platform information for a TPMI device instance - * @cdie_mask: Mask of all compute dies in the partition - * @package_id: CPU Package id - * @partition: Package partition id when multiple VSEC PCI devices per package - * @segment: PCI segment ID - * @bus_number: PCI bus number - * @device_number: PCI device number - * @function_number: PCI function number - * - * Structure to store platform data for a TPMI device instance. This - * struct is used to return data via tpmi_get_platform_data(). - */ -struct intel_tpmi_plat_info { - u16 cdie_mask; - u8 package_id; - u8 partition; - u8 segment; - u8 bus_number; - u8 device_number; - u8 function_number; -}; - -struct intel_tpmi_plat_info *tpmi_get_platform_data(struct auxiliary_device *auxdev); +struct oobmsm_plat_info *tpmi_get_platform_data(struct auxiliary_device *auxdev); struct resource *tpmi_get_resource_at_index(struct auxiliary_device *auxdev, int index); int tpmi_get_resource_count(struct auxiliary_device *auxdev); int tpmi_get_feature_status(struct auxiliary_device *auxdev, int feature_id, bool *read_blocked, diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h index a07796d7d43b..cd78d0b2e623 100644 --- a/include/linux/intel_vsec.h +++ b/include/linux/intel_vsec.h @@ -144,6 +144,28 @@ struct intel_vsec_device { unsigned long cap_id; }; +/** + * struct oobmsm_plat_info - Platform information for a device instance + * @cdie_mask: Mask of all compute dies in the partition + * @package_id: CPU Package id + * @partition: Package partition id when multiple VSEC PCI devices per package + * @segment: PCI segment ID + * @bus_number: PCI bus number + * @device_number: PCI device number + * @function_number: PCI function number + * + * Structure to store platform data for a OOBMSM device instance. + */ +struct oobmsm_plat_info { + u16 cdie_mask; + u8 package_id; + u8 partition; + u8 segment; + u8 bus_number; + u8 device_number; + u8 function_number; +}; + int intel_vsec_add_aux(struct pci_dev *pdev, struct device *parent, struct intel_vsec_device *intel_vsec_dev, const char *name); -- cgit v1.2.3 From a885a2780937afac4f31f00d11663f50d05dfb35 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Wed, 2 Jul 2025 19:28:26 -0700 Subject: platform/x86/intel/vsec: Set OOBMSM to CPU mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add functions, intel_vsec_set/get_mapping(), to set and retrieve the OOBMSM-to-CPU mapping data in the private data of the parent Intel VSEC driver. With this mapping information available, other Intel VSEC features on the same OOBMSM device can easily access and use the mapping data, allowing each of the OOBMSM features to map to the CPUs they provides data for. Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20250703022832.1302928-12-david.e.box@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/intel_vsec.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h index cd78d0b2e623..4bd0c6e7857c 100644 --- a/include/linux/intel_vsec.h +++ b/include/linux/intel_vsec.h @@ -183,11 +183,23 @@ static inline struct intel_vsec_device *auxdev_to_ivdev(struct auxiliary_device #if IS_ENABLED(CONFIG_INTEL_VSEC) int intel_vsec_register(struct pci_dev *pdev, struct intel_vsec_platform_info *info); +int intel_vsec_set_mapping(struct oobmsm_plat_info *plat_info, + struct intel_vsec_device *vsec_dev); +struct oobmsm_plat_info *intel_vsec_get_mapping(struct pci_dev *pdev); #else static inline int intel_vsec_register(struct pci_dev *pdev, struct intel_vsec_platform_info *info) { return -ENODEV; } +static inline int intel_vsec_set_mapping(struct oobmsm_plat_info *plat_info, + struct intel_vsec_device *vsec_dev) +{ + return -ENODEV; +} +static inline struct oobmsm_plat_info *intel_vsec_get_mapping(struct pci_dev *pdev) +{ + return ERR_PTR(-ENODEV); +} #endif #endif -- cgit v1.2.3 From 86fc85c75bcd9b0f28afadd60c9f890669b42ba4 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Wed, 2 Jul 2025 19:28:28 -0700 Subject: platform/x86/intel/pmt/discovery: Get telemetry attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add intel_pmt_get_features() in PMT Discovery to enable the PMT Telemetry driver to obtain attributes of the aggregated telemetry spaces it enumerates. The function gathers feature flags and associated data (like the number of RMIDs) from each PMT entry, laying the groundwork for a future kernel interface that will allow direct access to telemetry regions based on their capabilities. Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20250703022832.1302928-14-david.e.box@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/intel_vsec.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h index 4bd0c6e7857c..f185e9c01c90 100644 --- a/include/linux/intel_vsec.h +++ b/include/linux/intel_vsec.h @@ -4,6 +4,7 @@ #include #include +#include /* * VSEC_CAP_UNUSED is reserved. It exists to prevent zero initialized @@ -166,6 +167,21 @@ struct oobmsm_plat_info { u8 function_number; }; +struct telemetry_region { + struct oobmsm_plat_info plat_info; + void __iomem *addr; + size_t size; + u32 guid; + u32 num_rmids; +}; + +struct pmt_feature_group { + enum pmt_feature_id id; + int count; + struct kref kref; + struct telemetry_region regions[]; +}; + int intel_vsec_add_aux(struct pci_dev *pdev, struct device *parent, struct intel_vsec_device *intel_vsec_dev, const char *name); -- cgit v1.2.3 From 42dabe5442887946b16e64c6ebe91d2671a96fbb Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Wed, 2 Jul 2025 19:28:29 -0700 Subject: platform/x86/intel/pmt/telemetry: Add API to retrieve telemetry regions by feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a new API, intel_pmt_get_regions_by_feature(), that gathers telemetry regions based on a provided capability flag. This API enables retrieval of regions with various capabilities (for example, RMID-based telemetry) and provides a unified interface for accessing them. Resource management is handled via reference counting using intel_pmt_put_feature_group(). Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20250703022832.1302928-15-david.e.box@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/intel_vsec.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h index f185e9c01c90..53f6fe88e369 100644 --- a/include/linux/intel_vsec.h +++ b/include/linux/intel_vsec.h @@ -4,6 +4,7 @@ #include #include +#include #include /* @@ -218,4 +219,21 @@ static inline struct oobmsm_plat_info *intel_vsec_get_mapping(struct pci_dev *pd return ERR_PTR(-ENODEV); } #endif + +#if IS_ENABLED(CONFIG_INTEL_PMT_TELEMETRY) +struct pmt_feature_group * +intel_pmt_get_regions_by_feature(enum pmt_feature_id id); + +void intel_pmt_put_feature_group(struct pmt_feature_group *feature_group); +#else +static inline struct pmt_feature_group * +intel_pmt_get_regions_by_feature(enum pmt_feature_id id) +{ + return ERR_PTR(-ENODEV); +} + +static inline void +intel_pmt_put_feature_group(struct pmt_feature_group *feature_group) {} +#endif + #endif -- cgit v1.2.3 From fd72f265bb00d2dd2a3bbad7ec45520025e3a926 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 22 May 2025 16:52:23 +0200 Subject: netfilter: conntrack: remove DCCP protocol support The DCCP socket family has now been removed from this tree, see: 8bb3212be4b4 ("Merge branch 'net-retire-dccp-socket'") Remove connection tracking and NAT support for this protocol, this should not pose a problem because no DCCP traffic is expected to be seen on the wire. As for the code for matching on dccp header for iptables and nftables, mark it as deprecated and keep it in place. Ruleset restoration is an atomic operation. Without dccp matching support, an astray match on dccp could break this operation leaving your computer with no policy in place, so let's follow a more conservative approach for matches. Add CONFIG_NFT_EXTHDR_DCCP which is set to 'n' by default to deprecate dccp extension support. Similarly, label CONFIG_NETFILTER_XT_MATCH_DCCP as deprecated too and also set it to 'n' by default. Code to match on DCCP protocol from ebtables also remains in place, this is just a few checks on IPPROTO_DCCP from _check() path which is exercised when ruleset is loaded. There is another use of IPPROTO_DCCP from the _check() path in the iptables multiport match. Another check for IPPROTO_DCCP from the packet in the reject target is also removed. So let's schedule removal of the dccp matching for a second stage, this should not interfer with the dccp retirement since this is only matching on the dccp header. Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Kuniyuki Iwashima Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_dccp.h | 38 -------------------------- include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 3 -- include/net/netfilter/nf_conntrack.h | 2 -- include/net/netfilter/nf_conntrack_l4proto.h | 13 --------- include/net/netfilter/nf_reject.h | 1 - include/net/netns/conntrack.h | 13 --------- 6 files changed, 70 deletions(-) delete mode 100644 include/linux/netfilter/nf_conntrack_dccp.h (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_dccp.h b/include/linux/netfilter/nf_conntrack_dccp.h deleted file mode 100644 index c509ed76e714..000000000000 --- a/include/linux/netfilter/nf_conntrack_dccp.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NF_CONNTRACK_DCCP_H -#define _NF_CONNTRACK_DCCP_H - -/* Exposed to userspace over nfnetlink */ -enum ct_dccp_states { - CT_DCCP_NONE, - CT_DCCP_REQUEST, - CT_DCCP_RESPOND, - CT_DCCP_PARTOPEN, - CT_DCCP_OPEN, - CT_DCCP_CLOSEREQ, - CT_DCCP_CLOSING, - CT_DCCP_TIMEWAIT, - CT_DCCP_IGNORE, - CT_DCCP_INVALID, - __CT_DCCP_MAX -}; -#define CT_DCCP_MAX (__CT_DCCP_MAX - 1) - -enum ct_dccp_roles { - CT_DCCP_ROLE_CLIENT, - CT_DCCP_ROLE_SERVER, - __CT_DCCP_ROLE_MAX -}; -#define CT_DCCP_ROLE_MAX (__CT_DCCP_ROLE_MAX - 1) - -#include - -struct nf_ct_dccp { - u_int8_t role[IP_CT_DIR_MAX]; - u_int8_t state; - u_int8_t last_pkt; - u_int8_t last_dir; - u_int64_t handshake_seq; -}; - -#endif /* _NF_CONNTRACK_DCCP_H */ diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 2c8c2b023848..8d65ffbf57de 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -13,9 +13,6 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp; extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp; extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; -#ifdef CONFIG_NF_CT_PROTO_DCCP -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp; -#endif #ifdef CONFIG_NF_CT_PROTO_SCTP extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp; #endif diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 3f02a45773e8..a844aa46d076 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -18,7 +18,6 @@ #include #include -#include #include #include @@ -31,7 +30,6 @@ struct nf_ct_udp { /* per conntrack: protocol private data */ union nf_conntrack_proto { /* insert conntrack proto private data here */ - struct nf_ct_dccp dccp; struct ip_ct_sctp sctp; struct ip_ct_tcp tcp; struct nf_ct_udp udp; diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 1f47bef51722..6929f8daf1ed 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -117,11 +117,6 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state); -int nf_conntrack_dccp_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state); int nf_conntrack_sctp_packet(struct nf_conn *ct, struct sk_buff *skb, unsigned int dataoff, @@ -137,7 +132,6 @@ void nf_conntrack_generic_init_net(struct net *net); void nf_conntrack_tcp_init_net(struct net *net); void nf_conntrack_udp_init_net(struct net *net); void nf_conntrack_gre_init_net(struct net *net); -void nf_conntrack_dccp_init_net(struct net *net); void nf_conntrack_sctp_init_net(struct net *net); void nf_conntrack_icmp_init_net(struct net *net); void nf_conntrack_icmpv6_init_net(struct net *net); @@ -223,13 +217,6 @@ static inline bool nf_conntrack_tcp_established(const struct nf_conn *ct) } #endif -#ifdef CONFIG_NF_CT_PROTO_DCCP -static inline struct nf_dccp_net *nf_dccp_pernet(struct net *net) -{ - return &net->ct.nf_ct_proto.dccp; -} -#endif - #ifdef CONFIG_NF_CT_PROTO_SCTP static inline struct nf_sctp_net *nf_sctp_pernet(struct net *net) { diff --git a/include/net/netfilter/nf_reject.h b/include/net/netfilter/nf_reject.h index 7c669792fb9c..f1db33bc6bf8 100644 --- a/include/net/netfilter/nf_reject.h +++ b/include/net/netfilter/nf_reject.h @@ -34,7 +34,6 @@ static inline bool nf_reject_verify_csum(struct sk_buff *skb, int dataoff, /* Protocols with partial checksums. */ case IPPROTO_UDPLITE: - case IPPROTO_DCCP: return false; } return true; diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index bae914815aa3..ab74b5ed0b01 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -7,9 +7,6 @@ #include #include #include -#ifdef CONFIG_NF_CT_PROTO_DCCP -#include -#endif #ifdef CONFIG_NF_CT_PROTO_SCTP #include #endif @@ -50,13 +47,6 @@ struct nf_icmp_net { unsigned int timeout; }; -#ifdef CONFIG_NF_CT_PROTO_DCCP -struct nf_dccp_net { - u8 dccp_loose; - unsigned int dccp_timeout[CT_DCCP_MAX + 1]; -}; -#endif - #ifdef CONFIG_NF_CT_PROTO_SCTP struct nf_sctp_net { unsigned int timeouts[SCTP_CONNTRACK_MAX]; @@ -82,9 +72,6 @@ struct nf_ip_net { struct nf_udp_net udp; struct nf_icmp_net icmp; struct nf_icmp_net icmpv6; -#ifdef CONFIG_NF_CT_PROTO_DCCP - struct nf_dccp_net dccp; -#endif #ifdef CONFIG_NF_CT_PROTO_SCTP struct nf_sctp_net sctp; #endif -- cgit v1.2.3 From 9931d2899eec3737f4e4fa9fc900be7329816e94 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Fri, 20 Jun 2025 13:52:28 +0800 Subject: ASoC: fsl_mqs: Distinguish different modules by system manager indices On i.MX94, the MQS2 also needs to be configured by SCMI interface, add sm_index variable in struct fsl_mqs_soc_data to distinguish the MQS1 and MQS2 on this platform. Add the system manager indices for i.MX94 in the header file. Signed-off-by: Shengjiu Wang Reviewed-by: Peng Fan Link: https://patch.msgid.link/20250620055229.965942-2-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- include/linux/firmware/imx/sm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h index a8a17eeb7d90..a6220c500f7c 100644 --- a/include/linux/firmware/imx/sm.h +++ b/include/linux/firmware/imx/sm.h @@ -18,6 +18,14 @@ #define SCMI_IMX_CTRL_SAI4_MCLK 4 /* WAKE SAI4 MCLK */ #define SCMI_IMX_CTRL_SAI5_MCLK 5 /* WAKE SAI5 MCLK */ +#define SCMI_IMX94_CTRL_PDM_CLK_SEL 0U /*!< AON PDM clock sel */ +#define SCMI_IMX94_CTRL_MQS1_SETTINGS 1U /*!< AON MQS settings */ +#define SCMI_IMX94_CTRL_MQS2_SETTINGS 2U /*!< WAKE MQS settings */ +#define SCMI_IMX94_CTRL_SAI1_MCLK 3U /*!< AON SAI1 MCLK */ +#define SCMI_IMX94_CTRL_SAI2_MCLK 4U /*!< WAKE SAI2 MCLK */ +#define SCMI_IMX94_CTRL_SAI3_MCLK 5U /*!< WAKE SAI3 MCLK */ +#define SCMI_IMX94_CTRL_SAI4_MCLK 6U /*!< WAKE SAI4 MCLK */ + int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val); int scmi_imx_misc_ctrl_set(u32 id, u32 val); -- cgit v1.2.3 From baee26a9d6cd3d3c6c3c03c56270aa647a67e4bd Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Fri, 20 Jun 2025 13:52:29 +0800 Subject: ASoC: fsl_mqs: rename system manager indices for i.MX95 The system manager indices names are different for each platform, rename the indices for i.MX95 to differentiate with other platform. Signed-off-by: Shengjiu Wang Reviewed-by: Peng Fan Link: https://patch.msgid.link/20250620055229.965942-3-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- include/linux/firmware/imx/sm.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h index a6220c500f7c..d4212bc42b2c 100644 --- a/include/linux/firmware/imx/sm.h +++ b/include/linux/firmware/imx/sm.h @@ -11,12 +11,12 @@ #include #include -#define SCMI_IMX_CTRL_PDM_CLK_SEL 0 /* AON PDM clock sel */ -#define SCMI_IMX_CTRL_MQS1_SETTINGS 1 /* AON MQS settings */ -#define SCMI_IMX_CTRL_SAI1_MCLK 2 /* AON SAI1 MCLK */ -#define SCMI_IMX_CTRL_SAI3_MCLK 3 /* WAKE SAI3 MCLK */ -#define SCMI_IMX_CTRL_SAI4_MCLK 4 /* WAKE SAI4 MCLK */ -#define SCMI_IMX_CTRL_SAI5_MCLK 5 /* WAKE SAI5 MCLK */ +#define SCMI_IMX95_CTRL_PDM_CLK_SEL 0 /* AON PDM clock sel */ +#define SCMI_IMX95_CTRL_MQS1_SETTINGS 1 /* AON MQS settings */ +#define SCMI_IMX95_CTRL_SAI1_MCLK 2 /* AON SAI1 MCLK */ +#define SCMI_IMX95_CTRL_SAI3_MCLK 3 /* WAKE SAI3 MCLK */ +#define SCMI_IMX95_CTRL_SAI4_MCLK 4 /* WAKE SAI4 MCLK */ +#define SCMI_IMX95_CTRL_SAI5_MCLK 5 /* WAKE SAI5 MCLK */ #define SCMI_IMX94_CTRL_PDM_CLK_SEL 0U /*!< AON PDM clock sel */ #define SCMI_IMX94_CTRL_MQS1_SETTINGS 1U /*!< AON MQS settings */ -- cgit v1.2.3 From 5b605dbee07dda8fd538af1f07cbf1baf0a49cbc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Jul 2025 15:26:58 +0200 Subject: timekeeping: Provide ktime_get_clock_ts64() PTP implements an inline switch case for taking timestamps from various POSIX clock IDs, which already consumes quite some text space. Expanding it for auxiliary clocks really becomes too big for inlining. Provide a out of line version. The function invalidates the timestamp in case the clock is invalid. The invalidation allows to implement a validation check without the need to propagate a return value through deep existing call chains. Due to merge logistics this temporarily defines CLOCK_AUX[_LAST] if undefined, so that the plain branch, which does not contain any of the core timekeeper changes, can be pulled into the networking tree as prerequisite for the PTP side changes. These temporary defines are removed after that branch is merged into the tip::timers/ptp branch. That way the result in -next or upstream in the next merge window has zero dependencies. Signed-off-by: Thomas Gleixner Reviewed-by: Vadim Fedorenko Acked-by: John Stultz Link: https://lore.kernel.org/all/20250701132628.357686408@linutronix.de --- include/linux/timekeeping.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 542773650200..4a4c2778abae 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -44,6 +44,7 @@ extern void ktime_get_ts64(struct timespec64 *ts); extern void ktime_get_real_ts64(struct timespec64 *tv); extern void ktime_get_coarse_ts64(struct timespec64 *ts); extern void ktime_get_coarse_real_ts64(struct timespec64 *ts); +extern void ktime_get_clock_ts64(clockid_t id, struct timespec64 *ts); /* Multigrain timestamp interfaces */ extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts); @@ -345,4 +346,13 @@ void read_persistent_wall_and_boot_offset(struct timespec64 *wall_clock, extern int update_persistent_clock64(struct timespec64 now); #endif +/* Temporary workaround to avoid merge dependencies and cross tree messes */ +#ifndef CLOCK_AUX +#define CLOCK_AUX MAX_CLOCKS +#define MAX_AUX_CLOCKS 8 +#define CLOCK_AUX_LAST (CLOCK_AUX + MAX_AUX_CLOCKS - 1) + +static inline bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *kt) { return false; } +#endif + #endif -- cgit v1.2.3 From 8959338617a85e35820e3a7fa21801cf55b068bf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 3 Jul 2025 14:39:28 +0200 Subject: timekeeping: Remove the temporary CLOCK_AUX workaround ktime_get_clock_ts64() was provided for the networking tree as a stand alone commit based on v6.16-rc1. It contains a temporary workaround for the CLOCK_AUX* defines, which are only available in the timekeeping tree. As this commit is now merged into the timers/ptp branch, which contains the real CLOCK_AUX* defines, the workaround is obsolete. Remove it. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250701130923.579834908@linutronix.de --- include/linux/timekeeping.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 6121924d93c4..aee2c1a46e47 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -357,13 +357,4 @@ void read_persistent_wall_and_boot_offset(struct timespec64 *wall_clock, extern int update_persistent_clock64(struct timespec64 now); #endif -/* Temporary workaround to avoid merge dependencies and cross tree messes */ -#ifndef CLOCK_AUX -#define CLOCK_AUX MAX_CLOCKS -#define MAX_AUX_CLOCKS 8 -#define CLOCK_AUX_LAST (CLOCK_AUX + MAX_AUX_CLOCKS - 1) - -static inline bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *kt) { return false; } -#endif - #endif -- cgit v1.2.3 From 4c09a4cebd0320c5381afad3fb6f997f20082a3b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Jul 2025 15:27:00 +0200 Subject: ptp: Use ktime_get_clock_ts64() for timestamping The inlined ptp_read_system_[pre|post]ts() switch cases expand to a copious amount of text in drivers, e.g. ~500 bytes in e1000e. Adding auxiliary clock support to the inlines would increase it further. Replace the inline switch case with a call to ktime_get_clock_ts64(), which reduces the code size in drivers and allows to access auxiliary clocks once they are enabled in the IOCTL parameter filter. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Vadim Fedorenko Acked-by: John Stultz Link: https://patch.msgid.link/20250701132628.426168092@linutronix.de Signed-off-by: Paolo Abeni --- include/linux/ptp_clock_kernel.h | 34 ++++------------------------------ 1 file changed, 4 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index eced7e9bf69a..3d089bd4d5e9 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -477,40 +477,14 @@ static inline ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, static inline void ptp_read_system_prets(struct ptp_system_timestamp *sts) { - if (sts) { - switch (sts->clockid) { - case CLOCK_REALTIME: - ktime_get_real_ts64(&sts->pre_ts); - break; - case CLOCK_MONOTONIC: - ktime_get_ts64(&sts->pre_ts); - break; - case CLOCK_MONOTONIC_RAW: - ktime_get_raw_ts64(&sts->pre_ts); - break; - default: - break; - } - } + if (sts) + ktime_get_clock_ts64(sts->clockid, &sts->pre_ts); } static inline void ptp_read_system_postts(struct ptp_system_timestamp *sts) { - if (sts) { - switch (sts->clockid) { - case CLOCK_REALTIME: - ktime_get_real_ts64(&sts->post_ts); - break; - case CLOCK_MONOTONIC: - ktime_get_ts64(&sts->post_ts); - break; - case CLOCK_MONOTONIC_RAW: - ktime_get_raw_ts64(&sts->post_ts); - break; - default: - break; - } - } + if (sts) + ktime_get_clock_ts64(sts->clockid, &sts->post_ts); } #endif -- cgit v1.2.3 From 858e65af91351f8842bbe2c5ae6f100778783f42 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 26 Jun 2025 16:48:58 +0200 Subject: irqdomain: Add device pointer to irq_domain_info and msi_domain_info Add device pointer to irq_domain_info and msi_domain_info, so that the device can be specified at domain creation time. Signed-off-by: Thomas Gleixner Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/943e52403b20cf13c320d55bd4446b4562466aab.1750860131.git.namcao@linutronix.de --- include/linux/irqdomain.h | 2 ++ include/linux/msi.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 7387d183029b..266b5e5bb8ce 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -279,6 +279,7 @@ struct irq_domain_chip_generic_info; * domains are added using same fwnode * @ops: Domain operation callbacks * @host_data: Controller private data pointer + * @dev: Device which creates the domain * @dgc_info: Geneneric chip information structure pointer used to * create generic chips for the domain if not NULL. * @init: Function called when the domain is created. @@ -298,6 +299,7 @@ struct irq_domain_info { const char *name_suffix; const struct irq_domain_ops *ops; void *host_data; + struct device *dev; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY /** * @parent: Pointer to the parent irq domain used in a hierarchy domain diff --git a/include/linux/msi.h b/include/linux/msi.h index 6863540f4b71..77227d23ea84 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -488,6 +488,7 @@ struct msi_domain_ops { * gets initialized to the maximum software index limit * by the domain creation code. * @ops: The callback data structure + * @dev: Device which creates the domain * @chip: Optional: associated interrupt chip * @chip_data: Optional: associated interrupt chip data * @handler: Optional: associated interrupt flow handler @@ -501,6 +502,7 @@ struct msi_domain_info { enum irq_domain_bus_token bus_token; unsigned int hwsize; struct msi_domain_ops *ops; + struct device *dev; struct irq_chip *chip; void *chip_data; irq_flow_handler_t handler; -- cgit v1.2.3 From 25a59e813cd2ca728047f657d64f9b29480be393 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 2 Jul 2025 06:37:03 -0500 Subject: dt-bindings: soc: spacemit: define spacemit,k1-ccu resets There are additional SpacemiT syscon CCUs whose registers control both clocks and resets: RCPU, RCPU2, and APBC2. Unlike those defined previously, these will (initially) support only resets. They do not incorporate power domain functionality. Previously the clock properties were required for all compatible nodes. Make that requirement only apply to the three existing CCUs (APBC, APMU, and MPMU), so that the new reset-only CCUs can go without specifying them. Define the index values for resets associated with all SpacemiT K1 syscon nodes, including those with clocks already defined, as well as the new ones (without clocks). Signed-off-by: Alex Elder Reviewed-by: Krzysztof Kozlowski Reviewed-by: Yixun Lan Link: https://lore.kernel.org/r/20250702113709.291748-2-elder@riscstar.com Signed-off-by: Yixun Lan --- include/dt-bindings/clock/spacemit,k1-syscon.h | 141 +++++++++++++++++++++++++ 1 file changed, 141 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/spacemit,k1-syscon.h b/include/dt-bindings/clock/spacemit,k1-syscon.h index 35968ae98246..2714c3fe66cd 100644 --- a/include/dt-bindings/clock/spacemit,k1-syscon.h +++ b/include/dt-bindings/clock/spacemit,k1-syscon.h @@ -78,6 +78,9 @@ #define CLK_APB 31 #define CLK_WDT_BUS 32 +/* MPMU resets */ +#define RESET_WDT 0 + /* APBC clocks */ #define CLK_UART0 0 #define CLK_UART2 1 @@ -180,6 +183,59 @@ #define CLK_TSEN_BUS 98 #define CLK_IPC_AP2AUD_BUS 99 +/* APBC resets */ +#define RESET_UART0 0 +#define RESET_UART2 1 +#define RESET_UART3 2 +#define RESET_UART4 3 +#define RESET_UART5 4 +#define RESET_UART6 5 +#define RESET_UART7 6 +#define RESET_UART8 7 +#define RESET_UART9 8 +#define RESET_GPIO 9 +#define RESET_PWM0 10 +#define RESET_PWM1 11 +#define RESET_PWM2 12 +#define RESET_PWM3 13 +#define RESET_PWM4 14 +#define RESET_PWM5 15 +#define RESET_PWM6 16 +#define RESET_PWM7 17 +#define RESET_PWM8 18 +#define RESET_PWM9 19 +#define RESET_PWM10 20 +#define RESET_PWM11 21 +#define RESET_PWM12 22 +#define RESET_PWM13 23 +#define RESET_PWM14 24 +#define RESET_PWM15 25 +#define RESET_PWM16 26 +#define RESET_PWM17 27 +#define RESET_PWM18 28 +#define RESET_PWM19 29 +#define RESET_SSP3 30 +#define RESET_RTC 31 +#define RESET_TWSI0 32 +#define RESET_TWSI1 33 +#define RESET_TWSI2 34 +#define RESET_TWSI4 35 +#define RESET_TWSI5 36 +#define RESET_TWSI6 37 +#define RESET_TWSI7 38 +#define RESET_TWSI8 39 +#define RESET_TIMERS1 40 +#define RESET_TIMERS2 41 +#define RESET_AIB 42 +#define RESET_ONEWIRE 43 +#define RESET_SSPA0 44 +#define RESET_SSPA1 45 +#define RESET_DRO 46 +#define RESET_IR 47 +#define RESET_TSEN 48 +#define RESET_IPC_AP2AUD 49 +#define RESET_CAN0 50 + /* APMU clocks */ #define CLK_CCI550 0 #define CLK_CPU_C0_HI 1 @@ -244,4 +300,89 @@ #define CLK_V2D 60 #define CLK_EMMC_BUS 61 +/* APMU resets */ +#define RESET_CCIC_4X 0 +#define RESET_CCIC1_PHY 1 +#define RESET_SDH_AXI 2 +#define RESET_SDH0 3 +#define RESET_SDH1 4 +#define RESET_SDH2 5 +#define RESET_USBP1_AXI 6 +#define RESET_USB_AXI 7 +#define RESET_USB30_AHB 8 +#define RESET_USB30_VCC 9 +#define RESET_USB30_PHY 10 +#define RESET_QSPI 11 +#define RESET_QSPI_BUS 12 +#define RESET_DMA 13 +#define RESET_AES 14 +#define RESET_VPU 15 +#define RESET_GPU 16 +#define RESET_EMMC 17 +#define RESET_EMMC_X 18 +#define RESET_AUDIO_SYS 19 +#define RESET_AUDIO_MCU 20 +#define RESET_AUDIO_APMU 21 +#define RESET_HDMI 22 +#define RESET_PCIE0_MASTER 23 +#define RESET_PCIE0_SLAVE 24 +#define RESET_PCIE0_DBI 25 +#define RESET_PCIE0_GLOBAL 26 +#define RESET_PCIE1_MASTER 27 +#define RESET_PCIE1_SLAVE 28 +#define RESET_PCIE1_DBI 29 +#define RESET_PCIE1_GLOBAL 30 +#define RESET_PCIE2_MASTER 31 +#define RESET_PCIE2_SLAVE 32 +#define RESET_PCIE2_DBI 33 +#define RESET_PCIE2_GLOBAL 34 +#define RESET_EMAC0 35 +#define RESET_EMAC1 36 +#define RESET_JPG 37 +#define RESET_CCIC2PHY 38 +#define RESET_CCIC3PHY 39 +#define RESET_CSI 40 +#define RESET_ISP_CPP 41 +#define RESET_ISP_BUS 42 +#define RESET_ISP 43 +#define RESET_ISP_CI 44 +#define RESET_DPU_MCLK 45 +#define RESET_DPU_ESC 46 +#define RESET_DPU_HCLK 47 +#define RESET_DPU_SPIBUS 48 +#define RESET_DPU_SPI_HBUS 49 +#define RESET_V2D 50 +#define RESET_MIPI 51 +#define RESET_MC 52 + +/* RCPU resets */ +#define RESET_RCPU_SSP0 0 +#define RESET_RCPU_I2C0 1 +#define RESET_RCPU_UART1 2 +#define RESET_RCPU_IR 3 +#define RESET_RCPU_CAN 4 +#define RESET_RCPU_UART0 5 +#define RESET_RCPU_HDMI_AUDIO 6 + +/* RCPU2 resets */ +#define RESET_RCPU2_PWM0 0 +#define RESET_RCPU2_PWM1 1 +#define RESET_RCPU2_PWM2 2 +#define RESET_RCPU2_PWM3 3 +#define RESET_RCPU2_PWM4 4 +#define RESET_RCPU2_PWM5 5 +#define RESET_RCPU2_PWM6 6 +#define RESET_RCPU2_PWM7 7 +#define RESET_RCPU2_PWM8 8 +#define RESET_RCPU2_PWM9 9 + +/* APBC2 resets */ +#define RESET_APBC2_UART1 0 +#define RESET_APBC2_SSP2 1 +#define RESET_APBC2_TWSI3 2 +#define RESET_APBC2_RTC 3 +#define RESET_APBC2_TIMERS0 4 +#define RESET_APBC2_KPC 5 +#define RESET_APBC2_GPIO 6 + #endif /* _DT_BINDINGS_SPACEMIT_CCU_H_ */ -- cgit v1.2.3 From 9047685cfd2911c36ce89a16270aafa71057c507 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 26 Jun 2025 18:42:44 +0300 Subject: PM: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20250626154244.324265-1-andriy.shevchenko@linux.intel.com Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index f0bd8fbae4f2..938b1b446a5d 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -8,14 +8,15 @@ #ifndef _LINUX_PM_H #define _LINUX_PM_H +#include #include -#include -#include +#include +#include #include +#include +#include #include -#include -#include -#include +#include /* * Callbacks for platform drivers to implement. -- cgit v1.2.3 From c021c1b38f90d639423c1369625daa703a8472ea Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 27 Jun 2025 21:08:48 +0200 Subject: PM: Move two sleep-related functions under CONFIG_PM_SLEEP Since pm_runtime_force_resume() and pm_runtime_need_not_resume() are only needed for handling system-wide PM transitions, there is no reason to compile them in if CONFIG_PM_SLEEP is unset. Accordingly, move them under CONFIG_PM_SLEEP and make the static inline stub for pm_runtime_force_resume() return an error to indicate that it should not be used outside CONFIG_PM_SLEEP. Putting pm_runtime_force_resume() also allows subsequent changes to be more straightforward because this function is going to access a device PM flag that is only defined when CONFIG_PM_SLEEP is set. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Link: https://patch.msgid.link/3384523.aeNJFYEL58@rjwysocki.net --- include/linux/pm_runtime.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index e7cb70fcc0af..9bea07f22041 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -66,9 +66,7 @@ static inline bool queue_pm_work(struct work_struct *work) extern int pm_generic_runtime_suspend(struct device *dev); extern int pm_generic_runtime_resume(struct device *dev); -extern bool pm_runtime_need_not_resume(struct device *dev); extern int pm_runtime_force_suspend(struct device *dev); -extern int pm_runtime_force_resume(struct device *dev); extern int __pm_runtime_idle(struct device *dev, int rpmflags); extern int __pm_runtime_suspend(struct device *dev, int rpmflags); @@ -257,9 +255,7 @@ static inline bool queue_pm_work(struct work_struct *work) { return false; } static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; } static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } -static inline bool pm_runtime_need_not_resume(struct device *dev) {return true; } static inline int pm_runtime_force_suspend(struct device *dev) { return 0; } -static inline int pm_runtime_force_resume(struct device *dev) { return 0; } static inline int __pm_runtime_idle(struct device *dev, int rpmflags) { @@ -330,6 +326,18 @@ static inline void pm_runtime_release_supplier(struct device_link *link) {} #endif /* !CONFIG_PM */ +#ifdef CONFIG_PM_SLEEP + +bool pm_runtime_need_not_resume(struct device *dev); +int pm_runtime_force_resume(struct device *dev); + +#else /* !CONFIG_PM_SLEEP */ + +static inline bool pm_runtime_need_not_resume(struct device *dev) {return true; } +static inline int pm_runtime_force_resume(struct device *dev) { return -ENXIO; } + +#endif /* CONFIG_PM_SLEEP */ + /** * pm_runtime_idle - Conditionally set up autosuspend of a device or suspend it. * @dev: Target device. -- cgit v1.2.3 From ffda4ca4608ea811aee2aace211bbf27c68a8853 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 27 Jun 2025 21:23:42 +0200 Subject: PM: sleep: Add strict_midlayer flag to struct dev_pm_info Add a new flag, called strict_midlayer, to struct dev_pm_info, along with helper functions for updating and reading its value, to allow middle layer code that provides proper callbacks for device suspend- resume during system-wide PM transitions to let pm_runtime_force_suspend() and and pm_runtime_force_resume() know that they should only invoke runtime PM callbacks coming from the device's driver. Namely, if this flag is set, pm_runtime_force_suspend() and and pm_runtime_force_resume() will invoke runtime PM callbacks provided by the device's driver directly with the assumption that they have been called via a middle layer callback for device suspend or resume, respectively. For instance, acpi_general_pm_domain provides specific callback functions for system suspend, acpi_subsys_suspend(), acpi_subsys_suspend_late() and acpi_subsys_suspend_noirq(), and it does not expect its runtime suspend callback function, acpi_subsys_runtime_suspend(), to be invoked at any point during system suspend. In particular, it does not expect that function to be called from within any of the system suspend callback functions mentioned above which would happen if a device driver collaborating with acpi_general_pm_domain used pm_runtime_force_suspend() as its callback function for any system suspend phase later than "prepare". The new flag allows this expectation of acpi_general_pm_domain to be formally expressed, which is going to be done subsequently. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Link: https://patch.msgid.link/24017035.6Emhk5qWAg@rjwysocki.net --- include/linux/device.h | 27 +++++++++++++++++++++++++++ include/linux/pm.h | 1 + 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 4940db137fff..5137f9d213ec 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -879,6 +879,33 @@ static inline bool dev_pm_smart_suspend(struct device *dev) #endif } +/* + * dev_pm_set_strict_midlayer - Update the device's power.strict_midlayer flag + * @dev: Target device. + * @val: New flag value. + * + * When set, power.strict_midlayer means that the middle layer power management + * code (typically, a bus type or a PM domain) does not expect its runtime PM + * suspend callback to be invoked at all during system-wide PM transitions and + * it does not expect its runtime PM resume callback to be invoked at any point + * when runtime PM is disabled for the device during system-wide PM transitions. + */ +static inline void dev_pm_set_strict_midlayer(struct device *dev, bool val) +{ +#ifdef CONFIG_PM_SLEEP + dev->power.strict_midlayer = val; +#endif +} + +static inline bool dev_pm_strict_midlayer_is_set(struct device *dev) +{ +#ifdef CONFIG_PM_SLEEP + return dev->power.strict_midlayer; +#else + return false; +#endif +} + static inline void device_lock(struct device *dev) { mutex_lock(&dev->mutex); diff --git a/include/linux/pm.h b/include/linux/pm.h index f0bd8fbae4f2..4149d45f6f76 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -683,6 +683,7 @@ struct dev_pm_info { bool smart_suspend:1; /* Owned by the PM core */ bool must_resume:1; /* Owned by the PM core */ bool may_skip_resume:1; /* Set by subsystems */ + bool strict_midlayer:1; #else bool should_wakeup:1; #endif -- cgit v1.2.3 From f8e656382b4aa45ae51135b72262044550224920 Mon Sep 17 00:00:00 2001 From: Philip Radford Date: Mon, 30 Jun 2025 10:55:43 +0000 Subject: include: trace: Add tracepoint support for inflight xfer count Enhance the existing SCMI transfer tracepoints by including the current in-flight transfer count in `scmi_xfer_begin` and `scmi_xfer_end`. Introduce a new helper `scmi_inflight_count()` to retrieve the active transfer count from the SCMI debug counters when debug is enabled. This trace data is useful for visualizing transfer activity over time and identifying congestion or unexpected behavior in SCMI messaging. Reviewed-by: Cristian Marussi Signed-off-by: Philip Radford Message-Id: <20250630105544.531723-4-philip.radford@arm.com> Signed-off-by: Sudeep Holla --- include/trace/events/scmi.h | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/trace/events/scmi.h b/include/trace/events/scmi.h index 127300481123..703b7bb68e44 100644 --- a/include/trace/events/scmi.h +++ b/include/trace/events/scmi.h @@ -36,8 +36,8 @@ TRACE_EVENT(scmi_fc_call, TRACE_EVENT(scmi_xfer_begin, TP_PROTO(int transfer_id, u8 msg_id, u8 protocol_id, u16 seq, - bool poll), - TP_ARGS(transfer_id, msg_id, protocol_id, seq, poll), + bool poll, int inflight), + TP_ARGS(transfer_id, msg_id, protocol_id, seq, poll, inflight), TP_STRUCT__entry( __field(int, transfer_id) @@ -45,6 +45,7 @@ TRACE_EVENT(scmi_xfer_begin, __field(u8, protocol_id) __field(u16, seq) __field(bool, poll) + __field(int, inflight) ), TP_fast_assign( @@ -53,11 +54,12 @@ TRACE_EVENT(scmi_xfer_begin, __entry->protocol_id = protocol_id; __entry->seq = seq; __entry->poll = poll; + __entry->inflight = inflight; ), - TP_printk("pt=%02X msg_id=%02X seq=%04X transfer_id=%X poll=%u", - __entry->protocol_id, __entry->msg_id, __entry->seq, - __entry->transfer_id, __entry->poll) + TP_printk("pt=%02X msg_id=%02X seq=%04X transfer_id=%X poll=%u inflight=%d", + __entry->protocol_id, __entry->msg_id, __entry->seq, + __entry->transfer_id, __entry->poll, __entry->inflight) ); TRACE_EVENT(scmi_xfer_response_wait, @@ -90,8 +92,8 @@ TRACE_EVENT(scmi_xfer_response_wait, TRACE_EVENT(scmi_xfer_end, TP_PROTO(int transfer_id, u8 msg_id, u8 protocol_id, u16 seq, - int status), - TP_ARGS(transfer_id, msg_id, protocol_id, seq, status), + int status, int inflight), + TP_ARGS(transfer_id, msg_id, protocol_id, seq, status, inflight), TP_STRUCT__entry( __field(int, transfer_id) @@ -99,6 +101,7 @@ TRACE_EVENT(scmi_xfer_end, __field(u8, protocol_id) __field(u16, seq) __field(int, status) + __field(int, inflight) ), TP_fast_assign( @@ -107,11 +110,12 @@ TRACE_EVENT(scmi_xfer_end, __entry->protocol_id = protocol_id; __entry->seq = seq; __entry->status = status; + __entry->inflight = inflight; ), - TP_printk("pt=%02X msg_id=%02X seq=%04X transfer_id=%X s=%d", - __entry->protocol_id, __entry->msg_id, __entry->seq, - __entry->transfer_id, __entry->status) + TP_printk("pt=%02X msg_id=%02X seq=%04X transfer_id=%X s=%d inflight=%d", + __entry->protocol_id, __entry->msg_id, __entry->seq, + __entry->transfer_id, __entry->status, __entry->inflight) ); TRACE_EVENT(scmi_rx_done, -- cgit v1.2.3 From bf6239ddaa6a73a44cd8ea3afec5fc82ed900038 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 2 Jul 2025 06:37:04 -0500 Subject: soc: spacemit: create a header for clock/reset registers Move the definitions of register offsets and fields used by the SpacemiT K1 SoC CCUs into a separate header file, so that they can be shared by the reset driver that will be found under drivers/reset. Signed-off-by: Alex Elder Reviewed-by: Haylen Chu Reviewed-by: Yixun Lan Link: https://lore.kernel.org/r/20250702113709.291748-3-elder@riscstar.com Signed-off-by: Yixun Lan --- include/soc/spacemit/k1-syscon.h | 118 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 include/soc/spacemit/k1-syscon.h (limited to 'include') diff --git a/include/soc/spacemit/k1-syscon.h b/include/soc/spacemit/k1-syscon.h new file mode 100644 index 000000000000..039a448c51a0 --- /dev/null +++ b/include/soc/spacemit/k1-syscon.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* SpacemiT clock and reset driver definitions for the K1 SoC */ + +#ifndef __SOC_K1_SYSCON_H__ +#define __SOC_K1_SYSCON_H__ + +/* APBS register offset */ +#define APBS_PLL1_SWCR1 0x100 +#define APBS_PLL1_SWCR2 0x104 +#define APBS_PLL1_SWCR3 0x108 +#define APBS_PLL2_SWCR1 0x118 +#define APBS_PLL2_SWCR2 0x11c +#define APBS_PLL2_SWCR3 0x120 +#define APBS_PLL3_SWCR1 0x124 +#define APBS_PLL3_SWCR2 0x128 +#define APBS_PLL3_SWCR3 0x12c + +/* MPMU register offset */ +#define MPMU_POSR 0x0010 +#define POSR_PLL1_LOCK BIT(27) +#define POSR_PLL2_LOCK BIT(28) +#define POSR_PLL3_LOCK BIT(29) +#define MPMU_SUCCR 0x0014 +#define MPMU_ISCCR 0x0044 +#define MPMU_WDTPCR 0x0200 +#define MPMU_RIPCCR 0x0210 +#define MPMU_ACGR 0x1024 +#define MPMU_APBCSCR 0x1050 +#define MPMU_SUCCR_1 0x10b0 + +/* APBC register offset */ +#define APBC_UART1_CLK_RST 0x00 +#define APBC_UART2_CLK_RST 0x04 +#define APBC_GPIO_CLK_RST 0x08 +#define APBC_PWM0_CLK_RST 0x0c +#define APBC_PWM1_CLK_RST 0x10 +#define APBC_PWM2_CLK_RST 0x14 +#define APBC_PWM3_CLK_RST 0x18 +#define APBC_TWSI8_CLK_RST 0x20 +#define APBC_UART3_CLK_RST 0x24 +#define APBC_RTC_CLK_RST 0x28 +#define APBC_TWSI0_CLK_RST 0x2c +#define APBC_TWSI1_CLK_RST 0x30 +#define APBC_TIMERS1_CLK_RST 0x34 +#define APBC_TWSI2_CLK_RST 0x38 +#define APBC_AIB_CLK_RST 0x3c +#define APBC_TWSI4_CLK_RST 0x40 +#define APBC_TIMERS2_CLK_RST 0x44 +#define APBC_ONEWIRE_CLK_RST 0x48 +#define APBC_TWSI5_CLK_RST 0x4c +#define APBC_DRO_CLK_RST 0x58 +#define APBC_IR_CLK_RST 0x5c +#define APBC_TWSI6_CLK_RST 0x60 +#define APBC_COUNTER_CLK_SEL 0x64 +#define APBC_TWSI7_CLK_RST 0x68 +#define APBC_TSEN_CLK_RST 0x6c +#define APBC_UART4_CLK_RST 0x70 +#define APBC_UART5_CLK_RST 0x74 +#define APBC_UART6_CLK_RST 0x78 +#define APBC_SSP3_CLK_RST 0x7c +#define APBC_SSPA0_CLK_RST 0x80 +#define APBC_SSPA1_CLK_RST 0x84 +#define APBC_IPC_AP2AUD_CLK_RST 0x90 +#define APBC_UART7_CLK_RST 0x94 +#define APBC_UART8_CLK_RST 0x98 +#define APBC_UART9_CLK_RST 0x9c +#define APBC_CAN0_CLK_RST 0xa0 +#define APBC_PWM4_CLK_RST 0xa8 +#define APBC_PWM5_CLK_RST 0xac +#define APBC_PWM6_CLK_RST 0xb0 +#define APBC_PWM7_CLK_RST 0xb4 +#define APBC_PWM8_CLK_RST 0xb8 +#define APBC_PWM9_CLK_RST 0xbc +#define APBC_PWM10_CLK_RST 0xc0 +#define APBC_PWM11_CLK_RST 0xc4 +#define APBC_PWM12_CLK_RST 0xc8 +#define APBC_PWM13_CLK_RST 0xcc +#define APBC_PWM14_CLK_RST 0xd0 +#define APBC_PWM15_CLK_RST 0xd4 +#define APBC_PWM16_CLK_RST 0xd8 +#define APBC_PWM17_CLK_RST 0xdc +#define APBC_PWM18_CLK_RST 0xe0 +#define APBC_PWM19_CLK_RST 0xe4 + +/* APMU register offset */ +#define APMU_JPG_CLK_RES_CTRL 0x020 +#define APMU_CSI_CCIC2_CLK_RES_CTRL 0x024 +#define APMU_ISP_CLK_RES_CTRL 0x038 +#define APMU_LCD_CLK_RES_CTRL1 0x044 +#define APMU_LCD_SPI_CLK_RES_CTRL 0x048 +#define APMU_LCD_CLK_RES_CTRL2 0x04c +#define APMU_CCIC_CLK_RES_CTRL 0x050 +#define APMU_SDH0_CLK_RES_CTRL 0x054 +#define APMU_SDH1_CLK_RES_CTRL 0x058 +#define APMU_USB_CLK_RES_CTRL 0x05c +#define APMU_QSPI_CLK_RES_CTRL 0x060 +#define APMU_DMA_CLK_RES_CTRL 0x064 +#define APMU_AES_CLK_RES_CTRL 0x068 +#define APMU_VPU_CLK_RES_CTRL 0x0a4 +#define APMU_GPU_CLK_RES_CTRL 0x0cc +#define APMU_SDH2_CLK_RES_CTRL 0x0e0 +#define APMU_PMUA_MC_CTRL 0x0e8 +#define APMU_PMU_CC2_AP 0x100 +#define APMU_PMUA_EM_CLK_RES_CTRL 0x104 +#define APMU_AUDIO_CLK_RES_CTRL 0x14c +#define APMU_HDMI_CLK_RES_CTRL 0x1b8 +#define APMU_CCI550_CLK_CTRL 0x300 +#define APMU_ACLK_CLK_CTRL 0x388 +#define APMU_CPU_C0_CLK_CTRL 0x38C +#define APMU_CPU_C1_CLK_CTRL 0x390 +#define APMU_PCIE_CLK_RES_CTRL_0 0x3cc +#define APMU_PCIE_CLK_RES_CTRL_1 0x3d4 +#define APMU_PCIE_CLK_RES_CTRL_2 0x3dc +#define APMU_EMAC0_CLK_RES_CTRL 0x3e4 +#define APMU_EMAC1_CLK_RES_CTRL 0x3ec + +#endif /* __SOC_K1_SYSCON_H__ */ -- cgit v1.2.3 From 988543522ebd6a9af53c288833503f0501e401b0 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 2 Jul 2025 06:37:05 -0500 Subject: clk: spacemit: set up reset auxiliary devices Add a new reset_name field to the spacemit_ccu_data structure. If it is non-null, the CCU implements a reset controller, and the name will be used in the name for the auxiliary device that implements it. Define a new type to hold an auxiliary device as well as the regmap pointer that will be needed by CCU reset controllers. Set up code to initialize and add an auxiliary device for any CCU that implements reset functionality. Make it optional for a CCU to implement a clock controller. This doesn't apply to any of the existing CCUs but will for some new ones that will be added soon. Signed-off-by: Alex Elder Reviewed-by: Haylen Chu Reviewed-by: Yixun Lan Link: https://lore.kernel.org/r/20250702113709.291748-4-elder@riscstar.com Signed-off-by: Yixun Lan --- include/soc/spacemit/k1-syscon.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/soc/spacemit/k1-syscon.h b/include/soc/spacemit/k1-syscon.h index 039a448c51a0..53eff7691f33 100644 --- a/include/soc/spacemit/k1-syscon.h +++ b/include/soc/spacemit/k1-syscon.h @@ -5,6 +5,18 @@ #ifndef __SOC_K1_SYSCON_H__ #define __SOC_K1_SYSCON_H__ +/* Auxiliary device used to represent a CCU reset controller */ +struct spacemit_ccu_adev { + struct auxiliary_device adev; + struct regmap *regmap; +}; + +static inline struct spacemit_ccu_adev * +to_spacemit_ccu_adev(struct auxiliary_device *adev) +{ + return container_of(adev, struct spacemit_ccu_adev, adev); +} + /* APBS register offset */ #define APBS_PLL1_SWCR1 0x100 #define APBS_PLL1_SWCR2 0x104 -- cgit v1.2.3 From f45b2949b1a235881255132a119b8cc8c3738bd5 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Tue, 1 Jul 2025 22:11:21 +0200 Subject: clk: sunxi-ng: v3s: Fix CSI SCLK clock name The CSI SCLK clock is incorrectly called CSI1 SCLK while it is used for both the CSI0 and CSI1 interfaces and is called CSI SCLK all around the documentation. Fix the name in the driver, header and device-tree. Fixes: d0f11d14b0bc ("clk: sunxi-ng: add support for V3s CCU") Signed-off-by: Paul Kocialkowski Reviewed-By: Icenowy Zheng Link: https://patch.msgid.link/20250701201124.812882-3-paulk@sys-base.io Signed-off-by: Chen-Yu Tsai --- include/dt-bindings/clock/sun8i-v3s-ccu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/clock/sun8i-v3s-ccu.h b/include/dt-bindings/clock/sun8i-v3s-ccu.h index 014ac6123d17..c4055629c9f9 100644 --- a/include/dt-bindings/clock/sun8i-v3s-ccu.h +++ b/include/dt-bindings/clock/sun8i-v3s-ccu.h @@ -96,7 +96,7 @@ #define CLK_TCON0 64 #define CLK_CSI_MISC 65 #define CLK_CSI0_MCLK 66 -#define CLK_CSI1_SCLK 67 +#define CLK_CSI_SCLK 67 #define CLK_CSI1_MCLK 68 #define CLK_VE 69 #define CLK_AC_DIG 70 -- cgit v1.2.3 From 59710a26a289ad4e7ef227d22063e964930928b0 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 30 Jun 2025 15:37:46 -0400 Subject: Bluetooth: hci_core: Remove check of BDADDR_ANY in hci_conn_hash_lookup_big_state The check for destination to be BDADDR_ANY is no longer necessary with the introduction of BIS_LINK. Fixes: 23205562ffc8 ("Bluetooth: separate CIS_LINK and BIS_LINK link types") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9fc8f544e20e..0da011fc8146 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1350,8 +1350,7 @@ hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != BIS_LINK || bacmp(&c->dst, BDADDR_ANY) || - c->state != state) + if (c->type != BIS_LINK || c->state != state) continue; if (handle == c->iso_qos.bcast.big) { -- cgit v1.2.3 From 86c947b363f003153768d879ee15f8358cbf29c5 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 2 Jul 2025 16:28:31 -0700 Subject: drm: Simplify drmm_alloc_ordered_workqueue return Rather than returning ERR_PTR or NULL on failure, replace the NULL return with ERR_PTR(-ENOMEM). This simplifies error handling at the caller. While here, add kernel documentation for drmm_alloc_ordered_workqueue. Cc: Louis Chauvet Signed-off-by: Matthew Brost Reviewed-by: Louis Chauvet Link: https://lore.kernel.org/r/20250702232831.3271328-2-matthew.brost@intel.com --- include/drm/drm_managed.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_managed.h b/include/drm/drm_managed.h index 53017cc609ac..72bfac002c06 100644 --- a/include/drm/drm_managed.h +++ b/include/drm/drm_managed.h @@ -129,14 +129,25 @@ void __drmm_mutex_release(struct drm_device *dev, void *res); void __drmm_workqueue_release(struct drm_device *device, void *wq); +/** + * drmm_alloc_ordered_workqueue - &drm_device managed alloc_ordered_workqueue() + * @dev: DRM device + * @fmt: printf format for the name of the workqueue + * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) + * @args: args for @fmt + * + * This is a &drm_device-managed version of alloc_ordered_workqueue(). The + * allocated workqueue is automatically destroyed on the final drm_dev_put(). + * + * Returns: workqueue on success, negative ERR_PTR otherwise. + */ #define drmm_alloc_ordered_workqueue(dev, fmt, flags, args...) \ ({ \ struct workqueue_struct *wq = alloc_ordered_workqueue(fmt, flags, ##args); \ wq ? ({ \ int ret = drmm_add_action_or_reset(dev, __drmm_workqueue_release, wq); \ ret ? ERR_PTR(ret) : wq; \ - }) : \ - wq; \ + }) : ERR_PTR(-ENOMEM); \ }) #endif -- cgit v1.2.3 From 803f0700a3bbf528c4c624a22f87d08178ca0fbe Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Wed, 2 Jul 2025 23:39:56 +0800 Subject: bpf: Show precise link_type for {uprobe,kprobe}_multi fdinfo Alexei suggested, 'link_type' can be more precise and differentiate for human in fdinfo. In fact BPF_LINK_TYPE_KPROBE_MULTI includes kretprobe_multi type, the same as BPF_LINK_TYPE_UPROBE_MULTI, so we can show it more concretely. link_type: kprobe_multi link_id: 1 prog_tag: d2b307e915f0dd37 ... link_type: kretprobe_multi link_id: 2 prog_tag: ab9ea0545870781d ... link_type: uprobe_multi link_id: 9 prog_tag: e729f789e34a8eca ... link_type: uretprobe_multi link_id: 10 prog_tag: 7db356c03e61a4d4 Co-developed-by: Jiri Olsa Signed-off-by: Jiri Olsa Signed-off-by: Tao Chen Link: https://lore.kernel.org/r/20250702153958.639852-1-chen.dylane@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5dd556e89cce..287c956cdbd2 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1702,6 +1702,7 @@ struct bpf_link { * link's semantics is determined by target attach hook */ bool sleepable; + u32 flags; /* rcu is used before freeing, work can be used to schedule that * RCU-based freeing before that, so they never overlap */ -- cgit v1.2.3 From 0426729f46cd1f6354fad07267a21579186a5757 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 3 Jul 2025 13:48:07 -0700 Subject: bpf: Refactor bprintf buffer support Refactor code to be able to get and put bprintf buffers and use bpf_printf_prepare independently. This will be used in the next patch to implement BPF streams support, particularly as a staging buffer for strings that need to be formatted and then allocated and pushed into a stream. Reviewed-by: Emil Tsalapatis Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250703204818.925464-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 287c956cdbd2..a07451457d9e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3551,6 +3551,16 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id); #define MAX_BPRINTF_VARARGS 12 #define MAX_BPRINTF_BUF 1024 +/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary + * arguments representation. + */ +#define MAX_BPRINTF_BIN_ARGS 512 + +struct bpf_bprintf_buffers { + char bin_args[MAX_BPRINTF_BIN_ARGS]; + char buf[MAX_BPRINTF_BUF]; +}; + struct bpf_bprintf_data { u32 *bin_args; char *buf; @@ -3558,9 +3568,12 @@ struct bpf_bprintf_data { bool get_buf; }; -int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, +int bpf_bprintf_prepare(const char *fmt, u32 fmt_size, const u64 *raw_args, u32 num_args, struct bpf_bprintf_data *data); void bpf_bprintf_cleanup(struct bpf_bprintf_data *data); +int bpf_try_get_buffers(struct bpf_bprintf_buffers **bufs); +void bpf_put_buffers(void); + #ifdef CONFIG_BPF_LSM void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype); -- cgit v1.2.3 From 5ab154f1463a111e1dc8fd5d31eaa7a2a71fe2e6 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 3 Jul 2025 13:48:08 -0700 Subject: bpf: Introduce BPF standard streams Add support for a stream API to the kernel and expose related kfuncs to BPF programs. Two streams are exposed, BPF_STDOUT and BPF_STDERR. These can be used for printing messages that can be consumed from user space, thus it's similar in spirit to existing trace_pipe interface. The kernel will use the BPF_STDERR stream to notify the program of any errors encountered at runtime. BPF programs themselves may use both streams for writing debug messages. BPF library-like code may use BPF_STDERR to print warnings or errors on misuse at runtime. The implementation of a stream is as follows. Everytime a message is emitted from the kernel (directly, or through a BPF program), a record is allocated by bump allocating from per-cpu region backed by a page obtained using alloc_pages_nolock(). This ensures that we can allocate memory from any context. The eventual plan is to discard this scheme in favor of Alexei's kmalloc_nolock() [0]. This record is then locklessly inserted into a list (llist_add()) so that the printing side doesn't require holding any locks, and works in any context. Each stream has a maximum capacity of 4MB of text, and each printed message is accounted against this limit. Messages from a program are emitted using the bpf_stream_vprintk kfunc, which takes a stream_id argument in addition to working otherwise similar to bpf_trace_vprintk. The bprintf buffer helpers are extracted out to be reused for printing the string into them before copying it into the stream, so that we can (with the defined max limit) format a string and know its true length before performing allocations of the stream element. For consuming elements from a stream, we expose a bpf(2) syscall command named BPF_PROG_STREAM_READ_BY_FD, which allows reading data from the stream of a given prog_fd into a user space buffer. The main logic is implemented in bpf_stream_read(). The log messages are queued in bpf_stream::log by the bpf_stream_vprintk kfunc, and then pulled and ordered correctly in the stream backlog. For this purpose, we hold a lock around bpf_stream_backlog_peek(), as llist_del_first() (if we maintained a second lockless list for the backlog) wouldn't be safe from multiple threads anyway. Then, if we fail to find something in the backlog log, we splice out everything from the lockless log, and place it in the backlog log, and then return the head of the backlog. Once the full length of the element is consumed, we will pop it and free it. The lockless list bpf_stream::log is a LIFO stack. Elements obtained using a llist_del_all() operation are in LIFO order, thus would break the chronological ordering if printed directly. Hence, this batch of messages is first reversed. Then, it is stashed into a separate list in the stream, i.e. the backlog_log. The head of this list is the actual message that should always be returned to the caller. All of this is done in bpf_stream_backlog_fill(). From the kernel side, the writing into the stream will be a bit more involved than the typical printk. First, the kernel typically may print a collection of messages into the stream, and parallel writers into the stream may suffer from interleaving of messages. To ensure each group of messages is visible atomically, we can lift the advantage of using a lockless list for pushing in messages. To enable this, we add a bpf_stream_stage() macro, and require kernel users to use bpf_stream_printk statements for the passed expression to write into the stream. Underneath the macro, we have a message staging API, where a bpf_stream_stage object on the stack accumulates the messages being printed into a local llist_head, and then a commit operation splices the whole batch into the stream's lockless log list. This is especially pertinent for rqspinlock deadlock messages printed to program streams. After this change, we see each deadlock invocation as a non-interleaving contiguous message without any confusion on the reader's part, improving their user experience in debugging the fault. While programs cannot benefit from this staged stream writing API, they could just as well hold an rqspinlock around their print statements to serialize messages, hence this is kept kernel-internal for now. Overall, this infrastructure provides NMI-safe any context printing of messages to two dedicated streams. Later patches will add support for printing splats in case of BPF arena page faults, rqspinlock deadlocks, and cond_break timeouts, and integration of this facility into bpftool for dumping messages to user space. [0]: https://lore.kernel.org/bpf/20250501032718.65476-1-alexei.starovoitov@gmail.com Reviewed-by: Eduard Zingerman Reviewed-by: Emil Tsalapatis Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250703204818.925464-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/bpf.h | 24 ++++++++++++++++++++++ 2 files changed, 76 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a07451457d9e..f61aeccb23c3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1538,6 +1538,37 @@ struct btf_mod_pair { struct bpf_kfunc_desc_tab; +enum bpf_stream_id { + BPF_STDOUT = 1, + BPF_STDERR = 2, +}; + +struct bpf_stream_elem { + struct llist_node node; + int total_len; + int consumed_len; + char str[]; +}; + +enum { + /* 100k bytes */ + BPF_STREAM_MAX_CAPACITY = 100000ULL, +}; + +struct bpf_stream { + atomic_t capacity; + struct llist_head log; /* list of in-flight stream elements in LIFO order */ + + struct mutex lock; /* lock protecting backlog_{head,tail} */ + struct llist_node *backlog_head; /* list of in-flight stream elements in FIFO order */ + struct llist_node *backlog_tail; /* tail of the list above */ +}; + +struct bpf_stream_stage { + struct llist_head log; + int len; +}; + struct bpf_prog_aux { atomic64_t refcnt; u32 used_map_cnt; @@ -1646,6 +1677,7 @@ struct bpf_prog_aux { struct work_struct work; struct rcu_head rcu; }; + struct bpf_stream stream[2]; }; struct bpf_prog { @@ -2409,6 +2441,7 @@ int generic_map_delete_batch(struct bpf_map *map, struct bpf_map *bpf_map_get_curr_or_next(u32 *id); struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); + int bpf_map_alloc_pages(const struct bpf_map *map, int nid, unsigned long nr_pages, struct page **page_array); #ifdef CONFIG_MEMCG @@ -3574,6 +3607,25 @@ void bpf_bprintf_cleanup(struct bpf_bprintf_data *data); int bpf_try_get_buffers(struct bpf_bprintf_buffers **bufs); void bpf_put_buffers(void); +void bpf_prog_stream_init(struct bpf_prog *prog); +void bpf_prog_stream_free(struct bpf_prog *prog); +int bpf_prog_stream_read(struct bpf_prog *prog, enum bpf_stream_id stream_id, void __user *buf, int len); +void bpf_stream_stage_init(struct bpf_stream_stage *ss); +void bpf_stream_stage_free(struct bpf_stream_stage *ss); +__printf(2, 3) +int bpf_stream_stage_printk(struct bpf_stream_stage *ss, const char *fmt, ...); +int bpf_stream_stage_commit(struct bpf_stream_stage *ss, struct bpf_prog *prog, + enum bpf_stream_id stream_id); + +#define bpf_stream_printk(ss, ...) bpf_stream_stage_printk(&ss, __VA_ARGS__) + +#define bpf_stream_stage(ss, prog, stream_id, expr) \ + ({ \ + bpf_stream_stage_init(&ss); \ + (expr); \ + bpf_stream_stage_commit(&ss, prog, stream_id); \ + bpf_stream_stage_free(&ss); \ + }) #ifdef CONFIG_BPF_LSM void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 719ba230032f..0670e15a6100 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -906,6 +906,17 @@ union bpf_iter_link_info { * A new file descriptor (a nonnegative integer), or -1 if an * error occurred (in which case, *errno* is set appropriately). * + * BPF_PROG_STREAM_READ_BY_FD + * Description + * Read data of a program's BPF stream. The program is identified + * by *prog_fd*, and the stream is identified by the *stream_id*. + * The data is copied to a buffer pointed to by *stream_buf*, and + * filled less than or equal to *stream_buf_len* bytes. + * + * Return + * Number of bytes read from the stream on success, or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -961,6 +972,7 @@ enum bpf_cmd { BPF_LINK_DETACH, BPF_PROG_BIND_MAP, BPF_TOKEN_CREATE, + BPF_PROG_STREAM_READ_BY_FD, __MAX_BPF_CMD, }; @@ -1463,6 +1475,11 @@ struct bpf_stack_build_id { #define BPF_OBJ_NAME_LEN 16U +enum { + BPF_STREAM_STDOUT = 1, + BPF_STREAM_STDERR = 2, +}; + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -1849,6 +1866,13 @@ union bpf_attr { __u32 bpffs_fd; } token_create; + struct { + __aligned_u64 stream_buf; + __u32 stream_buf_len; + __u32 stream_id; + __u32 prog_fd; + } prog_stream_read; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF -- cgit v1.2.3 From 0e521efaf36350b8f783984541efa56f560c90b0 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 3 Jul 2025 13:48:09 -0700 Subject: bpf: Add function to extract program source info Prepare a function for use in future patches that can extract the file info, line info, and the source line number for a given BPF program provided it's program counter. Only the basename of the file path is provided, given it can be excessively long in some cases. This will be used in later patches to print source info to the BPF stream. Reviewed-by: Emil Tsalapatis Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250703204818.925464-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f61aeccb23c3..c3802af11bac 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3661,4 +3661,7 @@ static inline bool bpf_is_subprog(const struct bpf_prog *prog) return prog->aux->func_idx != 0; } +int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char **filep, + const char **linep, int *nump); + #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From f0c53fd4a742f957da7077a691a85ef9775907dc Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 3 Jul 2025 13:48:11 -0700 Subject: bpf: Add function to find program from stack trace In preparation of figuring out the closest program that led to the current point in the kernel, implement a function that scans through the stack trace and finds out the closest BPF program when walking down the stack trace. Special care needs to be taken to skip over kernel and BPF subprog frames. We basically scan until we find a BPF main prog frame. The assumption is that if a program calls into us transitively, we'll hit it along the way. If not, we end up returning NULL. Contextually the function will be used in places where we know the program may have called into us. Due to reliance on arch_bpf_stack_walk(), this function only works on x86 with CONFIG_UNWINDER_ORC, arm64, and s390. Remove the warning from arch_bpf_stack_walk as well since we call it outside bpf_throw() context. Acked-by: Eduard Zingerman Reviewed-by: Emil Tsalapatis Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250703204818.925464-6-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c3802af11bac..b267c378d884 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3663,5 +3663,6 @@ static inline bool bpf_is_subprog(const struct bpf_prog *prog) int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char **filep, const char **linep, int *nump); +struct bpf_prog *bpf_prog_find_from_stack(void); #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From d7c431cafcb4917b0d87b5cd10637cd47b6c8d79 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 3 Jul 2025 13:48:12 -0700 Subject: bpf: Add dump_stack() analogue to print to BPF stderr Introduce a kernel function which is the analogue of dump_stack() printing some useful information and the stack trace. This is not exposed to BPF programs yet, but can be made available in the future. When we have a program counter for a BPF program in the stack trace, also additionally output the filename and line number to make the trace helpful. The rest of the trace can be passed into ./decode_stacktrace.sh to obtain the line numbers for kernel symbols. Reviewed-by: Emil Tsalapatis Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250703204818.925464-7-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b267c378d884..34dd90ec7fad 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3616,8 +3616,10 @@ __printf(2, 3) int bpf_stream_stage_printk(struct bpf_stream_stage *ss, const char *fmt, ...); int bpf_stream_stage_commit(struct bpf_stream_stage *ss, struct bpf_prog *prog, enum bpf_stream_id stream_id); +int bpf_stream_stage_dump_stack(struct bpf_stream_stage *ss); #define bpf_stream_printk(ss, ...) bpf_stream_stage_printk(&ss, __VA_ARGS__) +#define bpf_stream_dump_stack(ss) bpf_stream_stage_dump_stack(&ss) #define bpf_stream_stage(ss, prog, stream_id, expr) \ ({ \ -- cgit v1.2.3 From 82bc4abf28d8147dd5da9ba52f0aa1bac23c125e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 3 Jul 2025 07:11:17 -0700 Subject: bpf: Avoid putting struct bpf_scc_callchain variables on the stack Add a 'struct bpf_scc_callchain callchain_buf' field in bpf_verifier_env. This way, the previous bpf_scc_callchain local variables can be replaced by taking address of env->callchain_buf. This can reduce stack usage and fix the following error: kernel/bpf/verifier.c:19921:12: error: stack frame size (1368) exceeds limit (1280) in 'do_check' [-Werror,-Wframe-larger-than] Reported-by: Arnd Bergmann Acked-by: Jiri Olsa Acked-by: Eduard Zingerman Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20250703141117.1485108-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7e459e839f8b..94defa405c85 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -841,6 +841,7 @@ struct bpf_verifier_env { char tmp_str_buf[TMP_STR_BUF_LEN]; struct bpf_insn insn_buf[INSN_BUF_SIZE]; struct bpf_insn epilogue_buf[INSN_BUF_SIZE]; + struct bpf_scc_callchain callchain_buf; /* array of pointers to bpf_scc_info indexed by SCC id */ struct bpf_scc_info **scc_info; u32 scc_cnt; -- cgit v1.2.3 From 2b9996417e4ec231c91818f9ea8107ae62ef75ad Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Fri, 4 Jul 2025 00:23:08 +0200 Subject: af_unix/scm: fix whitespace errors Fix whitespace/formatting errors. Cc: linux-kernel@vger.kernel.org Cc: netdev@vger.kernel.org Cc: David S. Miller Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Simon Horman Cc: Leon Romanovsky Cc: Arnd Bergmann Cc: Christian Brauner Cc: Kuniyuki Iwashima Cc: Lennart Poettering Cc: Luca Boccassi Cc: David Rheinsberg Signed-off-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/20250703222314.309967-5-aleksandr.mikhalitsyn@canonical.com Reviewed-by: Kuniyuki Iwashima Signed-off-by: Christian Brauner --- include/net/scm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/scm.h b/include/net/scm.h index 84c4707e78a5..c52519669349 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -69,7 +69,7 @@ static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_co static __inline__ void scm_set_cred(struct scm_cookie *scm, struct pid *pid, kuid_t uid, kgid_t gid) { - scm->pid = get_pid(pid); + scm->pid = get_pid(pid); scm->creds.pid = pid_vnr(pid); scm->creds.uid = uid; scm->creds.gid = gid; @@ -78,7 +78,7 @@ static __inline__ void scm_set_cred(struct scm_cookie *scm, static __inline__ void scm_destroy_cred(struct scm_cookie *scm) { put_pid(scm->pid); - scm->pid = NULL; + scm->pid = NULL; } static __inline__ void scm_destroy(struct scm_cookie *scm) -- cgit v1.2.3 From 6d4405b16d37090a0c905079eab951cfb5044a65 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 3 Jul 2025 19:36:19 +0900 Subject: ata: libata-core: Cache the general purpose log directory The function ata_log_supported() tests if a log page is supported by a device using the General Purpose Log Directory log page, which lists the size of all surported log pages. However, this log page is read from the device using ata_read_log_page() every time ata_log_supported() is called. That is not necessary. Avoid reading the General Purpose Log Directory log page by caching its content in the gp_log_dir buffer defined as part of struct ata_device. The functions ata_read_log_directory() and ata_clear_log_directory() are introduced to manage this buffer. ata_clear_log_directory() zero-fill the gp_log_dir buffer every time ata_dev_configure() is called, that is, when the device is first scanned and when it is being revalidated. The function ata_log_supported() is modified to call ata_read_log_directory() instead of ata_read_log_page(). The function ata_read_log_directory() calls ata_read_log_page() to read the General Purpose Log Directory log page from the device only if the first 16-bits word of the log is not equal to 0x0001, that is, it is not equal to the ACS mandated value for the log version. With this, the log page is read from the device only once for every ata_dev_configure() call. For instance, with pr_debug enabled, a call to ata_dev_configure() before this patch generates the following log page accesses: ata3.00: read log page - log 0x0, page 0x0 ata3.00: read log page - log 0x13, page 0x0 ata3.00: read log page - log 0x0, page 0x0 ata3.00: read log page - log 0x12, page 0x0 ata3.00: read log page - log 0x0, page 0x0 ata3.00: read log page - log 0x30, page 0x0 ata3.00: read log page - log 0x30, page 0x8 ata3.00: read log page - log 0x0, page 0x0 ata3.00: read log page - log 0x0, page 0x0 ata3.00: read log page - log 0x0, page 0x0 ata3.00: read log page - log 0x30, page 0x0 ata3.00: read log page - log 0x0, page 0x0 ata3.00: read log page - log 0x30, page 0x0 ata3.00: read log page - log 0x30, page 0x3 ata3.00: read log page - log 0x30, page 0x4 ata3.00: read log page - log 0x18, page 0x0 That is, the general purpose log directory page is read 7 times. With this patch applied, the number of accesses to this log page is reduced to one: ata3.00: read log page - log 0x0, page 0x0 ata3.00: read log page - log 0x13, page 0x0 ata3.00: read log page - log 0x12, page 0x0 ata3.00: read log page - log 0x30, page 0x0 ata3.00: read log page - log 0x30, page 0x8 ata3.00: read log page - log 0x30, page 0x0 ata3.00: read log page - log 0x30, page 0x0 ata3.00: read log page - log 0x30, page 0x3 ata3.00: read log page - log 0x30, page 0x4 ata3.00: read log page - log 0x18, page 0x0 Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Link: https://lore.kernel.org/r/20250703103622.291272-2-dlemoal@kernel.org Signed-off-by: Niklas Cassel --- include/linux/libata.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 7462218312ad..78a4addc6659 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -761,6 +761,9 @@ struct ata_device { u32 gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */ } ____cacheline_aligned; + /* General Purpose Log Directory log page */ + u8 gp_log_dir[ATA_SECT_SIZE] ____cacheline_aligned; + /* DEVSLP Timing Variables from Identify Device Data Log */ u8 devslp_timing[ATA_LOG_DEVSLP_SIZE]; -- cgit v1.2.3 From 31921e87b2d2614e261096fdabedef1db7679611 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 3 Jul 2025 19:36:22 +0900 Subject: ata: libata-core: Rename ata_do_set_mode() With the renaming of libata-eh ata_set_mode() function to ata_eh_set_mode(), libata-core function ata_do_set_mode() can now be renamed to the simpler ata_set_mode(). All the call sites of the former ata_do_set_mode() are updated to use the new function name. No functional changes. Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Link: https://lore.kernel.org/r/20250703103622.291272-5-dlemoal@kernel.org Signed-off-by: Niklas Cassel --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 78a4addc6659..d092747be588 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1218,7 +1218,7 @@ extern int ata_ncq_prio_enabled(struct ata_port *ap, struct scsi_device *sdev, extern int ata_ncq_prio_enable(struct ata_port *ap, struct scsi_device *sdev, bool enable); extern struct ata_device *ata_dev_pair(struct ata_device *adev); -extern int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev); +int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev); extern void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap); extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, struct list_head *eh_q); -- cgit v1.2.3 From 9bc35edb9ac76b23285e7ceafa1576ce53da9b6c Mon Sep 17 00:00:00 2001 From: Etienne Carriere Date: Tue, 3 Jun 2025 11:02:09 +0200 Subject: dt-bindings: regulator: Add STM32MP15 SCMI regulator identifiers These bindings will be used for the SCMI voltage domain. Signed-off-by: Etienne Carriere Signed-off-by: Pascal Paillet Signed-off-by: Amelie Delaunay Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250603-stm32mp157f-dk2-v2-3-5be0854a9299@foss.st.com Signed-off-by: Alexandre Torgue --- .../dt-bindings/regulator/st,stm32mp15-regulator.h | 40 ++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 include/dt-bindings/regulator/st,stm32mp15-regulator.h (limited to 'include') diff --git a/include/dt-bindings/regulator/st,stm32mp15-regulator.h b/include/dt-bindings/regulator/st,stm32mp15-regulator.h new file mode 100644 index 000000000000..7052507cb3e5 --- /dev/null +++ b/include/dt-bindings/regulator/st,stm32mp15-regulator.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (C) 2025, STMicroelectronics - All Rights Reserved + */ + +#ifndef __DT_BINDINGS_REGULATOR_ST_STM32MP15_REGULATOR_H +#define __DT_BINDINGS_REGULATOR_ST_STM32MP15_REGULATOR_H + +/* SCMI voltage domain identifiers */ + +/* SOC Internal regulators */ +#define VOLTD_SCMI_REG11 0 +#define VOLTD_SCMI_REG18 1 +#define VOLTD_SCMI_USB33 2 + +/* STPMIC1 regulators */ +#define VOLTD_SCMI_STPMIC1_BUCK1 3 +#define VOLTD_SCMI_STPMIC1_BUCK2 4 +#define VOLTD_SCMI_STPMIC1_BUCK3 5 +#define VOLTD_SCMI_STPMIC1_BUCK4 6 +#define VOLTD_SCMI_STPMIC1_LDO1 7 +#define VOLTD_SCMI_STPMIC1_LDO2 8 +#define VOLTD_SCMI_STPMIC1_LDO3 9 +#define VOLTD_SCMI_STPMIC1_LDO4 10 +#define VOLTD_SCMI_STPMIC1_LDO5 11 +#define VOLTD_SCMI_STPMIC1_LDO6 12 +#define VOLTD_SCMI_STPMIC1_VREFDDR 13 +#define VOLTD_SCMI_STPMIC1_BOOST 14 +#define VOLTD_SCMI_STPMIC1_PWR_SW1 15 +#define VOLTD_SCMI_STPMIC1_PWR_SW2 16 +#define VOLTD_SCMI_VREFBUF 17 + +/* External regulators */ +#define VOLTD_SCMI_REGU0 18 +#define VOLTD_SCMI_REGU1 19 +#define VOLTD_SCMI_REGU2 20 +#define VOLTD_SCMI_REGU3 21 +#define VOLTD_SCMI_REGU4 22 + +#endif /*__DT_BINDINGS_REGULATOR_ST_STM32MP15_REGULATOR_H */ -- cgit v1.2.3 From 60bc47b5a0b164082d448815d7db3066266aa3ed Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 1 Jul 2025 19:02:13 +0800 Subject: watchdog/perf: Provide function for adjusting the event period Architecture's using perf events for hard lockup detection needs to convert the watchdog_thresh to the event's period, some architecture for example arm64 perform this conversion using the CPU's maximum frequency which will be acquired by cpufreq. However by the time the lockup detector's initialized the cpufreq driver may not be initialized, thus launch a watchdog with inaccurate period. Provide a function hardlockup_detector_perf_adjust_period() to allowing adjust the event period. Then architecture can update with more accurate period if cpufreq is initialized. Reviewed-by: Douglas Anderson Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20250701110214.27242-2-yangyicong@huawei.com Signed-off-by: Will Deacon --- include/linux/nmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index e78fa535f61d..cf3c6ab408aa 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -103,10 +103,12 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); extern void hardlockup_config_perf_event(const char *str); +extern void hardlockup_detector_perf_adjust_period(u64 period); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } static inline void hardlockup_config_perf_event(const char *str) { } +static inline void hardlockup_detector_perf_adjust_period(u64 period) { } #endif void watchdog_hardlockup_stop(void); -- cgit v1.2.3 From 348fe34a6186a53acda44a3501d4e5a4f1f5958e Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 4 Jul 2025 13:25:45 +0530 Subject: drm: move drm based debugfs funcs to drm_debugfs.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Requirement is to create per client-id based directories to hold key debugging information and for that access to root debugfs dentry is need which is not in one place and that information cannot be stored in drm_device. Move the debugfs functionality from drm_drv.c and drm_accel.c to drm_debugfs.c This enables debugfs root node reference directly drm_debugfs.c and hence enable to create per client-id directory. v8: Create drm_accel dentry only if it's config is enabled (Jeff, Hugo) v8: Merge drm_drv and drm_accel debugfs patches (Koenig, Christian) v10: Since we moved drm_debugfs_root, hence to handle drm bridge debugfs add a new function which call drm_bridge_debugfs_params where drm_debugfs_root is accessible. Suggested-by: Christian König Signed-off-by: Sunil Khatri Link: https://lore.kernel.org/r/20250704075548.1549849-2-sunil.khatri@amd.com Reviewed-by: Christian König Signed-off-by: Christian König --- include/drm/drm_accel.h | 5 ----- include/drm/drm_drv.h | 19 +++++++++++++++++-- 2 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/drm/drm_accel.h b/include/drm/drm_accel.h index 038ccb02f9a3..20a665ec6f16 100644 --- a/include/drm/drm_accel.h +++ b/include/drm/drm_accel.h @@ -58,7 +58,6 @@ void accel_core_exit(void); int accel_core_init(void); void accel_set_device_instance_params(struct device *kdev, int index); int accel_open(struct inode *inode, struct file *filp); -void accel_debugfs_init(struct drm_device *dev); void accel_debugfs_register(struct drm_device *dev); #else @@ -77,10 +76,6 @@ static inline void accel_set_device_instance_params(struct device *kdev, int ind { } -static inline void accel_debugfs_init(struct drm_device *dev) -{ -} - static inline void accel_debugfs_register(struct drm_device *dev) { } diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index 3f76a32d6b84..42fc085f986d 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -572,9 +572,24 @@ static inline bool drm_firmware_drivers_only(void) } #if defined(CONFIG_DEBUG_FS) -void drm_debugfs_dev_init(struct drm_device *dev, struct dentry *root); +void drm_debugfs_dev_init(struct drm_device *dev); +void drm_debugfs_init_root(void); +void drm_debugfs_remove_root(void); +void drm_debugfs_bridge_params(void); #else -static inline void drm_debugfs_dev_init(struct drm_device *dev, struct dentry *root) +static inline void drm_debugfs_dev_init(struct drm_device *dev) +{ +} + +static inline void drm_debugfs_init_root(void) +{ +} + +static inline void drm_debugfs_remove_root(void) +{ +} + +static inline void drm_debugfs_bridge_params(void) { } #endif -- cgit v1.2.3 From 1fd45bc21cecea390ab9c21a2406bbbe3eed4b93 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 4 Jul 2025 13:25:46 +0530 Subject: drm: add debugfs support on per client-id basis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit add support to add a directory for each client-id with root at the dri level. Since the clients are unique and not just related to one single drm device, so it makes more sense to add all the client based nodes with root as dri. Also create a debugfs file which show the process information for the client and create a symlink back to the parent drm device from each client. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250704075548.1549849-3-sunil.khatri@amd.com Signed-off-by: Christian König --- include/drm/drm_debugfs.h | 11 +++++++++++ include/drm/drm_file.h | 7 +++++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/drm/drm_debugfs.h b/include/drm/drm_debugfs.h index cf06cee4343f..ea8cba94208a 100644 --- a/include/drm/drm_debugfs.h +++ b/include/drm/drm_debugfs.h @@ -153,6 +153,9 @@ void drm_debugfs_add_files(struct drm_device *dev, int drm_debugfs_gpuva_info(struct seq_file *m, struct drm_gpuvm *gpuvm); + +void drm_debugfs_clients_add(struct drm_file *file); +void drm_debugfs_clients_remove(struct drm_file *file); #else static inline void drm_debugfs_create_files(const struct drm_info_list *files, int count, struct dentry *root, @@ -181,6 +184,14 @@ static inline int drm_debugfs_gpuva_info(struct seq_file *m, { return 0; } + +static inline void drm_debugfs_clients_add(struct drm_file *file) +{ +} + +static inline void drm_debugfs_clients_remove(struct drm_file *file) +{ +} #endif #endif /* _DRM_DEBUGFS_H_ */ diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index 5c3b2aa3e69d..eab7546aad79 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -400,6 +400,13 @@ struct drm_file { * @client_name_lock: Protects @client_name. */ struct mutex client_name_lock; + + /** + * @debugfs_client: + * + * debugfs directory for each client under a drm node. + */ + struct dentry *debugfs_client; }; /** -- cgit v1.2.3 From ca115d7e754691c0219eec95ec94dbac7f87daef Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 3 Jul 2025 09:36:41 +0200 Subject: tree-wide: s/struct fileattr/struct file_kattr/g Now that we expose struct file_attr as our uapi struct rename all the internal struct to struct file_kattr to clearly communicate that it is a kernel internal struct. This is similar to struct mount_{k}attr and others. Link: https://lore.kernel.org/20250703-restlaufzeit-baurecht-9ed44552b481@brauner Signed-off-by: Christian Brauner --- include/linux/fileattr.h | 14 +++++++------- include/linux/fs.h | 6 +++--- include/linux/lsm_hook_defs.h | 4 ++-- include/linux/security.h | 8 ++++---- include/uapi/linux/fs.h | 2 +- 5 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/fileattr.h b/include/linux/fileattr.h index e2a2f4ae242d..f89dcfad3f8f 100644 --- a/include/linux/fileattr.h +++ b/include/linux/fileattr.h @@ -40,7 +40,7 @@ * is handled by the VFS helpers, so filesystems are free to implement just one * or both of these sub-interfaces. */ -struct fileattr { +struct file_kattr { u32 flags; /* flags (FS_IOC_GETFLAGS/FS_IOC_SETFLAGS) */ /* struct fsxattr: */ u32 fsx_xflags; /* xflags field value (get/set) */ @@ -53,10 +53,10 @@ struct fileattr { bool fsx_valid:1; }; -int copy_fsxattr_to_user(const struct fileattr *fa, struct fsxattr __user *ufa); +int copy_fsxattr_to_user(const struct file_kattr *fa, struct fsxattr __user *ufa); -void fileattr_fill_xflags(struct fileattr *fa, u32 xflags); -void fileattr_fill_flags(struct fileattr *fa, u32 flags); +void fileattr_fill_xflags(struct file_kattr *fa, u32 xflags); +void fileattr_fill_flags(struct file_kattr *fa, u32 flags); /** * fileattr_has_fsx - check for extended flags/attributes @@ -65,16 +65,16 @@ void fileattr_fill_flags(struct fileattr *fa, u32 flags); * Return: true if any attributes are present that are not represented in * ->flags. */ -static inline bool fileattr_has_fsx(const struct fileattr *fa) +static inline bool fileattr_has_fsx(const struct file_kattr *fa) { return fa->fsx_valid && ((fa->fsx_xflags & ~FS_XFLAG_COMMON) || fa->fsx_extsize != 0 || fa->fsx_projid != 0 || fa->fsx_cowextsize != 0); } -int vfs_fileattr_get(struct dentry *dentry, struct fileattr *fa); +int vfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa); int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, - struct fileattr *fa); + struct file_kattr *fa); int ioctl_getflags(struct file *file, unsigned int __user *argp); int ioctl_setflags(struct file *file, unsigned int __user *argp); int ioctl_fsgetxattr(struct file *file, void __user *argp); diff --git a/include/linux/fs.h b/include/linux/fs.h index 96c7925a6551..0c58617645ea 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -80,7 +80,7 @@ struct fsnotify_mark_connector; struct fsnotify_sb_info; struct fs_context; struct fs_parameter_spec; -struct fileattr; +struct file_kattr; struct iomap_ops; extern void __init inode_init(void); @@ -2254,8 +2254,8 @@ struct inode_operations { int (*set_acl)(struct mnt_idmap *, struct dentry *, struct posix_acl *, int); int (*fileattr_set)(struct mnt_idmap *idmap, - struct dentry *dentry, struct fileattr *fa); - int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa); + struct dentry *dentry, struct file_kattr *fa); + int (*fileattr_get)(struct dentry *dentry, struct file_kattr *fa); struct offset_ctx *(*get_offset_ctx)(struct inode *inode); } ____cacheline_aligned; diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 9600a4350e79..fd11fffdd3c3 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -157,8 +157,8 @@ LSM_HOOK(int, 0, inode_removexattr, struct mnt_idmap *idmap, struct dentry *dentry, const char *name) LSM_HOOK(void, LSM_RET_VOID, inode_post_removexattr, struct dentry *dentry, const char *name) -LSM_HOOK(int, 0, inode_file_setattr, struct dentry *dentry, struct fileattr *fa) -LSM_HOOK(int, 0, inode_file_getattr, struct dentry *dentry, struct fileattr *fa) +LSM_HOOK(int, 0, inode_file_setattr, struct dentry *dentry, struct file_kattr *fa) +LSM_HOOK(int, 0, inode_file_getattr, struct dentry *dentry, struct file_kattr *fa) LSM_HOOK(int, 0, inode_set_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) LSM_HOOK(void, LSM_RET_VOID, inode_post_set_acl, struct dentry *dentry, diff --git a/include/linux/security.h b/include/linux/security.h index 9ed0d0e0c81f..b95b5540c429 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -452,9 +452,9 @@ int security_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name); void security_inode_post_removexattr(struct dentry *dentry, const char *name); int security_inode_file_setattr(struct dentry *dentry, - struct fileattr *fa); + struct file_kattr *fa); int security_inode_file_getattr(struct dentry *dentry, - struct fileattr *fa); + struct file_kattr *fa); int security_inode_need_killpriv(struct dentry *dentry); int security_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry); int security_inode_getsecurity(struct mnt_idmap *idmap, @@ -1057,13 +1057,13 @@ static inline void security_inode_post_removexattr(struct dentry *dentry, { } static inline int security_inode_file_setattr(struct dentry *dentry, - struct fileattr *fa) + struct file_kattr *fa) { return 0; } static inline int security_inode_file_getattr(struct dentry *dentry, - struct fileattr *fa) + struct file_kattr *fa) { return 0; } diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 9663dbdda181..6e136c9c6a22 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -151,7 +151,7 @@ struct fsxattr { /* * Variable size structure for file_[sg]et_attr(). * - * Note. This is alternative to the structure 'struct fileattr'/'struct fsxattr'. + * Note. This is alternative to the structure 'struct file_kattr'/'struct fsxattr'. * As this structure is passed to/from userspace with its size, this can * be versioned based on the size. */ -- cgit v1.2.3 From 3135d5be7c27841526d98150c245304ab312e9f4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Jun 2025 09:06:34 -0700 Subject: lib/crypto: sha256: Reorder some code First, move the declarations of sha224_init/update/final to be just above the corresponding SHA-256 code, matching the order that I used for SHA-384 and SHA-512. In sha2.h, the end result is that SHA-224, SHA-256, SHA-384, and SHA-512 are all in the logical order. Second, move sha224_block_init() and sha256_block_init() to be just below crypto_sha256_state. In later changes, these functions as well as struct crypto_sha256_state will no longer be used by the library functions. They'll remain just for some legacy offload drivers. This gets them into a logical place in the file for that. No code changes other than reordering. Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250630160645.3198-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/sha2.h | 60 +++++++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index 296ce9d468bf..bb181b7996cd 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -71,6 +71,32 @@ struct crypto_sha256_state { u64 count; }; +static inline void sha224_block_init(struct crypto_sha256_state *sctx) +{ + sctx->state[0] = SHA224_H0; + sctx->state[1] = SHA224_H1; + sctx->state[2] = SHA224_H2; + sctx->state[3] = SHA224_H3; + sctx->state[4] = SHA224_H4; + sctx->state[5] = SHA224_H5; + sctx->state[6] = SHA224_H6; + sctx->state[7] = SHA224_H7; + sctx->count = 0; +} + +static inline void sha256_block_init(struct crypto_sha256_state *sctx) +{ + sctx->state[0] = SHA256_H0; + sctx->state[1] = SHA256_H1; + sctx->state[2] = SHA256_H2; + sctx->state[3] = SHA256_H3; + sctx->state[4] = SHA256_H4; + sctx->state[5] = SHA256_H5; + sctx->state[6] = SHA256_H6; + sctx->state[7] = SHA256_H7; + sctx->count = 0; +} + struct sha256_state { union { struct crypto_sha256_state ctx; @@ -88,18 +114,12 @@ struct sha512_state { u8 buf[SHA512_BLOCK_SIZE]; }; -static inline void sha256_block_init(struct crypto_sha256_state *sctx) +static inline void sha224_init(struct sha256_state *sctx) { - sctx->state[0] = SHA256_H0; - sctx->state[1] = SHA256_H1; - sctx->state[2] = SHA256_H2; - sctx->state[3] = SHA256_H3; - sctx->state[4] = SHA256_H4; - sctx->state[5] = SHA256_H5; - sctx->state[6] = SHA256_H6; - sctx->state[7] = SHA256_H7; - sctx->count = 0; + sha224_block_init(&sctx->ctx); } +/* Simply use sha256_update as it is equivalent to sha224_update. */ +void sha224_final(struct sha256_state *sctx, u8 out[SHA224_DIGEST_SIZE]); static inline void sha256_init(struct sha256_state *sctx) { @@ -109,26 +129,6 @@ void sha256_update(struct sha256_state *sctx, const u8 *data, size_t len); void sha256_final(struct sha256_state *sctx, u8 out[SHA256_DIGEST_SIZE]); void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE]); -static inline void sha224_block_init(struct crypto_sha256_state *sctx) -{ - sctx->state[0] = SHA224_H0; - sctx->state[1] = SHA224_H1; - sctx->state[2] = SHA224_H2; - sctx->state[3] = SHA224_H3; - sctx->state[4] = SHA224_H4; - sctx->state[5] = SHA224_H5; - sctx->state[6] = SHA224_H6; - sctx->state[7] = SHA224_H7; - sctx->count = 0; -} - -static inline void sha224_init(struct sha256_state *sctx) -{ - sha224_block_init(&sctx->ctx); -} -/* Simply use sha256_update as it is equivalent to sha224_update. */ -void sha224_final(struct sha256_state *sctx, u8 out[SHA224_DIGEST_SIZE]); - /* State for the SHA-512 (and SHA-384) compression function */ struct sha512_block_state { u64 h[8]; -- cgit v1.2.3 From 9f97707bdb1e479ea15e14e5525164f5f1128e97 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Jun 2025 09:06:35 -0700 Subject: lib/crypto: sha256: Remove sha256_blocks_simd() Instead of having both sha256_blocks_arch() and sha256_blocks_simd(), instead have just sha256_blocks_arch() which uses the most efficient implementation that is available in the calling context. This is simpler, as it reduces the API surface. It's also safer, since sha256_blocks_arch() just works in all contexts, including contexts where the FPU/SIMD/vector registers cannot be used. This doesn't mean that SHA-256 computations *should* be done in such contexts, but rather we should just do the right thing instead of corrupting a random task's registers. Eliminating this footgun and simplifying the code is well worth the very small performance cost of doing the check. Note: in the case of arm and arm64, what used to be sha256_blocks_arch() is renamed back to its original name of sha256_block_data_order(). sha256_blocks_arch() is now used for the higher-level dispatch function. This renaming also required an update to lib/crypto/arm64/sha512.h, since sha2-armv8.pl is shared by both SHA-256 and SHA-512. Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250630160645.3198-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/internal/sha2.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h index 21a27fd5e198..5a25ccc49388 100644 --- a/include/crypto/internal/sha2.h +++ b/include/crypto/internal/sha2.h @@ -3,7 +3,6 @@ #ifndef _CRYPTO_INTERNAL_SHA2_H #define _CRYPTO_INTERNAL_SHA2_H -#include #include #include #include @@ -22,8 +21,6 @@ void sha256_blocks_generic(u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks); void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks); -void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS], - const u8 *data, size_t nblocks); static __always_inline void sha256_choose_blocks( u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks, @@ -31,9 +28,6 @@ static __always_inline void sha256_choose_blocks( { if (!IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256) || force_generic) sha256_blocks_generic(state, data, nblocks); - else if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD) && - (force_simd || crypto_simd_usable())) - sha256_blocks_simd(state, data, nblocks); else sha256_blocks_arch(state, data, nblocks); } -- cgit v1.2.3 From 6fa4b292204b15e0e269a9fd33bc99b5e36b6883 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Jun 2025 09:06:36 -0700 Subject: lib/crypto: sha256: Add sha224() and sha224_update() Add a one-shot SHA-224 computation function sha224(), for consistency with sha256(), sha384(), and sha512() which all already exist. Similarly, add sha224_update(). While for now it's identical to sha256_update(), omitting it makes the API harder to use since users have to "know" which functions are the same between SHA-224 and SHA-256. Also, this is a prerequisite for using different context types for each. Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250630160645.3198-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/sha2.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index bb181b7996cd..e31da0743a52 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -114,18 +114,24 @@ struct sha512_state { u8 buf[SHA512_BLOCK_SIZE]; }; +void sha256_update(struct sha256_state *sctx, const u8 *data, size_t len); + static inline void sha224_init(struct sha256_state *sctx) { sha224_block_init(&sctx->ctx); } -/* Simply use sha256_update as it is equivalent to sha224_update. */ +static inline void sha224_update(struct sha256_state *sctx, + const u8 *data, size_t len) +{ + sha256_update(sctx, data, len); +} void sha224_final(struct sha256_state *sctx, u8 out[SHA224_DIGEST_SIZE]); +void sha224(const u8 *data, size_t len, u8 out[SHA224_DIGEST_SIZE]); static inline void sha256_init(struct sha256_state *sctx) { sha256_block_init(&sctx->ctx); } -void sha256_update(struct sha256_state *sctx, const u8 *data, size_t len); void sha256_final(struct sha256_state *sctx, u8 out[SHA256_DIGEST_SIZE]); void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE]); -- cgit v1.2.3 From b86ced882b8e667758afddffd8d6354197842110 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Jun 2025 09:06:37 -0700 Subject: lib/crypto: sha256: Make library API use strongly-typed contexts Currently the SHA-224 and SHA-256 library functions can be mixed arbitrarily, even in ways that are incorrect, for example using sha224_init() and sha256_final(). This is because they operate on the same structure, sha256_state. Introduce stronger typing, as I did for SHA-384 and SHA-512. Also as I did for SHA-384 and SHA-512, use the names *_ctx instead of *_state. The *_ctx names have the following small benefits: - They're shorter. - They avoid an ambiguity with the compression function state. - They're consistent with the well-known OpenSSL API. - Users usually name the variable 'sctx' anyway, which suggests that *_ctx would be the more natural name for the actual struct. Therefore: update the SHA-224 and SHA-256 APIs, implementation, and calling code accordingly. In the new structs, also strongly-type the compression function state. Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250630160645.3198-7-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/sha2.h | 52 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index e31da0743a52..18e1eec841b7 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -114,25 +114,55 @@ struct sha512_state { u8 buf[SHA512_BLOCK_SIZE]; }; -void sha256_update(struct sha256_state *sctx, const u8 *data, size_t len); +/* State for the SHA-256 (and SHA-224) compression function */ +struct sha256_block_state { + u32 h[SHA256_STATE_WORDS]; +}; -static inline void sha224_init(struct sha256_state *sctx) -{ - sha224_block_init(&sctx->ctx); -} -static inline void sha224_update(struct sha256_state *sctx, +/* + * Context structure, shared by SHA-224 and SHA-256. The sha224_ctx and + * sha256_ctx structs wrap this one so that the API has proper typing and + * doesn't allow mixing the SHA-224 and SHA-256 functions arbitrarily. + */ +struct __sha256_ctx { + struct sha256_block_state state; + u64 bytecount; + u8 buf[SHA256_BLOCK_SIZE] __aligned(__alignof__(__be64)); +}; +void __sha256_update(struct __sha256_ctx *ctx, const u8 *data, size_t len); + +/** + * struct sha224_ctx - Context for hashing a message with SHA-224 + * @ctx: private + */ +struct sha224_ctx { + struct __sha256_ctx ctx; +}; + +void sha224_init(struct sha224_ctx *ctx); +static inline void sha224_update(struct sha224_ctx *ctx, const u8 *data, size_t len) { - sha256_update(sctx, data, len); + __sha256_update(&ctx->ctx, data, len); } -void sha224_final(struct sha256_state *sctx, u8 out[SHA224_DIGEST_SIZE]); +void sha224_final(struct sha224_ctx *ctx, u8 out[SHA224_DIGEST_SIZE]); void sha224(const u8 *data, size_t len, u8 out[SHA224_DIGEST_SIZE]); -static inline void sha256_init(struct sha256_state *sctx) +/** + * struct sha256_ctx - Context for hashing a message with SHA-256 + * @ctx: private + */ +struct sha256_ctx { + struct __sha256_ctx ctx; +}; + +void sha256_init(struct sha256_ctx *ctx); +static inline void sha256_update(struct sha256_ctx *ctx, + const u8 *data, size_t len) { - sha256_block_init(&sctx->ctx); + __sha256_update(&ctx->ctx, data, len); } -void sha256_final(struct sha256_state *sctx, u8 out[SHA256_DIGEST_SIZE]); +void sha256_final(struct sha256_ctx *ctx, u8 out[SHA256_DIGEST_SIZE]); void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE]); /* State for the SHA-512 (and SHA-384) compression function */ -- cgit v1.2.3 From 4c855d5069ee2edbcf62fafc7f1a5d4cfea1bce1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Jun 2025 09:06:38 -0700 Subject: lib/crypto: sha256: Propagate sha256_block_state type to implementations The previous commit made the SHA-256 compression function state be strongly typed, but it wasn't propagated all the way down to the implementations of it. Do that now. Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250630160645.3198-8-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/internal/sha2.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h index 5a25ccc49388..f0f455477bbd 100644 --- a/include/crypto/internal/sha2.h +++ b/include/crypto/internal/sha2.h @@ -17,9 +17,9 @@ static inline bool sha256_is_arch_optimized(void) return false; } #endif -void sha256_blocks_generic(u32 state[SHA256_STATE_WORDS], +void sha256_blocks_generic(struct sha256_block_state *state, const u8 *data, size_t nblocks); -void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], +void sha256_blocks_arch(struct sha256_block_state *state, const u8 *data, size_t nblocks); static __always_inline void sha256_choose_blocks( @@ -27,9 +27,9 @@ static __always_inline void sha256_choose_blocks( bool force_generic, bool force_simd) { if (!IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256) || force_generic) - sha256_blocks_generic(state, data, nblocks); + sha256_blocks_generic((struct sha256_block_state *)state, data, nblocks); else - sha256_blocks_arch(state, data, nblocks); + sha256_blocks_arch((struct sha256_block_state *)state, data, nblocks); } static __always_inline void sha256_finup( -- cgit v1.2.3 From 077833cd600908359391bd22d5350c9106ea238c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Jun 2025 09:06:39 -0700 Subject: lib/crypto: sha256: Add HMAC-SHA224 and HMAC-SHA256 support Since HMAC support is commonly needed and is fairly simple, include it as a first-class citizen of the SHA-256 library. The API supports both incremental and one-shot computation, and either preparing the key ahead of time or just using a raw key. The implementation is much more streamlined than crypto/hmac.c. I've kept it consistent with the HMAC-SHA384 and HMAC-SHA512 code as much as possible. Testing of these functions will be via sha224_kunit and sha256_kunit, added by a later commit. Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250630160645.3198-9-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/sha2.h | 222 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 222 insertions(+) (limited to 'include') diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index 18e1eec841b7..2e3fc2cf4aa0 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -131,6 +131,22 @@ struct __sha256_ctx { }; void __sha256_update(struct __sha256_ctx *ctx, const u8 *data, size_t len); +/* + * HMAC key and message context structs, shared by HMAC-SHA224 and HMAC-SHA256. + * The hmac_sha224_* and hmac_sha256_* structs wrap this one so that the API has + * proper typing and doesn't allow mixing the functions arbitrarily. + */ +struct __hmac_sha256_key { + struct sha256_block_state istate; + struct sha256_block_state ostate; +}; +struct __hmac_sha256_ctx { + struct __sha256_ctx sha_ctx; + struct sha256_block_state ostate; +}; +void __hmac_sha256_init(struct __hmac_sha256_ctx *ctx, + const struct __hmac_sha256_key *key); + /** * struct sha224_ctx - Context for hashing a message with SHA-224 * @ctx: private @@ -148,6 +164,109 @@ static inline void sha224_update(struct sha224_ctx *ctx, void sha224_final(struct sha224_ctx *ctx, u8 out[SHA224_DIGEST_SIZE]); void sha224(const u8 *data, size_t len, u8 out[SHA224_DIGEST_SIZE]); +/** + * struct hmac_sha224_key - Prepared key for HMAC-SHA224 + * @key: private + */ +struct hmac_sha224_key { + struct __hmac_sha256_key key; +}; + +/** + * struct hmac_sha224_ctx - Context for computing HMAC-SHA224 of a message + * @ctx: private + */ +struct hmac_sha224_ctx { + struct __hmac_sha256_ctx ctx; +}; + +/** + * hmac_sha224_preparekey() - Prepare a key for HMAC-SHA224 + * @key: (output) the key structure to initialize + * @raw_key: the raw HMAC-SHA224 key + * @raw_key_len: the key length in bytes. All key lengths are supported. + * + * Note: the caller is responsible for zeroizing both the struct hmac_sha224_key + * and the raw key once they are no longer needed. + * + * Context: Any context. + */ +void hmac_sha224_preparekey(struct hmac_sha224_key *key, + const u8 *raw_key, size_t raw_key_len); + +/** + * hmac_sha224_init() - Initialize an HMAC-SHA224 context for a new message + * @ctx: (output) the HMAC context to initialize + * @key: the prepared HMAC key + * + * If you don't need incremental computation, consider hmac_sha224() instead. + * + * Context: Any context. + */ +static inline void hmac_sha224_init(struct hmac_sha224_ctx *ctx, + const struct hmac_sha224_key *key) +{ + __hmac_sha256_init(&ctx->ctx, &key->key); +} + +/** + * hmac_sha224_update() - Update an HMAC-SHA224 context with message data + * @ctx: the HMAC context to update; must have been initialized + * @data: the message data + * @data_len: the data length in bytes + * + * This can be called any number of times. + * + * Context: Any context. + */ +static inline void hmac_sha224_update(struct hmac_sha224_ctx *ctx, + const u8 *data, size_t data_len) +{ + __sha256_update(&ctx->ctx.sha_ctx, data, data_len); +} + +/** + * hmac_sha224_final() - Finish computing an HMAC-SHA224 value + * @ctx: the HMAC context to finalize; must have been initialized + * @out: (output) the resulting HMAC-SHA224 value + * + * After finishing, this zeroizes @ctx. So the caller does not need to do it. + * + * Context: Any context. + */ +void hmac_sha224_final(struct hmac_sha224_ctx *ctx, u8 out[SHA224_DIGEST_SIZE]); + +/** + * hmac_sha224() - Compute HMAC-SHA224 in one shot, using a prepared key + * @key: the prepared HMAC key + * @data: the message data + * @data_len: the data length in bytes + * @out: (output) the resulting HMAC-SHA224 value + * + * If you're using the key only once, consider using hmac_sha224_usingrawkey(). + * + * Context: Any context. + */ +void hmac_sha224(const struct hmac_sha224_key *key, + const u8 *data, size_t data_len, u8 out[SHA224_DIGEST_SIZE]); + +/** + * hmac_sha224_usingrawkey() - Compute HMAC-SHA224 in one shot, using a raw key + * @raw_key: the raw HMAC-SHA224 key + * @raw_key_len: the key length in bytes. All key lengths are supported. + * @data: the message data + * @data_len: the data length in bytes + * @out: (output) the resulting HMAC-SHA224 value + * + * If you're using the key multiple times, prefer to use + * hmac_sha224_preparekey() followed by multiple calls to hmac_sha224() instead. + * + * Context: Any context. + */ +void hmac_sha224_usingrawkey(const u8 *raw_key, size_t raw_key_len, + const u8 *data, size_t data_len, + u8 out[SHA224_DIGEST_SIZE]); + /** * struct sha256_ctx - Context for hashing a message with SHA-256 * @ctx: private @@ -165,6 +284,109 @@ static inline void sha256_update(struct sha256_ctx *ctx, void sha256_final(struct sha256_ctx *ctx, u8 out[SHA256_DIGEST_SIZE]); void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE]); +/** + * struct hmac_sha256_key - Prepared key for HMAC-SHA256 + * @key: private + */ +struct hmac_sha256_key { + struct __hmac_sha256_key key; +}; + +/** + * struct hmac_sha256_ctx - Context for computing HMAC-SHA256 of a message + * @ctx: private + */ +struct hmac_sha256_ctx { + struct __hmac_sha256_ctx ctx; +}; + +/** + * hmac_sha256_preparekey() - Prepare a key for HMAC-SHA256 + * @key: (output) the key structure to initialize + * @raw_key: the raw HMAC-SHA256 key + * @raw_key_len: the key length in bytes. All key lengths are supported. + * + * Note: the caller is responsible for zeroizing both the struct hmac_sha256_key + * and the raw key once they are no longer needed. + * + * Context: Any context. + */ +void hmac_sha256_preparekey(struct hmac_sha256_key *key, + const u8 *raw_key, size_t raw_key_len); + +/** + * hmac_sha256_init() - Initialize an HMAC-SHA256 context for a new message + * @ctx: (output) the HMAC context to initialize + * @key: the prepared HMAC key + * + * If you don't need incremental computation, consider hmac_sha256() instead. + * + * Context: Any context. + */ +static inline void hmac_sha256_init(struct hmac_sha256_ctx *ctx, + const struct hmac_sha256_key *key) +{ + __hmac_sha256_init(&ctx->ctx, &key->key); +} + +/** + * hmac_sha256_update() - Update an HMAC-SHA256 context with message data + * @ctx: the HMAC context to update; must have been initialized + * @data: the message data + * @data_len: the data length in bytes + * + * This can be called any number of times. + * + * Context: Any context. + */ +static inline void hmac_sha256_update(struct hmac_sha256_ctx *ctx, + const u8 *data, size_t data_len) +{ + __sha256_update(&ctx->ctx.sha_ctx, data, data_len); +} + +/** + * hmac_sha256_final() - Finish computing an HMAC-SHA256 value + * @ctx: the HMAC context to finalize; must have been initialized + * @out: (output) the resulting HMAC-SHA256 value + * + * After finishing, this zeroizes @ctx. So the caller does not need to do it. + * + * Context: Any context. + */ +void hmac_sha256_final(struct hmac_sha256_ctx *ctx, u8 out[SHA256_DIGEST_SIZE]); + +/** + * hmac_sha256() - Compute HMAC-SHA256 in one shot, using a prepared key + * @key: the prepared HMAC key + * @data: the message data + * @data_len: the data length in bytes + * @out: (output) the resulting HMAC-SHA256 value + * + * If you're using the key only once, consider using hmac_sha256_usingrawkey(). + * + * Context: Any context. + */ +void hmac_sha256(const struct hmac_sha256_key *key, + const u8 *data, size_t data_len, u8 out[SHA256_DIGEST_SIZE]); + +/** + * hmac_sha256_usingrawkey() - Compute HMAC-SHA256 in one shot, using a raw key + * @raw_key: the raw HMAC-SHA256 key + * @raw_key_len: the key length in bytes. All key lengths are supported. + * @data: the message data + * @data_len: the data length in bytes + * @out: (output) the resulting HMAC-SHA256 value + * + * If you're using the key multiple times, prefer to use + * hmac_sha256_preparekey() followed by multiple calls to hmac_sha256() instead. + * + * Context: Any context. + */ +void hmac_sha256_usingrawkey(const u8 *raw_key, size_t raw_key_len, + const u8 *data, size_t data_len, + u8 out[SHA256_DIGEST_SIZE]); + /* State for the SHA-512 (and SHA-384) compression function */ struct sha512_block_state { u64 h[8]; -- cgit v1.2.3 From 9f9846a72eec406db9e1eadcad1dd5e90aa0f355 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Jun 2025 09:06:42 -0700 Subject: lib/crypto: sha256: Remove sha256_is_arch_optimized() Remove sha256_is_arch_optimized(), since it is no longer used. Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250630160645.3198-12-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/internal/sha2.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h index f0f455477bbd..7915a3a46bc8 100644 --- a/include/crypto/internal/sha2.h +++ b/include/crypto/internal/sha2.h @@ -9,14 +9,6 @@ #include #include -#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256) -bool sha256_is_arch_optimized(void); -#else -static inline bool sha256_is_arch_optimized(void) -{ - return false; -} -#endif void sha256_blocks_generic(struct sha256_block_state *state, const u8 *data, size_t nblocks); void sha256_blocks_arch(struct sha256_block_state *state, -- cgit v1.2.3 From e96cb9507f2d8ba150d417dcd283204564945831 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Jun 2025 09:06:43 -0700 Subject: lib/crypto: sha256: Consolidate into single module Consolidate the CPU-based SHA-256 code into a single module, following what I did with SHA-512: - Each arch now provides a header file lib/crypto/$(SRCARCH)/sha256.h, replacing lib/crypto/$(SRCARCH)/sha256.c. The header defines sha256_blocks() and optionally sha256_mod_init_arch(). It is included by lib/crypto/sha256.c, and thus the code gets built into the single libsha256 module, with proper inlining and dead code elimination. - sha256_blocks_generic() is moved from lib/crypto/sha256-generic.c into lib/crypto/sha256.c. It's now a static function marked with __maybe_unused, so the compiler automatically eliminates it in any cases where it's not used. - Whether arch-optimized SHA-256 is buildable is now controlled centrally by lib/crypto/Kconfig instead of by lib/crypto/$(SRCARCH)/Kconfig. The conditions for enabling it remain the same as before, and it remains enabled by default. - Any additional arch-specific translation units for the optimized SHA-256 code (such as assembly files) are now compiled by lib/crypto/Makefile instead of lib/crypto/$(SRCARCH)/Makefile. Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250630160645.3198-13-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/internal/sha2.h | 52 ------------------------------------------ 1 file changed, 52 deletions(-) delete mode 100644 include/crypto/internal/sha2.h (limited to 'include') diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h deleted file mode 100644 index 7915a3a46bc8..000000000000 --- a/include/crypto/internal/sha2.h +++ /dev/null @@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ - -#ifndef _CRYPTO_INTERNAL_SHA2_H -#define _CRYPTO_INTERNAL_SHA2_H - -#include -#include -#include -#include -#include - -void sha256_blocks_generic(struct sha256_block_state *state, - const u8 *data, size_t nblocks); -void sha256_blocks_arch(struct sha256_block_state *state, - const u8 *data, size_t nblocks); - -static __always_inline void sha256_choose_blocks( - u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks, - bool force_generic, bool force_simd) -{ - if (!IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256) || force_generic) - sha256_blocks_generic((struct sha256_block_state *)state, data, nblocks); - else - sha256_blocks_arch((struct sha256_block_state *)state, data, nblocks); -} - -static __always_inline void sha256_finup( - struct crypto_sha256_state *sctx, u8 buf[SHA256_BLOCK_SIZE], - size_t len, u8 out[SHA256_DIGEST_SIZE], size_t digest_size, - bool force_generic, bool force_simd) -{ - const size_t bit_offset = SHA256_BLOCK_SIZE - 8; - __be64 *bits = (__be64 *)&buf[bit_offset]; - int i; - - buf[len++] = 0x80; - if (len > bit_offset) { - memset(&buf[len], 0, SHA256_BLOCK_SIZE - len); - sha256_choose_blocks(sctx->state, buf, 1, force_generic, - force_simd); - len = 0; - } - - memset(&buf[len], 0, bit_offset - len); - *bits = cpu_to_be64(sctx->count << 3); - sha256_choose_blocks(sctx->state, buf, 1, force_generic, force_simd); - - for (i = 0; i < digest_size; i += 4) - put_unaligned_be32(sctx->state[i / 4], out + i); -} - -#endif /* _CRYPTO_INTERNAL_SHA2_H */ -- cgit v1.2.3 From b34c9803aabd85189ffacc0d3cdb9ce4515c2b4d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Jun 2025 09:06:45 -0700 Subject: lib/crypto: sha256: Document the SHA-224 and SHA-256 API Add kerneldoc comments, consistent with the kerneldoc comments of the SHA-384 and SHA-512 API. Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250630160645.3198-15-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/sha2.h | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) (limited to 'include') diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index 2e3fc2cf4aa0..e0a08f6addd0 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -155,13 +155,51 @@ struct sha224_ctx { struct __sha256_ctx ctx; }; +/** + * sha224_init() - Initialize a SHA-224 context for a new message + * @ctx: the context to initialize + * + * If you don't need incremental computation, consider sha224() instead. + * + * Context: Any context. + */ void sha224_init(struct sha224_ctx *ctx); + +/** + * sha224_update() - Update a SHA-224 context with message data + * @ctx: the context to update; must have been initialized + * @data: the message data + * @len: the data length in bytes + * + * This can be called any number of times. + * + * Context: Any context. + */ static inline void sha224_update(struct sha224_ctx *ctx, const u8 *data, size_t len) { __sha256_update(&ctx->ctx, data, len); } + +/** + * sha224_final() - Finish computing a SHA-224 message digest + * @ctx: the context to finalize; must have been initialized + * @out: (output) the resulting SHA-224 message digest + * + * After finishing, this zeroizes @ctx. So the caller does not need to do it. + * + * Context: Any context. + */ void sha224_final(struct sha224_ctx *ctx, u8 out[SHA224_DIGEST_SIZE]); + +/** + * sha224() - Compute SHA-224 message digest in one shot + * @data: the message data + * @len: the data length in bytes + * @out: (output) the resulting SHA-224 message digest + * + * Context: Any context. + */ void sha224(const u8 *data, size_t len, u8 out[SHA224_DIGEST_SIZE]); /** @@ -275,13 +313,51 @@ struct sha256_ctx { struct __sha256_ctx ctx; }; +/** + * sha256_init() - Initialize a SHA-256 context for a new message + * @ctx: the context to initialize + * + * If you don't need incremental computation, consider sha256() instead. + * + * Context: Any context. + */ void sha256_init(struct sha256_ctx *ctx); + +/** + * sha256_update() - Update a SHA-256 context with message data + * @ctx: the context to update; must have been initialized + * @data: the message data + * @len: the data length in bytes + * + * This can be called any number of times. + * + * Context: Any context. + */ static inline void sha256_update(struct sha256_ctx *ctx, const u8 *data, size_t len) { __sha256_update(&ctx->ctx, data, len); } + +/** + * sha256_final() - Finish computing a SHA-256 message digest + * @ctx: the context to finalize; must have been initialized + * @out: (output) the resulting SHA-256 message digest + * + * After finishing, this zeroizes @ctx. So the caller does not need to do it. + * + * Context: Any context. + */ void sha256_final(struct sha256_ctx *ctx, u8 out[SHA256_DIGEST_SIZE]); + +/** + * sha256() - Compute SHA-256 message digest in one shot + * @data: the message data + * @len: the data length in bytes + * @out: (output) the resulting SHA-256 message digest + * + * Context: Any context. + */ void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE]); /** -- cgit v1.2.3 From 1c402295c10891988fb2a6fc658e6e95d4852a20 Mon Sep 17 00:00:00 2001 From: Akhil P Oommen Date: Tue, 1 Jul 2025 21:50:45 +0530 Subject: dt-bindings: power: qcom,rpmpd: add Turbo L5 corner Update the RPMH level definitions to include TURBO_L5 corner. Acked-by: Krzysztof Kozlowski Signed-off-by: Akhil P Oommen Patchwork: https://patchwork.freedesktop.org/patch/661840/ Signed-off-by: Rob Clark --- include/dt-bindings/power/qcom-rpmpd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/power/qcom-rpmpd.h b/include/dt-bindings/power/qcom-rpmpd.h index d9b7bac30953..f15bcee7c928 100644 --- a/include/dt-bindings/power/qcom-rpmpd.h +++ b/include/dt-bindings/power/qcom-rpmpd.h @@ -240,6 +240,7 @@ #define RPMH_REGULATOR_LEVEL_TURBO_L2 432 #define RPMH_REGULATOR_LEVEL_TURBO_L3 448 #define RPMH_REGULATOR_LEVEL_TURBO_L4 452 +#define RPMH_REGULATOR_LEVEL_TURBO_L5 456 #define RPMH_REGULATOR_LEVEL_SUPER_TURBO 464 #define RPMH_REGULATOR_LEVEL_SUPER_TURBO_NO_CPR 480 -- cgit v1.2.3 From 471920ce25d50bb39bfdaf3c3d9bc9dde30fa265 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 29 Jun 2025 13:12:45 -0700 Subject: drm/gpuvm: Add locking helpers For UNMAP/REMAP steps we could be needing to lock objects that are not explicitly listed in the VM_BIND ioctl in order to tear-down unmapped VAs. These helpers handle locking/preparing the needed objects. Note that these functions do not strictly require the VM changes to be applied before the next drm_gpuvm_sm_map_lock()/_unmap_lock() call. In the case that VM changes from an earlier drm_gpuvm_sm_map()/_unmap() call result in a differing sequence of steps when the VM changes are actually applied, it will be the same set of GEM objects involved, so the locking is still correct. v2: Rename to drm_gpuvm_sm_*_exec_locked() [Danilo] v3: Expand comments to show expected usage, and explain how the usage is safe in the case of overlapping driver VM_BIND ops. Signed-off-by: Rob Clark Tested-by: Antonino Maniscalco Reviewed-by: Antonino Maniscalco Acked-by: Danilo Krummrich Patchwork: https://patchwork.freedesktop.org/patch/661458/ --- include/drm/drm_gpuvm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 2a9629377633..274532facfd6 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -1211,6 +1211,14 @@ int drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, void *priv, int drm_gpuvm_sm_unmap(struct drm_gpuvm *gpuvm, void *priv, u64 addr, u64 range); +int drm_gpuvm_sm_map_exec_lock(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, unsigned int num_fences, + u64 req_addr, u64 req_range, + struct drm_gem_object *obj, u64 offset); + +int drm_gpuvm_sm_unmap_exec_lock(struct drm_gpuvm *gpuvm, struct drm_exec *exec, + u64 req_addr, u64 req_range); + void drm_gpuva_map(struct drm_gpuvm *gpuvm, struct drm_gpuva *va, struct drm_gpuva_op_map *op); -- cgit v1.2.3 From 02070f04987524caf77d4bf4c94ebceb783b7bcc Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 29 Jun 2025 13:12:46 -0700 Subject: drm/gem: Add ww_acquire_ctx support to drm_gem_lru_scan() If the callback is going to have to attempt to grab more locks, it is useful to have an ww_acquire_ctx to avoid locking order problems. Why not use the drm_exec helper instead? Mainly because (a) where ww_acquire_init() is called is awkward, and (b) we don't really need to retry after backoff, we can just move on to the next object. Signed-off-by: Rob Clark Signed-off-by: Rob Clark Tested-by: Antonino Maniscalco Reviewed-by: Antonino Maniscalco Patchwork: https://patchwork.freedesktop.org/patch/661463/ --- include/drm/drm_gem.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 1a79ec3fe45c..d3a7b43e2c63 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -560,10 +560,12 @@ void drm_gem_lru_init(struct drm_gem_lru *lru, struct mutex *lock); void drm_gem_lru_remove(struct drm_gem_object *obj); void drm_gem_lru_move_tail_locked(struct drm_gem_lru *lru, struct drm_gem_object *obj); void drm_gem_lru_move_tail(struct drm_gem_lru *lru, struct drm_gem_object *obj); -unsigned long drm_gem_lru_scan(struct drm_gem_lru *lru, - unsigned int nr_to_scan, - unsigned long *remaining, - bool (*shrink)(struct drm_gem_object *obj)); +unsigned long +drm_gem_lru_scan(struct drm_gem_lru *lru, + unsigned int nr_to_scan, + unsigned long *remaining, + bool (*shrink)(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket), + struct ww_acquire_ctx *ticket); int drm_gem_evict_locked(struct drm_gem_object *obj); -- cgit v1.2.3 From cdd73b1666079a73d061396f361df55d59fe96e6 Mon Sep 17 00:00:00 2001 From: Ariel Otilibili Date: Wed, 2 Jul 2025 12:00:21 +0200 Subject: uapi: fix broken link in linux/capability.h The link to the libcap library is outdated. Instead, use a link to the libcap2 library. As well, give the complete reference of the POSIX compliance. Signed-off-by: Ariel Otilibili Acked-by: Andrew G. Morgan Reviewed-by: Paul Moore Signed-off-by: Serge Hallyn --- include/uapi/linux/capability.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 2e21b5594f81..ea5a0899ecf0 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -6,9 +6,10 @@ * Alexander Kjeldaas * with help from Aleph1, Roland Buresund and Andrew Main. * - * See here for the libcap library ("POSIX draft" compliance): + * See here for the libcap2 library (compliant with Section 25 of + * the withdrawn POSIX 1003.1e Draft 17): * - * ftp://www.kernel.org/pub/linux/libs/security/linux-privs/kernel-2.6/ + * https://www.kernel.org/pub/linux/libs/security/linux-privs/libcap2/ */ #ifndef _UAPI_LINUX_CAPABILITY_H -- cgit v1.2.3 From dbbde63c9e9d472743a88f975baac412ba93f29d Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 29 Jun 2025 13:13:01 -0700 Subject: drm/msm: Add PRR support Add PRR (Partial Resident Region) is a bypass address which make GPU writes go to /dev/null and reads return zero. This is used to implement vulkan sparse residency. To support PRR/NULL mappings, we allocate a page to reserve a physical address which we know will not be used as part of a GEM object, and configure the SMMU to use this address for PRR/NULL mappings. Signed-off-by: Rob Clark Signed-off-by: Rob Clark Tested-by: Antonino Maniscalco Reviewed-by: Antonino Maniscalco Patchwork: https://patchwork.freedesktop.org/patch/661486/ --- include/uapi/drm/msm_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 2342cb90857e..5bc5e4526ccf 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -91,6 +91,8 @@ struct drm_msm_timespec { #define MSM_PARAM_UBWC_SWIZZLE 0x12 /* RO */ #define MSM_PARAM_MACROTILE_MODE 0x13 /* RO */ #define MSM_PARAM_UCHE_TRAP_BASE 0x14 /* RO */ +/* PRR (Partially Resident Region) is required for sparse residency: */ +#define MSM_PARAM_HAS_PRR 0x15 /* RO */ /* For backwards compat. The original support for preemption was based on * a single ring per priority level so # of priority levels equals the # -- cgit v1.2.3 From feb8ef4636a457a1fd916a3ae575f552935e69b9 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 29 Jun 2025 13:13:05 -0700 Subject: drm/msm: Add opt-in for VM_BIND Add a SET_PARAM for userspace to request to manage to the VM itself, instead of getting a kernel managed VM. In order to transition to a userspace managed VM, this param must be set before any mappings are created. Signed-off-by: Rob Clark Signed-off-by: Rob Clark Tested-by: Antonino Maniscalco Reviewed-by: Antonino Maniscalco Patchwork: https://patchwork.freedesktop.org/patch/661494/ --- include/uapi/drm/msm_drm.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 5bc5e4526ccf..b974f5a24dbc 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -93,6 +93,30 @@ struct drm_msm_timespec { #define MSM_PARAM_UCHE_TRAP_BASE 0x14 /* RO */ /* PRR (Partially Resident Region) is required for sparse residency: */ #define MSM_PARAM_HAS_PRR 0x15 /* RO */ +/* MSM_PARAM_EN_VM_BIND is set to 1 to enable VM_BIND ops. + * + * With VM_BIND enabled, userspace is required to allocate iova and use the + * VM_BIND ops for map/unmap ioctls. MSM_INFO_SET_IOVA and MSM_INFO_GET_IOVA + * will be rejected. (The latter does not have a sensible meaning when a BO + * can have multiple and/or partial mappings.) + * + * With VM_BIND enabled, userspace does not include a submit_bo table in the + * SUBMIT ioctl (this will be rejected), the resident set is determined by + * the the VM_BIND ops. + * + * Enabling VM_BIND will fail on devices which do not have per-process pgtables. + * And it is not allowed to disable VM_BIND once it has been enabled. + * + * Enabling VM_BIND should be done (attempted) prior to allocating any BOs or + * submitqueues of type MSM_SUBMITQUEUE_VM_BIND. + * + * Relatedly, when VM_BIND mode is enabled, the kernel will not try to recover + * from GPU faults or failed async VM_BIND ops, in particular because it is + * difficult to communicate to userspace which op failed so that userspace + * could rewind and try again. When the VM is marked unusable, the SUBMIT + * ioctl will throw -EPIPE. + */ +#define MSM_PARAM_EN_VM_BIND 0x16 /* WO, once */ /* For backwards compat. The original support for preemption was based on * a single ring per priority level so # of priority levels equals the # -- cgit v1.2.3 From b58e12a66e47eaf95b31bbefbc260e5a0b3e638c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 29 Jun 2025 13:13:07 -0700 Subject: drm/msm: Add _NO_SHARE flag Buffers that are not shared between contexts can share a single resv object. This way drm_gpuvm will not track them as external objects, and submit-time validating overhead will be O(1) for all N non-shared BOs, instead of O(n). Signed-off-by: Rob Clark Signed-off-by: Rob Clark Tested-by: Antonino Maniscalco Reviewed-by: Antonino Maniscalco Patchwork: https://patchwork.freedesktop.org/patch/661497/ --- include/uapi/drm/msm_drm.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index b974f5a24dbc..1bccc347945c 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -140,6 +140,19 @@ struct drm_msm_param { #define MSM_BO_SCANOUT 0x00000001 /* scanout capable */ #define MSM_BO_GPU_READONLY 0x00000002 +/* Private buffers do not need to be explicitly listed in the SUBMIT + * ioctl, unless referenced by a drm_msm_gem_submit_cmd. Private + * buffers may NOT be imported/exported or used for scanout (or any + * other situation where buffers can be indefinitely pinned, but + * cases other than scanout are all kernel owned BOs which are not + * visible to userspace). + * + * In exchange for those constraints, all private BOs associated with + * a single context (drm_file) share a single dma_resv, and if there + * has been no eviction since the last submit, there are no per-BO + * bookeeping to do, significantly cutting the SUBMIT overhead. + */ +#define MSM_BO_NO_SHARE 0x00000004 #define MSM_BO_CACHE_MASK 0x000f0000 /* cache modes */ #define MSM_BO_CACHED 0x00010000 @@ -149,6 +162,7 @@ struct drm_msm_param { #define MSM_BO_FLAGS (MSM_BO_SCANOUT | \ MSM_BO_GPU_READONLY | \ + MSM_BO_NO_SHARE | \ MSM_BO_CACHE_MASK) struct drm_msm_gem_new { -- cgit v1.2.3 From e1341f91450525b94474b75d5e77587d1d84e52c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 29 Jun 2025 13:13:12 -0700 Subject: drm/msm: Extract out syncobj helpers We'll be re-using these for the VM_BIND ioctl. Also, rename a few things in the uapi header to reflect that syncobj use is not specific to the submit ioctl. Signed-off-by: Rob Clark Signed-off-by: Rob Clark Tested-by: Antonino Maniscalco Reviewed-by: Antonino Maniscalco Patchwork: https://patchwork.freedesktop.org/patch/661512/ --- include/uapi/drm/msm_drm.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 1bccc347945c..2c2fc4b284d0 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -220,6 +220,17 @@ struct drm_msm_gem_cpu_fini { * Cmdstream Submission: */ +#define MSM_SYNCOBJ_RESET 0x00000001 /* Reset syncobj after wait. */ +#define MSM_SYNCOBJ_FLAGS ( \ + MSM_SYNCOBJ_RESET | \ + 0) + +struct drm_msm_syncobj { + __u32 handle; /* in, syncobj handle. */ + __u32 flags; /* in, from MSM_SUBMIT_SYNCOBJ_FLAGS */ + __u64 point; /* in, timepoint for timeline syncobjs. */ +}; + /* The value written into the cmdstream is logically: * * ((relocbuf->gpuaddr + reloc_offset) << shift) | or @@ -309,17 +320,6 @@ struct drm_msm_gem_submit_bo { MSM_SUBMIT_FENCE_SN_IN | \ 0) -#define MSM_SUBMIT_SYNCOBJ_RESET 0x00000001 /* Reset syncobj after wait. */ -#define MSM_SUBMIT_SYNCOBJ_FLAGS ( \ - MSM_SUBMIT_SYNCOBJ_RESET | \ - 0) - -struct drm_msm_gem_submit_syncobj { - __u32 handle; /* in, syncobj handle. */ - __u32 flags; /* in, from MSM_SUBMIT_SYNCOBJ_FLAGS */ - __u64 point; /* in, timepoint for timeline syncobjs. */ -}; - /* Each cmdstream submit consists of a table of buffers involved, and * one or more cmdstream buffers. This allows for conditional execution * (context-restore), and IB buffers needed for per tile/bin draw cmds. @@ -333,8 +333,8 @@ struct drm_msm_gem_submit { __u64 cmds; /* in, ptr to array of submit_cmd's */ __s32 fence_fd; /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */ __u32 queueid; /* in, submitqueue id */ - __u64 in_syncobjs; /* in, ptr to array of drm_msm_gem_submit_syncobj */ - __u64 out_syncobjs; /* in, ptr to array of drm_msm_gem_submit_syncobj */ + __u64 in_syncobjs; /* in, ptr to array of drm_msm_syncobj */ + __u64 out_syncobjs; /* in, ptr to array of drm_msm_syncobj */ __u32 nr_in_syncobjs; /* in, number of entries in in_syncobj */ __u32 nr_out_syncobjs; /* in, number of entries in out_syncobj. */ __u32 syncobj_stride; /* in, stride of syncobj arrays. */ -- cgit v1.2.3 From 92395af63a9958615edfa9d4ef1ea72c92a00410 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 29 Jun 2025 13:13:14 -0700 Subject: drm/msm: Add VM_BIND submitqueue This submitqueue type isn't tied to a hw ringbuffer, but instead executes on the CPU for performing async VM_BIND ops. Signed-off-by: Rob Clark Signed-off-by: Rob Clark Tested-by: Antonino Maniscalco Reviewed-by: Antonino Maniscalco Patchwork: https://patchwork.freedesktop.org/patch/661517/ --- include/uapi/drm/msm_drm.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 2c2fc4b284d0..6d6cd1219926 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -385,12 +385,19 @@ struct drm_msm_gem_madvise { /* * Draw queues allow the user to set specific submission parameter. Command * submissions specify a specific submitqueue to use. ID 0 is reserved for - * backwards compatibility as a "default" submitqueue + * backwards compatibility as a "default" submitqueue. + * + * Because VM_BIND async updates happen on the CPU, they must run on a + * virtual queue created with the flag MSM_SUBMITQUEUE_VM_BIND. If we had + * a way to do pgtable updates on the GPU, we could drop this restriction. */ #define MSM_SUBMITQUEUE_ALLOW_PREEMPT 0x00000001 +#define MSM_SUBMITQUEUE_VM_BIND 0x00000002 /* virtual queue for VM_BIND ops */ + #define MSM_SUBMITQUEUE_FLAGS ( \ MSM_SUBMITQUEUE_ALLOW_PREEMPT | \ + MSM_SUBMITQUEUE_VM_BIND | \ 0) /* -- cgit v1.2.3 From 2e6a8a1fe2b262a6dfd0a65041fcd830ee1e7143 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 29 Jun 2025 13:13:18 -0700 Subject: drm/msm: Add VM_BIND ioctl Add a VM_BIND ioctl for binding/unbinding buffers into a VM. This is only supported if userspace has opted in to MSM_PARAM_EN_VM_BIND. Signed-off-by: Rob Clark Signed-off-by: Rob Clark Tested-by: Antonino Maniscalco Reviewed-by: Antonino Maniscalco Patchwork: https://patchwork.freedesktop.org/patch/661524/ --- include/uapi/drm/msm_drm.h | 74 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 6d6cd1219926..5c67294edc95 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -272,7 +272,10 @@ struct drm_msm_gem_submit_cmd { __u32 size; /* in, cmdstream size */ __u32 pad; __u32 nr_relocs; /* in, number of submit_reloc's */ - __u64 relocs; /* in, ptr to array of submit_reloc's */ + union { + __u64 relocs; /* in, ptr to array of submit_reloc's */ + __u64 iova; /* cmdstream address (for VM_BIND contexts) */ + }; }; /* Each buffer referenced elsewhere in the cmdstream submit (ie. the @@ -339,7 +342,74 @@ struct drm_msm_gem_submit { __u32 nr_out_syncobjs; /* in, number of entries in out_syncobj. */ __u32 syncobj_stride; /* in, stride of syncobj arrays. */ __u32 pad; /*in, reserved for future use, always 0. */ +}; + +#define MSM_VM_BIND_OP_UNMAP 0 +#define MSM_VM_BIND_OP_MAP 1 +#define MSM_VM_BIND_OP_MAP_NULL 2 + +#define MSM_VM_BIND_OP_DUMP 1 +#define MSM_VM_BIND_OP_FLAGS ( \ + MSM_VM_BIND_OP_DUMP | \ + 0) +/** + * struct drm_msm_vm_bind_op - bind/unbind op to run + */ +struct drm_msm_vm_bind_op { + /** @op: one of MSM_VM_BIND_OP_x */ + __u32 op; + /** @handle: GEM object handle, MBZ for UNMAP or MAP_NULL */ + __u32 handle; + /** @obj_offset: Offset into GEM object, MBZ for UNMAP or MAP_NULL */ + __u64 obj_offset; + /** @iova: Address to operate on */ + __u64 iova; + /** @range: Number of bites to to map/unmap */ + __u64 range; + /** @flags: Bitmask of MSM_VM_BIND_OP_FLAG_x */ + __u32 flags; + /** @pad: MBZ */ + __u32 pad; +}; + +#define MSM_VM_BIND_FENCE_FD_IN 0x00000001 +#define MSM_VM_BIND_FENCE_FD_OUT 0x00000002 +#define MSM_VM_BIND_FLAGS ( \ + MSM_VM_BIND_FENCE_FD_IN | \ + MSM_VM_BIND_FENCE_FD_OUT | \ + 0) + +/** + * struct drm_msm_vm_bind - Input of &DRM_IOCTL_MSM_VM_BIND + */ +struct drm_msm_vm_bind { + /** @flags: in, bitmask of MSM_VM_BIND_x */ + __u32 flags; + /** @nr_ops: the number of bind ops in this ioctl */ + __u32 nr_ops; + /** @fence_fd: in/out fence fd (see MSM_VM_BIND_FENCE_FD_IN/OUT) */ + __s32 fence_fd; + /** @queue_id: in, submitqueue id */ + __u32 queue_id; + /** @in_syncobjs: in, ptr to array of drm_msm_gem_syncobj */ + __u64 in_syncobjs; + /** @out_syncobjs: in, ptr to array of drm_msm_gem_syncobj */ + __u64 out_syncobjs; + /** @nr_in_syncobjs: in, number of entries in in_syncobj */ + __u32 nr_in_syncobjs; + /** @nr_out_syncobjs: in, number of entries in out_syncobj */ + __u32 nr_out_syncobjs; + /** @syncobj_stride: in, stride of syncobj arrays */ + __u32 syncobj_stride; + /** @op_stride: sizeof each struct drm_msm_vm_bind_op in @ops */ + __u32 op_stride; + union { + /** @op: used if num_ops == 1 */ + struct drm_msm_vm_bind_op op; + /** @ops: userptr to array of drm_msm_vm_bind_op if num_ops > 1 */ + __u64 ops; + }; }; #define MSM_WAIT_FENCE_BOOST 0x00000001 @@ -435,6 +505,7 @@ struct drm_msm_submitqueue_query { #define DRM_MSM_SUBMITQUEUE_NEW 0x0A #define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B #define DRM_MSM_SUBMITQUEUE_QUERY 0x0C +#define DRM_MSM_VM_BIND 0x0D #define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param) #define DRM_IOCTL_MSM_SET_PARAM DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SET_PARAM, struct drm_msm_param) @@ -448,6 +519,7 @@ struct drm_msm_submitqueue_query { #define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue) #define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32) #define DRM_IOCTL_MSM_SUBMITQUEUE_QUERY DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_QUERY, struct drm_msm_submitqueue_query) +#define DRM_IOCTL_MSM_VM_BIND DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_VM_BIND, struct drm_msm_vm_bind) #if defined(__cplusplus) } -- cgit v1.2.3 From 1924272b9ce1edc5694e6d112097fd51e821980e Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 26 Jun 2025 11:02:28 +0200 Subject: soc: qcom: Add UBWC config provider Add a file that will serve as a single source of truth for UBWC configuration data for various multimedia blocks. Reviewed-by: Dmitry Baryshkov Signed-off-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/660959/ Signed-off-by: Rob Clark --- include/linux/soc/qcom/ubwc.h | 65 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 include/linux/soc/qcom/ubwc.h (limited to 'include') diff --git a/include/linux/soc/qcom/ubwc.h b/include/linux/soc/qcom/ubwc.h new file mode 100644 index 000000000000..b92fc402638b --- /dev/null +++ b/include/linux/soc/qcom/ubwc.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2018, The Linux Foundation + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +#ifndef __QCOM_UBWC_H__ +#define __QCOM_UBWC_H__ + +#include +#include + +struct qcom_ubwc_cfg_data { + u32 ubwc_enc_version; + /* Can be read from MDSS_BASE + 0x58 */ + u32 ubwc_dec_version; + + /** + * @ubwc_swizzle: Whether to enable level 1, 2 & 3 bank swizzling. + * + * UBWC 1.0 always enables all three levels. + * UBWC 2.0 removes level 1 bank swizzling, leaving levels 2 & 3. + * UBWC 4.0 adds the optional ability to disable levels 2 & 3. + * + * This is a bitmask where BIT(0) enables level 1, BIT(1) + * controls level 2, and BIT(2) enables level 3. + */ + u32 ubwc_swizzle; + + /** + * @highest_bank_bit: Highest Bank Bit + * + * The Highest Bank Bit value represents the bit of the highest + * DDR bank. This should ideally use DRAM type detection. + */ + int highest_bank_bit; + bool ubwc_bank_spread; + + /** + * @macrotile_mode: Macrotile Mode + * + * Whether to use 4-channel macrotiling mode or the newer + * 8-channel macrotiling mode introduced in UBWC 3.1. 0 is + * 4-channel and 1 is 8-channel. + */ + bool macrotile_mode; +}; + +#define UBWC_1_0 0x10000000 +#define UBWC_2_0 0x20000000 +#define UBWC_3_0 0x30000000 +#define UBWC_4_0 0x40000000 +#define UBWC_4_3 0x40030000 +#define UBWC_5_0 0x50000000 + +#ifdef CONFIG_QCOM_UBWC_CONFIG +const struct qcom_ubwc_cfg_data *qcom_ubwc_config_get_data(void); +#else +static inline const struct qcom_ubwc_cfg_data *qcom_ubwc_config_get_data(void) +{ + return ERR_PTR(-EOPNOTSUPP); +} +#endif + +#endif /* __QCOM_UBWC_H__ */ -- cgit v1.2.3 From 45a2974157d2d8b6f26911582d64089cab992d8b Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 26 Jun 2025 11:02:30 +0200 Subject: drm/msm: Use the central UBWC config database As discussed a lot in the past, the UBWC config must be coherent across a number of IP blocks (currently display and GPU, but it also may/will concern camera/video as the drivers evolve). So far, we've been trying to keep the values reasonable in each of the two drivers separately, but it really make sense to do so centrally, especially given certain fields (e.g. HBB) may need to be gathered dynamically. To reduce room for error, move to fetching the config from a central source, so that the data programmed into the hardware is consistent across all multimedia blocks that request it. Reviewed-by: Dmitry Baryshkov Signed-off-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/660963/ Signed-off-by: Rob Clark --- include/linux/soc/qcom/ubwc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/soc/qcom/ubwc.h b/include/linux/soc/qcom/ubwc.h index b92fc402638b..d65df559603d 100644 --- a/include/linux/soc/qcom/ubwc.h +++ b/include/linux/soc/qcom/ubwc.h @@ -53,7 +53,7 @@ struct qcom_ubwc_cfg_data { #define UBWC_4_3 0x40030000 #define UBWC_5_0 0x50000000 -#ifdef CONFIG_QCOM_UBWC_CONFIG +#if IS_ENABLED(CONFIG_QCOM_UBWC_CONFIG) const struct qcom_ubwc_cfg_data *qcom_ubwc_config_get_data(void); #else static inline const struct qcom_ubwc_cfg_data *qcom_ubwc_config_get_data(void) -- cgit v1.2.3 From 87cfc79dcd605056247c62d65ea41ce6c65cdbe3 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 26 Jun 2025 11:02:34 +0200 Subject: drm/msm/a6xx: Resolve the meaning of UBWC_MODE This bit is set iff the UBWC version is 1.0. That notably does not include QCM2290's "no UBWC". This commit is intentionally cross-subsystem to ease review, as the patchset is intended to be merged together, with a maintainer consensus. Reviewed-by: Dmitry Baryshkov Signed-off-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/660971/ Signed-off-by: Rob Clark --- include/linux/soc/qcom/ubwc.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/soc/qcom/ubwc.h b/include/linux/soc/qcom/ubwc.h index d65df559603d..f0334f4ece20 100644 --- a/include/linux/soc/qcom/ubwc.h +++ b/include/linux/soc/qcom/ubwc.h @@ -62,4 +62,14 @@ static inline const struct qcom_ubwc_cfg_data *qcom_ubwc_config_get_data(void) } #endif +static inline bool qcom_ubwc_get_ubwc_mode(const struct qcom_ubwc_cfg_data *cfg) +{ + bool ret = cfg->ubwc_enc_version == UBWC_1_0; + + if (ret && !(cfg->ubwc_swizzle & BIT(0))) + pr_err("UBWC config discrepancy - level 1 swizzling disabled on UBWC 1.0\n"); + + return ret; +} + #endif /* __QCOM_UBWC_H__ */ -- cgit v1.2.3 From 709dd2ff2357734f5a0b2ef68e1f7c4256543a70 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 26 Jun 2025 11:02:39 +0200 Subject: soc: qcom: ubwc: Add #defines for UBWC swizzle bits Make the values a bit more meaningful. This commit is intentionally cross-subsystem to ease review, as the patchset is intended to be merged together, with a maintainer consensus. Reviewed-by: Dmitry Baryshkov Signed-off-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/660981/ Signed-off-by: Rob Clark --- include/linux/soc/qcom/ubwc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/soc/qcom/ubwc.h b/include/linux/soc/qcom/ubwc.h index f0334f4ece20..1ed8b1b16bc9 100644 --- a/include/linux/soc/qcom/ubwc.h +++ b/include/linux/soc/qcom/ubwc.h @@ -21,11 +21,11 @@ struct qcom_ubwc_cfg_data { * UBWC 1.0 always enables all three levels. * UBWC 2.0 removes level 1 bank swizzling, leaving levels 2 & 3. * UBWC 4.0 adds the optional ability to disable levels 2 & 3. - * - * This is a bitmask where BIT(0) enables level 1, BIT(1) - * controls level 2, and BIT(2) enables level 3. */ u32 ubwc_swizzle; +#define UBWC_SWIZZLE_ENABLE_LVL1 BIT(0) +#define UBWC_SWIZZLE_ENABLE_LVL2 BIT(1) +#define UBWC_SWIZZLE_ENABLE_LVL3 BIT(2) /** * @highest_bank_bit: Highest Bank Bit @@ -66,7 +66,7 @@ static inline bool qcom_ubwc_get_ubwc_mode(const struct qcom_ubwc_cfg_data *cfg) { bool ret = cfg->ubwc_enc_version == UBWC_1_0; - if (ret && !(cfg->ubwc_swizzle & BIT(0))) + if (ret && !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL1)) pr_err("UBWC config discrepancy - level 1 swizzling disabled on UBWC 1.0\n"); return ret; -- cgit v1.2.3 From b78287c54bd87924ee328d51336b44a74304d7cc Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 4 Jul 2025 15:10:45 +0300 Subject: drm/bridge: Fix kdoc comment for DRM_BRIDGE_OP_HDMI_CEC_ADAPTER Correct the kernel-doc comment for DRM_BRIDGE_OP_HDMI_CEC_ADAPTER member of enum drm_bridge_ops. This seems to be just a copy-paste artifact from DRM_BRIDGE_OP_HDMI_CEC_NOTIFIER above. Signed-off-by: Cristian Ciocaltea Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250704-drm-bridge-kdoc-fix-v1-1-b08c67212851@collabora.com Signed-off-by: Dmitry Baryshkov --- include/drm/drm_bridge.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 7f66f9018c10..d2454ba83db3 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -1051,7 +1051,7 @@ enum drm_bridge_ops { */ DRM_BRIDGE_OP_HDMI_CEC_NOTIFIER = BIT(7), /** - * @DRM_BRIDGE_OP_HDMI_CEC_ADAPTER: The bridge requires CEC notifier + * @DRM_BRIDGE_OP_HDMI_CEC_ADAPTER: The bridge requires CEC adapter * to be present. */ DRM_BRIDGE_OP_HDMI_CEC_ADAPTER = BIT(8), -- cgit v1.2.3 From 9c06f26ba5f5da14bcac405c7a652dcf578a785d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 30 Apr 2025 13:56:01 +0200 Subject: pwm: Add support for pwmchip devices for faster and easier userspace access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With this change each pwmchip defining the new-style waveform callbacks can be accessed from userspace via a character device. Compared to the sysfs-API this is faster and allows to pass the whole configuration in a single ioctl allowing atomic application and thus reducing glitches. On an STM32MP13 I see: root@DistroKit:~ time pwmtestperf real 0m 1.27s user 0m 0.02s sys 0m 1.21s root@DistroKit:~ rm /dev/pwmchip0 root@DistroKit:~ time pwmtestperf real 0m 3.61s user 0m 0.27s sys 0m 3.26s pwmtestperf does essentially: for i in 0 .. 50000: pwm_set_waveform(duty_length_ns=i, period_length_ns=50000, duty_offset_ns=0) and in the presence of /dev/pwmchip0 is uses the ioctls introduced here, without that device it uses /sys/class/pwm/pwmchip0. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/ad4a4e49ae3f8ea81e23cac1ac12b338c3bf5c5b.1746010245.git.u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- include/linux/pwm.h | 3 +++ include/uapi/linux/pwm.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 include/uapi/linux/pwm.h (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 63a17d2b4ec8..2492c91452f9 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -2,6 +2,7 @@ #ifndef __LINUX_PWM_H #define __LINUX_PWM_H +#include #include #include #include @@ -311,6 +312,7 @@ struct pwm_ops { /** * struct pwm_chip - abstract a PWM controller * @dev: device providing the PWMs + * @cdev: &struct cdev for this device * @ops: callbacks for this PWM controller * @owner: module providing this chip * @id: unique number of this PWM chip @@ -325,6 +327,7 @@ struct pwm_ops { */ struct pwm_chip { struct device dev; + struct cdev cdev; const struct pwm_ops *ops; struct module *owner; unsigned int id; diff --git a/include/uapi/linux/pwm.h b/include/uapi/linux/pwm.h new file mode 100644 index 000000000000..182d59cc07ee --- /dev/null +++ b/include/uapi/linux/pwm.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ + +#ifndef _UAPI_PWM_H_ +#define _UAPI_PWM_H_ + +#include +#include + +/** + * struct pwmchip_waveform - Describe a PWM waveform for a pwm_chip's PWM channel + * @hwpwm: per-chip relative index of the PWM device + * @__pad: padding, must be zero + * @period_length_ns: duration of the repeating period. + * A value of 0 represents a disabled PWM. + * @duty_length_ns: duration of the active part in each period + * @duty_offset_ns: offset of the rising edge from a period's start + */ +struct pwmchip_waveform { + __u32 hwpwm; + __u32 __pad; + __u64 period_length_ns; + __u64 duty_length_ns; + __u64 duty_offset_ns; +}; + +/* Reserves the passed hwpwm for exclusive control. */ +#define PWM_IOCTL_REQUEST _IO(0x75, 1) + +/* counter part to PWM_IOCTL_REQUEST */ +#define PWM_IOCTL_FREE _IO(0x75, 2) + +/* + * Modifies the passed wf according to hardware constraints. All parameters are + * rounded down to the next possible value, unless there is no such value, then + * values are rounded up. Note that zero isn't considered for rounding down + * period_length_ns. + */ +#define PWM_IOCTL_ROUNDWF _IOWR(0x75, 3, struct pwmchip_waveform) + +/* Get the currently implemented waveform */ +#define PWM_IOCTL_GETWF _IOWR(0x75, 4, struct pwmchip_waveform) + +/* Like PWM_IOCTL_ROUNDWF + PWM_IOCTL_SETEXACTWF in one go. */ +#define PWM_IOCTL_SETROUNDEDWF _IOW(0x75, 5, struct pwmchip_waveform) + +/* + * Program the PWM to emit exactly the passed waveform, subject only to rounding + * down each value less than 1 ns. Returns 0 on success, -EDOM if the waveform + * cannot be implemented exactly, or other negative error codes. + */ +#define PWM_IOCTL_SETEXACTWF _IOW(0x75, 6, struct pwmchip_waveform) + +#endif /* _UAPI_PWM_H_ */ -- cgit v1.2.3 From edd3bcb1801e1bb98f4f81485140e18c86406ced Mon Sep 17 00:00:00 2001 From: Michal Wilczynski Date: Wed, 2 Jul 2025 15:45:29 +0200 Subject: pwm: Expose PWM_WFHWSIZE in public header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The WFHWSIZE constant defines the maximum size for the hardware-specific waveform representation buffer. It is currently local to drivers/pwm/core.c, which makes it inaccessible to external tools like bindgen. Move the constant to include/linux/pwm.h to make it part of the public API. As part of this change, rename it to PWM_WFHWSIZE to follow standard kernel conventions for namespacing macros in public headers. This allows bindgen to automatically generate a corresponding constant for the Rust PWM abstractions, ensuring the value remains synchronized between the C core and Rust code and preventing future maintenance issues. Signed-off-by: Michal Wilczynski Link: https://lore.kernel.org/r/20250702-rust-next-pwm-working-fan-for-sending-v7-1-67ef39ff1d29@samsung.com Signed-off-by: Uwe Kleine-König --- include/linux/pwm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 2492c91452f9..8cafc483db53 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -274,6 +274,8 @@ struct pwm_capture { unsigned int duty_cycle; }; +#define PWM_WFHWSIZE 20 + /** * struct pwm_ops - PWM controller operations * @request: optional hook for requesting a PWM -- cgit v1.2.3 From 9bad4bec5daddbb296481af759f9d56c849ba96f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 1 Jul 2025 13:49:40 +0200 Subject: gpio: mmio: remove struct bgpio_pdata With no more users, we can now remove struct bgpio_pdata. Move the relevant bits from bgpio_parse_fw() into bgpio_pdev_probe() while maintaining the logical ordering (get flags before calling bgpio_init()). Link: https://lore.kernel.org/r/20250701-gpio-mmio-pdata-v2-6-ebf34d273497@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index b53233051bee..602d4acd36b2 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -718,12 +718,6 @@ const unsigned long *gpiochip_query_valid_mask(const struct gpio_chip *gc); /* get driver data */ void *gpiochip_get_data(struct gpio_chip *gc); -struct bgpio_pdata { - const char *label; - int base; - int ngpio; -}; - #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY int gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *gc, -- cgit v1.2.3 From 8595375e4fded27de24b189c692c2c50051a7b3b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 2 Jul 2025 11:22:08 +0200 Subject: gpio: generic: add new generic GPIO chip API As the first step in removing the fields specific to the gpio-mmio module from struct gpio_chip, we introduce a new set of generic GPIO chip interfaces that are meant to replace the existing bgpio_ ones. The new initialization function - gpio_generic_chip_init() - takes a configuration structure as argument instead of 9 separate parameters. This will allow easy extension if needed in the future. We hide the locking details behind a set of helpers in order to be able to move the raw spinlock out of struct gpio_chip without the users noticing. For now, the new APIs just wrap the existing ones. Once all users have been converted to the new interfaces, we'll pull them into gpio-mmio and implement them in a backward-compatible way while also moving all fields specific to the generic GPIO chip into struct gpio_generic_chip. Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250702-gpio-mmio-rework-v2-1-6b77aab684d8@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/generic.h | 120 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 include/linux/gpio/generic.h (limited to 'include') diff --git a/include/linux/gpio/generic.h b/include/linux/gpio/generic.h new file mode 100644 index 000000000000..b511acd58ab0 --- /dev/null +++ b/include/linux/gpio/generic.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __LINUX_GPIO_GENERIC_H +#define __LINUX_GPIO_GENERIC_H + +#include +#include +#include + +struct device; + +/** + * struct gpio_generic_chip_config - Generic GPIO chip configuration data + * @dev: Parent device of the new GPIO chip (compulsory). + * @sz: Size (width) of the MMIO registers in bytes, typically 1, 2 or 4. + * @dat: MMIO address for the register to READ the value of the GPIO lines, it + * is expected that a 1 in the corresponding bit in this register means + * the line is asserted. + * @set: MMIO address for the register to SET the value of the GPIO lines, it + * is expected that we write the line with 1 in this register to drive + * the GPIO line high. + * @clr: MMIO address for the register to CLEAR the value of the GPIO lines, + * it is expected that we write the line with 1 in this register to + * drive the GPIO line low. It is allowed to leave this address as NULL, + * in that case the SET register will be assumed to also clear the GPIO + * lines, by actively writing the line with 0. + * @dirout: MMIO address for the register to set the line as OUTPUT. It is + * assumed that setting a line to 1 in this register will turn that + * line into an output line. Conversely, setting the line to 0 will + * turn that line into an input. + * @dirin: MMIO address for the register to set this line as INPUT. It is + * assumed that setting a line to 1 in this register will turn that + * line into an input line. Conversely, setting the line to 0 will + * turn that line into an output. + * @flags: Different flags that will affect the behaviour of the device, such + * as endianness etc. + */ +struct gpio_generic_chip_config { + struct device *dev; + unsigned long sz; + void __iomem *dat; + void __iomem *set; + void __iomem *clr; + void __iomem *dirout; + void __iomem *dirin; + unsigned long flags; +}; + +/** + * struct gpio_generic_chip - Generic GPIO chip implementation. + * @gc: The underlying struct gpio_chip object, implementing low-level GPIO + * chip routines. + */ +struct gpio_generic_chip { + struct gpio_chip gc; +}; + +/** + * gpio_generic_chip_init() - Initialize a generic GPIO chip. + * @chip: Generic GPIO chip to set up. + * @cfg: Generic GPIO chip configuration. + * + * Returns 0 on success, negative error number on failure. + */ +static inline int +gpio_generic_chip_init(struct gpio_generic_chip *chip, + const struct gpio_generic_chip_config *cfg) +{ + return bgpio_init(&chip->gc, cfg->dev, cfg->sz, cfg->dat, cfg->set, + cfg->clr, cfg->dirout, cfg->dirin, cfg->flags); +} + +/** + * gpio_generic_chip_set() - Set the GPIO line value of the generic GPIO chip. + * @chip: Generic GPIO chip to use. + * @offset: Hardware offset of the line to set. + * @value: New GPIO line value. + * + * Some modules using the generic GPIO chip, need to set line values in their + * direction setters but they don't have access to the gpio-mmio symbols so + * they use the function pointer in struct gpio_chip directly. This is not + * optimal and can lead to crashes at run-time in some instances. This wrapper + * provides a safe interface for users. + * + * Returns: 0 on success, negative error number of failure. + */ +static inline int +gpio_generic_chip_set(struct gpio_generic_chip *chip, unsigned int offset, + int value) +{ + if (WARN_ON(!chip->gc.set_rv)) + return -EOPNOTSUPP; + + return chip->gc.set_rv(&chip->gc, offset, value); +} + +#define gpio_generic_chip_lock(gen_gc) \ + raw_spin_lock(&(gen_gc)->gc.bgpio_lock) + +#define gpio_generic_chip_unlock(gen_gc) \ + raw_spin_unlock(&(gen_gc)->gc.bgpio_lock) + +#define gpio_generic_chip_lock_irqsave(gen_gc, flags) \ + raw_spin_lock_irqsave(&(gen_gc)->gc.bgpio_lock, flags) + +#define gpio_generic_chip_unlock_irqrestore(gen_gc, flags) \ + raw_spin_unlock_irqrestore(&(gen_gc)->gc.bgpio_lock, flags) + +DEFINE_LOCK_GUARD_1(gpio_generic_lock, + struct gpio_generic_chip, + gpio_generic_chip_lock(_T->lock), + gpio_generic_chip_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(gpio_generic_lock_irqsave, + struct gpio_generic_chip, + gpio_generic_chip_lock_irqsave(_T->lock, _T->flags), + gpio_generic_chip_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + +#endif /* __LINUX_GPIO_GENERIC_H */ -- cgit v1.2.3 From c5fd399a24c8e2865524361f7dc4d4a6899be4f4 Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Tue, 1 Jul 2025 17:55:41 +1000 Subject: wifi: mac80211: correctly identify S1G short beacon mac80211 identifies a short beacon by the presence of the next TBTT field, however the standard actually doesn't explicitly state that the next TBTT can't be in a long beacon or even that it is required in a short beacon - and as a result this validation does not work for all vendor implementations. The standard explicitly states that an S1G long beacon shall contain the S1G beacon compatibility element as the first element in a beacon transmitted at a TBTT that is not a TSBTT (Target Short Beacon Transmission Time) as per IEEE80211-2024 11.1.3.10.1. This is validated by 9.3.4.3 Table 9-76 which states that the S1G beacon compatibility element is only allowed in the full set and is not allowed in the minimum set of elements permitted for use within short beacons. Correctly identify short beacons by the lack of an S1G beacon compatibility element as the first element in an S1G beacon frame. Fixes: 9eaffe5078ca ("cfg80211: convert S1G beacon to scan results") Signed-off-by: Simon Wadsworth Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20250701075541.162619-1-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 45 +++++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 22f39e5e2ff1..996be3c2cff0 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -662,18 +662,6 @@ static inline bool ieee80211_s1g_has_cssid(__le16 fc) (fc & cpu_to_le16(IEEE80211_S1G_BCN_CSSID)); } -/** - * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon - * @fc: frame control bytes in little-endian byteorder - * Return: whether or not the frame is an S1G short beacon, - * i.e. it is an S1G beacon with 'next TBTT' flag set - */ -static inline bool ieee80211_is_s1g_short_beacon(__le16 fc) -{ - return ieee80211_is_s1g_beacon(fc) && - (fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT)); -} - /** * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM * @fc: frame control bytes in little-endian byteorder @@ -4901,6 +4889,39 @@ static inline bool ieee80211_is_ftm(struct sk_buff *skb) return false; } +/** + * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon + * @fc: frame control bytes in little-endian byteorder + * @variable: pointer to the beacon frame elements + * @variable_len: length of the frame elements + * Return: whether or not the frame is an S1G short beacon. As per + * IEEE80211-2024 11.1.3.10.1, The S1G beacon compatibility element shall + * always be present as the first element in beacon frames generated at a + * TBTT (Target Beacon Transmission Time), so any frame not containing + * this element must have been generated at a TSBTT (Target Short Beacon + * Transmission Time) that is not a TBTT. Additionally, short beacons are + * prohibited from containing the S1G beacon compatibility element as per + * IEEE80211-2024 9.3.4.3 Table 9-76, so if we have an S1G beacon with + * either no elements or the first element is not the beacon compatibility + * element, we have a short beacon. + */ +static inline bool ieee80211_is_s1g_short_beacon(__le16 fc, const u8 *variable, + size_t variable_len) +{ + if (!ieee80211_is_s1g_beacon(fc)) + return false; + + /* + * If the frame does not contain at least 1 element (this is perfectly + * valid in a short beacon) and is an S1G beacon, we have a short + * beacon. + */ + if (variable_len < 2) + return true; + + return variable[0] != WLAN_EID_S1G_BCN_COMPAT; +} + struct element { u8 id; u8 datalen; -- cgit v1.2.3 From 377d7860c960ac8e672881bc50353d867e2f94a4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:33 +0200 Subject: cred: add auto cleanup method Add a simple auto cleanup method for struct cred. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-19-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- include/linux/cred.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index 5658a3bfe803..a102a10f833f 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -263,6 +263,8 @@ static inline void put_cred(const struct cred *cred) put_cred_many(cred, 1); } +DEFINE_FREE(put_cred, struct cred *, if (!IS_ERR_OR_NULL(_T)) put_cred(_T)) + /** * current_cred - Access the current task's subjective credentials * -- cgit v1.2.3 From 3ebed2fddf6fac5729ffc8c471c87d111b641678 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Fri, 27 Jun 2025 22:51:22 +0200 Subject: power: supply: core: Add power_supply_get/set_property_direct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Power supply extensions might want to interact with the underlying power supply to retrieve data like serial numbers, charging status and more. However doing so causes psy->extensions_sem to be locked twice, possibly causing a deadlock. Provide special variants of power_supply_get/set_property() that ignore any power supply extensions and thus do not touch the associated psy->extensions_sem lock. Suggested-by: Hans de Goede Signed-off-by: Armin Wolf Acked-by: Sebastian Reichel Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250627205124.250433-1-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/power_supply.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 7803edaa8ff8..0cca01b5607b 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -888,15 +888,23 @@ static inline int power_supply_is_system_supplied(void) { return -ENOSYS; } extern int power_supply_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val); +int power_supply_get_property_direct(struct power_supply *psy, enum power_supply_property psp, + union power_supply_propval *val); #if IS_ENABLED(CONFIG_POWER_SUPPLY) extern int power_supply_set_property(struct power_supply *psy, enum power_supply_property psp, const union power_supply_propval *val); +int power_supply_set_property_direct(struct power_supply *psy, enum power_supply_property psp, + const union power_supply_propval *val); #else static inline int power_supply_set_property(struct power_supply *psy, enum power_supply_property psp, const union power_supply_propval *val) { return 0; } +static inline int power_supply_set_property_direct(struct power_supply *psy, + enum power_supply_property psp, + const union power_supply_propval *val) +{ return 0; } #endif extern void power_supply_external_power_changed(struct power_supply *psy); -- cgit v1.2.3 From 1cea5180f2f812c444ceebdc40f5d001bedd030d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 5 Jul 2025 14:19:50 -0600 Subject: block: remove pktcdvd driver This driver has long outlived it's utility, and it's broken and unloved. The main use case for this was direct mount with UDF of cd-rw drives that required 32kb packets. It would collect writes into that size and write them out in multiples of that. That's not a common use case anymore, the world has moved on from those kinds of media. To make matters worse, it's actively breaking setups where it's not even required or useful. Link: https://lore.kernel.org/linux-block/fxg6dksau4jsk3u5xldlyo2m7qgiux6vtdrz5rywseotsouqdv@urcrwz6qtd3r/ Link: https://lore.kernel.org/linux-block/dcc4836e-6da9-4208-ad27-bbd44b3a2063@kernel.dk/ Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/pktcdvd.h | 198 ------------------------------------------------ 1 file changed, 198 deletions(-) delete mode 100644 include/linux/pktcdvd.h (limited to 'include') diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h deleted file mode 100644 index 2f1b952d596a..000000000000 --- a/include/linux/pktcdvd.h +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright (C) 2000 Jens Axboe - * Copyright (C) 2001-2004 Peter Osterlund - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - * - * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and - * DVD-RW devices. - * - */ -#ifndef __PKTCDVD_H -#define __PKTCDVD_H - -#include -#include -#include -#include -#include -#include -#include - -/* default bio write queue congestion marks */ -#define PKT_WRITE_CONGESTION_ON 10000 -#define PKT_WRITE_CONGESTION_OFF 9000 - - -struct packet_settings -{ - __u32 size; /* packet size in (512 byte) sectors */ - __u8 fp; /* fixed packets */ - __u8 link_loss; /* the rest is specified - * as per Mt Fuji */ - __u8 write_type; - __u8 track_mode; - __u8 block_mode; -}; - -/* - * Very crude stats for now - */ -struct packet_stats -{ - unsigned long pkt_started; - unsigned long pkt_ended; - unsigned long secs_w; - unsigned long secs_rg; - unsigned long secs_r; -}; - -struct packet_cdrw -{ - struct list_head pkt_free_list; - struct list_head pkt_active_list; - spinlock_t active_list_lock; /* Serialize access to pkt_active_list */ - struct task_struct *thread; - atomic_t pending_bios; -}; - -/* - * Switch to high speed reading after reading this many kilobytes - * with no interspersed writes. - */ -#define HI_SPEED_SWITCH 512 - -struct packet_iosched -{ - atomic_t attention; /* Set to non-zero when queue processing is needed */ - int writing; /* Non-zero when writing, zero when reading */ - spinlock_t lock; /* Protecting read/write queue manipulations */ - struct bio_list read_queue; - struct bio_list write_queue; - sector_t last_write; /* The sector where the last write ended */ - int successive_reads; -}; - -/* - * 32 buffers of 2048 bytes - */ -#if (PAGE_SIZE % CD_FRAMESIZE) != 0 -#error "PAGE_SIZE must be a multiple of CD_FRAMESIZE" -#endif -#define PACKET_MAX_SIZE 128 -#define FRAMES_PER_PAGE (PAGE_SIZE / CD_FRAMESIZE) -#define PACKET_MAX_SECTORS (PACKET_MAX_SIZE * CD_FRAMESIZE >> 9) - -enum packet_data_state { - PACKET_IDLE_STATE, /* Not used at the moment */ - PACKET_WAITING_STATE, /* Waiting for more bios to arrive, so */ - /* we don't have to do as much */ - /* data gathering */ - PACKET_READ_WAIT_STATE, /* Waiting for reads to fill in holes */ - PACKET_WRITE_WAIT_STATE, /* Waiting for the write to complete */ - PACKET_RECOVERY_STATE, /* Recover after read/write errors */ - PACKET_FINISHED_STATE, /* After write has finished */ - - PACKET_NUM_STATES /* Number of possible states */ -}; - -/* - * Information needed for writing a single packet - */ -struct pktcdvd_device; - -struct packet_data -{ - struct list_head list; - - spinlock_t lock; /* Lock protecting state transitions and */ - /* orig_bios list */ - - struct bio_list orig_bios; /* Original bios passed to pkt_make_request */ - /* that will be handled by this packet */ - int write_size; /* Total size of all bios in the orig_bios */ - /* list, measured in number of frames */ - - struct bio *w_bio; /* The bio we will send to the real CD */ - /* device once we have all data for the */ - /* packet we are going to write */ - sector_t sector; /* First sector in this packet */ - int frames; /* Number of frames in this packet */ - - enum packet_data_state state; /* Current state */ - atomic_t run_sm; /* Incremented whenever the state */ - /* machine needs to be run */ - long sleep_time; /* Set this to non-zero to make the state */ - /* machine run after this many jiffies. */ - - atomic_t io_wait; /* Number of pending IO operations */ - atomic_t io_errors; /* Number of read/write errors during IO */ - - struct bio *r_bios[PACKET_MAX_SIZE]; /* bios to use during data gathering */ - struct page *pages[PACKET_MAX_SIZE / FRAMES_PER_PAGE]; - - int cache_valid; /* If non-zero, the data for the zone defined */ - /* by the sector variable is completely cached */ - /* in the pages[] vector. */ - - int id; /* ID number for debugging */ - struct pktcdvd_device *pd; -}; - -struct pkt_rb_node { - struct rb_node rb_node; - struct bio *bio; -}; - -struct packet_stacked_data -{ - struct bio *bio; /* Original read request bio */ - struct pktcdvd_device *pd; -}; -#define PSD_POOL_SIZE 64 - -struct pktcdvd_device -{ - struct file *bdev_file; /* dev attached */ - /* handle acquired for bdev during pkt_open_dev() */ - struct file *f_open_bdev; - dev_t pkt_dev; /* our dev */ - struct packet_settings settings; - struct packet_stats stats; - int refcnt; /* Open count */ - int write_speed; /* current write speed, kB/s */ - int read_speed; /* current read speed, kB/s */ - unsigned long offset; /* start offset */ - __u8 mode_offset; /* 0 / 8 */ - __u8 type; - unsigned long flags; - __u16 mmc3_profile; - __u32 nwa; /* next writable address */ - __u32 lra; /* last recorded address */ - struct packet_cdrw cdrw; - wait_queue_head_t wqueue; - - spinlock_t lock; /* Serialize access to bio_queue */ - struct rb_root bio_queue; /* Work queue of bios we need to handle */ - int bio_queue_size; /* Number of nodes in bio_queue */ - bool congested; /* Someone is waiting for bio_queue_size - * to drop. */ - sector_t current_sector; /* Keep track of where the elevator is */ - atomic_t scan_queue; /* Set to non-zero when pkt_handle_queue */ - /* needs to be run. */ - mempool_t rb_pool; /* mempool for pkt_rb_node allocations */ - - struct packet_iosched iosched; - struct gendisk *disk; - - int write_congestion_off; - int write_congestion_on; - - struct device *dev; /* sysfs pktcdvd[0-7] dev */ - - struct dentry *dfs_d_root; /* debugfs: devname directory */ - struct dentry *dfs_f_info; /* debugfs: info file */ -}; - -#endif /* __PKTCDVD_H */ -- cgit v1.2.3 From e7a1cbca0b4255ac3e03f53d440fa38f7a41d8cc Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 30 Jun 2025 16:34:11 +0200 Subject: drm/gem-shmem: Do not map s/g table by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vast majority of drivers that use GEM-SHMEM helpers do not use an s/g table for imported buffers; specifically all drivers that use DRM_GEM_SHMEM_DRIVER_OPS. Therefore convert the initializer macro to DRM_GEM_SHMEM_DRIVER_OPS_NO_MAP_SGT and remove the latter. This helps to avoid swiotbl errors, such as seen with some Aspeed systems ast 0000:07:00.0: swiotlb buffer is full (sz: 3145728 bytes), total 32768 (slots), used 0 (slots) The error is caused by the system's limited DMA capabilities and can happen with any GEM-SHMEM-based driver. It results in a performance penalty. In the case of vgem and vkms, the devices do not support DMA at all, which can result in failure to map the buffer object into the kernel's address space. [1][2] Avoiding the s/g table fixes this problem. The other drivers based on GEM-SHMEM, imagination, lima, panfrost, panthor, v3d and virtio, use the s/g table of imported buffers. Neither driver uses the default initializer, so they won't be affected by this change. Signed-off-by: Thomas Zimmermann Reported-by: Zenghui Yu Closes: https://lore.kernel.org/dri-devel/6d22bce3-4533-4cfa-96ba-64352b715741@linux.dev/ # [1] Reported-by: José Expósito Closes: https://lore.kernel.org/dri-devel/20250311172054.2903-1-jose.exposito89@gmail.com/ # [2] Tested-by: Zenghui Yu Reviewed-by: Christian König Reviewed-by: Louis Chauvet Link: https://lore.kernel.org/r/20250630143537.309052-1-tzimmermann@suse.de --- include/drm/drm_gem_shmem_helper.h | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index 35f7466dca84..92f5db84b9c2 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -293,23 +293,11 @@ struct drm_gem_object *drm_gem_shmem_prime_import_no_map(struct drm_device *dev, /** * DRM_GEM_SHMEM_DRIVER_OPS - Default shmem GEM operations * - * This macro provides a shortcut for setting the shmem GEM operations in - * the &drm_driver structure. + * This macro provides a shortcut for setting the shmem GEM operations + * in the &drm_driver structure. Drivers that do not require an s/g table + * for imported buffers should use this. */ #define DRM_GEM_SHMEM_DRIVER_OPS \ - .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table, \ - .dumb_create = drm_gem_shmem_dumb_create - -/** - * DRM_GEM_SHMEM_DRIVER_OPS_NO_MAP_SGT - shmem GEM operations - * without mapping sg_table on - * imported buffer. - * - * This macro provides a shortcut for setting the shmem GEM operations in - * the &drm_driver structure for drivers that do not require a sg_table on - * imported buffers. - */ -#define DRM_GEM_SHMEM_DRIVER_OPS_NO_MAP_SGT \ .gem_prime_import = drm_gem_shmem_prime_import_no_map, \ .dumb_create = drm_gem_shmem_dumb_create -- cgit v1.2.3 From c479d7cf06c3d65532442fa368b058e05dbba1a2 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 2 Jul 2025 06:37:07 -0500 Subject: reset: spacemit: add support for SpacemiT CCU resets Implement reset support for SpacemiT CCUs. A SpacemiT reset controller device is an auxiliary device associated with a clock controller (CCU). This patch defines the reset controllers for the MPMU, APBC, and MPMU CCUs, which already define clock controllers. It also adds RCPU, RCPU2, and ACPB2 CCUs, which only define resets. Signed-off-by: Alex Elder Reviewed-by: Philipp Zabel Reviewed-by: Yixun Lan Acked-by: Philipp Zabel Link: https://lore.kernel.org/r/20250702113709.291748-6-elder@riscstar.com Signed-off-by: Yixun Lan --- include/soc/spacemit/k1-syscon.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/soc/spacemit/k1-syscon.h b/include/soc/spacemit/k1-syscon.h index 53eff7691f33..c59bd7a38e5b 100644 --- a/include/soc/spacemit/k1-syscon.h +++ b/include/soc/spacemit/k1-syscon.h @@ -127,4 +127,34 @@ to_spacemit_ccu_adev(struct auxiliary_device *adev) #define APMU_EMAC0_CLK_RES_CTRL 0x3e4 #define APMU_EMAC1_CLK_RES_CTRL 0x3ec +/* RCPU register offsets */ +#define RCPU_SSP0_CLK_RST 0x0028 +#define RCPU_I2C0_CLK_RST 0x0030 +#define RCPU_UART1_CLK_RST 0x003c +#define RCPU_CAN_CLK_RST 0x0048 +#define RCPU_IR_CLK_RST 0x004c +#define RCPU_UART0_CLK_RST 0x00d8 +#define AUDIO_HDMI_CLK_CTRL 0x2044 + +/* RCPU2 register offsets */ +#define RCPU2_PWM0_CLK_RST 0x0000 +#define RCPU2_PWM1_CLK_RST 0x0004 +#define RCPU2_PWM2_CLK_RST 0x0008 +#define RCPU2_PWM3_CLK_RST 0x000c +#define RCPU2_PWM4_CLK_RST 0x0010 +#define RCPU2_PWM5_CLK_RST 0x0014 +#define RCPU2_PWM6_CLK_RST 0x0018 +#define RCPU2_PWM7_CLK_RST 0x001c +#define RCPU2_PWM8_CLK_RST 0x0020 +#define RCPU2_PWM9_CLK_RST 0x0024 + +/* APBC2 register offsets */ +#define APBC2_UART1_CLK_RST 0x0000 +#define APBC2_SSP2_CLK_RST 0x0004 +#define APBC2_TWSI3_CLK_RST 0x0008 +#define APBC2_RTC_CLK_RST 0x000c +#define APBC2_TIMERS0_CLK_RST 0x0010 +#define APBC2_KPC_CLK_RST 0x0014 +#define APBC2_GPIO_CLK_RST 0x001c + #endif /* __SOC_K1_SYSCON_H__ */ -- cgit v1.2.3 From 2ec1067d1e5acb9e7aeb1fe6d5178424fc3107ab Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 1 Jul 2025 00:10:33 +0000 Subject: ASoC: soc-dapm: remove snd_soc_dapm_nc_pin[_unlocked]() snd_soc_dapm_nc_pin() was added in commit 5817b52a298a ("ALSA: ASoC: Allow machine drivers to mark pins as not connected") at 2008. It is identical to snd_soc_dapm_disable_pin[_unlocked](). It was expected to be updated, but were enough as-is for this 17 years. We might update these, but renaming function name by define is enough for now. We can re-create these if needed in the future. Let's remove it. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87tt3whitj.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 400584474bc8..2a0250fff9ba 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -498,8 +498,6 @@ int snd_soc_dapm_enable_pin(struct snd_soc_dapm_context *dapm, const char *pin); int snd_soc_dapm_enable_pin_unlocked(struct snd_soc_dapm_context *dapm, const char *pin); int snd_soc_dapm_disable_pin(struct snd_soc_dapm_context *dapm, const char *pin); int snd_soc_dapm_disable_pin_unlocked(struct snd_soc_dapm_context *dapm, const char *pin); -int snd_soc_dapm_nc_pin(struct snd_soc_dapm_context *dapm, const char *pin); -int snd_soc_dapm_nc_pin_unlocked(struct snd_soc_dapm_context *dapm, const char *pin); int snd_soc_dapm_get_pin_status(struct snd_soc_dapm_context *dapm, const char *pin); int snd_soc_dapm_sync(struct snd_soc_dapm_context *dapm); int snd_soc_dapm_sync_unlocked(struct snd_soc_dapm_context *dapm); @@ -509,6 +507,16 @@ int snd_soc_dapm_ignore_suspend(struct snd_soc_dapm_context *dapm, const char *p unsigned int dapm_kcontrol_get_value(const struct snd_kcontrol *kcontrol); void dapm_mark_endpoints_dirty(struct snd_soc_card *card); +/* + * Marks the specified pin as being not connected, disabling it along + * any parent or child widgets. At present this is identical to + * snd_soc_dapm_disable_pin[_unlocked]() but in future it will be extended to do + * additional things such as disabling controls which only affect + * paths through the pin. + */ +#define snd_soc_dapm_nc_pin snd_soc_dapm_disable_pin +#define snd_soc_dapm_nc_pin_unlocked snd_soc_dapm_disable_pin_unlocked + /* dapm path query */ int snd_soc_dapm_dai_get_connected_widgets(struct snd_soc_dai *dai, int stream, struct snd_soc_dapm_widget_list **list, -- cgit v1.2.3 From d6f240031afbd780431ef289357373e2bbf2f793 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 1 Jul 2025 00:10:39 +0000 Subject: ASoC: soc-dapm: remove snd_soc_dapm_weak_routes() No one is using snd_soc_dapm_weak_routes(), let's remove it. Because snd_soc_dapm_weak_routes() was removed, path->weak is not needed either. Remove it, too. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87sejghitd.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 2a0250fff9ba..6d854c727bca 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -473,8 +473,6 @@ int snd_soc_dapm_add_routes(struct snd_soc_dapm_context *dapm, const struct snd_soc_dapm_route *route, int num); int snd_soc_dapm_del_routes(struct snd_soc_dapm_context *dapm, const struct snd_soc_dapm_route *route, int num); -int snd_soc_dapm_weak_routes(struct snd_soc_dapm_context *dapm, - const struct snd_soc_dapm_route *route, int num); void snd_soc_dapm_free_widget(struct snd_soc_dapm_widget *w); /* dapm events */ @@ -611,7 +609,6 @@ struct snd_soc_dapm_path { /* status */ u32 connect:1; /* source and sink widgets are connected */ u32 walking:1; /* path is in the process of being walked */ - u32 weak:1; /* path ignored for power management */ u32 is_supply:1; /* At least one of the connected widgets is a supply */ int (*connected)(struct snd_soc_dapm_widget *source, -- cgit v1.2.3 From f02ccc8c0b99382807bac0065918a90ba974e9ab Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 1 Jul 2025 00:10:51 +0000 Subject: ASoC: soc-dapm: reordering header definitions Because header defined randomly, it needs name definitions on top of soc-dapm.h. it is not needed if definitions are implemented in correct order. This patch has big change from change-line point of view, but is just reordering, nothing changed in meaning. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87plekhit0.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 235 ++++++++++++++++++++++------------------------- 1 file changed, 110 insertions(+), 125 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 6d854c727bca..5aeb0822ce0b 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -16,9 +16,10 @@ #include struct device; +struct regulator; +struct soc_enum; struct snd_pcm_substream; struct snd_soc_pcm_runtime; -struct soc_enum; /* widget has no PM register bit */ #define SND_SOC_NOPM -1 @@ -399,17 +400,6 @@ struct soc_enum; /* regulator widget flags */ #define SND_SOC_DAPM_REGULATOR_BYPASS 0x1 /* bypass when disabled */ -struct snd_soc_dapm_widget; -enum snd_soc_dapm_type; -struct snd_soc_dapm_path; -struct snd_soc_dapm_pin; -struct snd_soc_dapm_route; -struct snd_soc_dapm_context; -struct regulator; -struct snd_soc_dapm_widget_list; -struct snd_soc_dapm_update; -enum snd_soc_dapm_direction; - /* * Bias levels * @@ -428,104 +418,6 @@ enum snd_soc_bias_level { SND_SOC_BIAS_ON = 3, }; -int dapm_regulator_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event); -int dapm_clock_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event); -int dapm_pinctrl_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event); - -/* dapm controls */ -int snd_soc_dapm_put_volsw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); -int snd_soc_dapm_get_volsw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); -int snd_soc_dapm_get_enum_double(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol); -int snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol); -int snd_soc_dapm_info_pin_switch(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_info *uinfo); -int snd_soc_dapm_get_pin_switch(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *uncontrol); -int snd_soc_dapm_put_pin_switch(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *uncontrol); -int snd_soc_dapm_get_component_pin_switch(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *uncontrol); -int snd_soc_dapm_put_component_pin_switch(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *uncontrol); -int snd_soc_dapm_new_controls(struct snd_soc_dapm_context *dapm, - const struct snd_soc_dapm_widget *widget, unsigned int num); -struct snd_soc_dapm_widget *snd_soc_dapm_new_control(struct snd_soc_dapm_context *dapm, - const struct snd_soc_dapm_widget *widget); -struct snd_soc_dapm_widget *snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm, - const struct snd_soc_dapm_widget *widget); -int snd_soc_dapm_new_dai_widgets(struct snd_soc_dapm_context *dapm, struct snd_soc_dai *dai); -void snd_soc_dapm_free_widget(struct snd_soc_dapm_widget *w); -int snd_soc_dapm_link_dai_widgets(struct snd_soc_card *card); -void snd_soc_dapm_connect_dai_link_widgets(struct snd_soc_card *card); - -int snd_soc_dapm_update_dai(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params, struct snd_soc_dai *dai); -int snd_soc_dapm_widget_name_cmp(struct snd_soc_dapm_widget *widget, const char *s); - -/* dapm path setup */ -int snd_soc_dapm_new_widgets(struct snd_soc_card *card); -void snd_soc_dapm_free(struct snd_soc_dapm_context *dapm); -void snd_soc_dapm_init(struct snd_soc_dapm_context *dapm, - struct snd_soc_card *card, struct snd_soc_component *component); -int snd_soc_dapm_add_routes(struct snd_soc_dapm_context *dapm, - const struct snd_soc_dapm_route *route, int num); -int snd_soc_dapm_del_routes(struct snd_soc_dapm_context *dapm, - const struct snd_soc_dapm_route *route, int num); -void snd_soc_dapm_free_widget(struct snd_soc_dapm_widget *w); - -/* dapm events */ -void snd_soc_dapm_stream_event(struct snd_soc_pcm_runtime *rtd, int stream, int event); -void snd_soc_dapm_stream_stop(struct snd_soc_pcm_runtime *rtd, int stream); -void snd_soc_dapm_shutdown(struct snd_soc_card *card); - -/* external DAPM widget events */ -int snd_soc_dapm_mixer_update_power(struct snd_soc_dapm_context *dapm, - struct snd_kcontrol *kcontrol, int connect, struct snd_soc_dapm_update *update); -int snd_soc_dapm_mux_update_power(struct snd_soc_dapm_context *dapm, - struct snd_kcontrol *kcontrol, int mux, struct soc_enum *e, - struct snd_soc_dapm_update *update); - -/* dapm sys fs - used by the core */ -extern struct attribute *soc_dapm_dev_attrs[]; -void snd_soc_dapm_debugfs_init(struct snd_soc_dapm_context *dapm, struct dentry *parent); - -/* dapm audio pin control and status */ -int snd_soc_dapm_enable_pin(struct snd_soc_dapm_context *dapm, const char *pin); -int snd_soc_dapm_enable_pin_unlocked(struct snd_soc_dapm_context *dapm, const char *pin); -int snd_soc_dapm_disable_pin(struct snd_soc_dapm_context *dapm, const char *pin); -int snd_soc_dapm_disable_pin_unlocked(struct snd_soc_dapm_context *dapm, const char *pin); -int snd_soc_dapm_get_pin_status(struct snd_soc_dapm_context *dapm, const char *pin); -int snd_soc_dapm_sync(struct snd_soc_dapm_context *dapm); -int snd_soc_dapm_sync_unlocked(struct snd_soc_dapm_context *dapm); -int snd_soc_dapm_force_enable_pin(struct snd_soc_dapm_context *dapm, const char *pin); -int snd_soc_dapm_force_enable_pin_unlocked(struct snd_soc_dapm_context *dapm, const char *pin); -int snd_soc_dapm_ignore_suspend(struct snd_soc_dapm_context *dapm, const char *pin); -unsigned int dapm_kcontrol_get_value(const struct snd_kcontrol *kcontrol); -void dapm_mark_endpoints_dirty(struct snd_soc_card *card); - -/* - * Marks the specified pin as being not connected, disabling it along - * any parent or child widgets. At present this is identical to - * snd_soc_dapm_disable_pin[_unlocked]() but in future it will be extended to do - * additional things such as disabling controls which only affect - * paths through the pin. - */ -#define snd_soc_dapm_nc_pin snd_soc_dapm_disable_pin -#define snd_soc_dapm_nc_pin_unlocked snd_soc_dapm_disable_pin_unlocked - -/* dapm path query */ -int snd_soc_dapm_dai_get_connected_widgets(struct snd_soc_dai *dai, int stream, - struct snd_soc_dapm_widget_list **list, - bool (*custom_stop_condition)(struct snd_soc_dapm_widget *, enum snd_soc_dapm_direction)); -void snd_soc_dapm_dai_free_widgets(struct snd_soc_dapm_widget_list **list); - -struct snd_soc_dapm_context *snd_soc_dapm_kcontrol_dapm(struct snd_kcontrol *kcontrol); -struct snd_soc_dapm_widget *snd_soc_dapm_kcontrol_widget(struct snd_kcontrol *kcontrol); - -int snd_soc_dapm_force_bias_level(struct snd_soc_dapm_context *dapm, enum snd_soc_bias_level level); - /* dapm widget types */ enum snd_soc_dapm_type { snd_soc_dapm_input = 0, /* input pin */ @@ -717,11 +609,6 @@ struct snd_soc_dapm_widget_list { struct snd_soc_dapm_widget *widgets[] __counted_by(num_widgets); }; -#define for_each_dapm_widgets(list, i, widget) \ - for ((i) = 0; \ - (i) < list->num_widgets && (widget = list->widgets[i]); \ - (i)++) - struct snd_soc_dapm_stats { int power_checks; int path_checks; @@ -733,6 +620,114 @@ struct snd_soc_dapm_pinctrl_priv { const char *sleep_state; }; +enum snd_soc_dapm_direction { + SND_SOC_DAPM_DIR_IN, + SND_SOC_DAPM_DIR_OUT +}; + +#define SND_SOC_DAPM_DIR_TO_EP(x) BIT(x) + +#define SND_SOC_DAPM_EP_SOURCE SND_SOC_DAPM_DIR_TO_EP(SND_SOC_DAPM_DIR_IN) +#define SND_SOC_DAPM_EP_SINK SND_SOC_DAPM_DIR_TO_EP(SND_SOC_DAPM_DIR_OUT) + +int dapm_regulator_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event); +int dapm_clock_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event); +int dapm_pinctrl_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event); + +/* dapm controls */ +int snd_soc_dapm_put_volsw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); +int snd_soc_dapm_get_volsw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); +int snd_soc_dapm_get_enum_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); +int snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); +int snd_soc_dapm_info_pin_switch(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo); +int snd_soc_dapm_get_pin_switch(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *uncontrol); +int snd_soc_dapm_put_pin_switch(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *uncontrol); +int snd_soc_dapm_get_component_pin_switch(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *uncontrol); +int snd_soc_dapm_put_component_pin_switch(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *uncontrol); +int snd_soc_dapm_new_controls(struct snd_soc_dapm_context *dapm, + const struct snd_soc_dapm_widget *widget, unsigned int num); +struct snd_soc_dapm_widget *snd_soc_dapm_new_control(struct snd_soc_dapm_context *dapm, + const struct snd_soc_dapm_widget *widget); +struct snd_soc_dapm_widget *snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm, + const struct snd_soc_dapm_widget *widget); +int snd_soc_dapm_new_dai_widgets(struct snd_soc_dapm_context *dapm, struct snd_soc_dai *dai); +void snd_soc_dapm_free_widget(struct snd_soc_dapm_widget *w); +int snd_soc_dapm_link_dai_widgets(struct snd_soc_card *card); +void snd_soc_dapm_connect_dai_link_widgets(struct snd_soc_card *card); + +int snd_soc_dapm_update_dai(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, struct snd_soc_dai *dai); +int snd_soc_dapm_widget_name_cmp(struct snd_soc_dapm_widget *widget, const char *s); + +/* dapm path setup */ +int snd_soc_dapm_new_widgets(struct snd_soc_card *card); +void snd_soc_dapm_free(struct snd_soc_dapm_context *dapm); +void snd_soc_dapm_init(struct snd_soc_dapm_context *dapm, + struct snd_soc_card *card, struct snd_soc_component *component); +int snd_soc_dapm_add_routes(struct snd_soc_dapm_context *dapm, + const struct snd_soc_dapm_route *route, int num); +int snd_soc_dapm_del_routes(struct snd_soc_dapm_context *dapm, + const struct snd_soc_dapm_route *route, int num); +void snd_soc_dapm_free_widget(struct snd_soc_dapm_widget *w); + +/* dapm events */ +void snd_soc_dapm_stream_event(struct snd_soc_pcm_runtime *rtd, int stream, int event); +void snd_soc_dapm_stream_stop(struct snd_soc_pcm_runtime *rtd, int stream); +void snd_soc_dapm_shutdown(struct snd_soc_card *card); + +/* external DAPM widget events */ +int snd_soc_dapm_mixer_update_power(struct snd_soc_dapm_context *dapm, + struct snd_kcontrol *kcontrol, int connect, struct snd_soc_dapm_update *update); +int snd_soc_dapm_mux_update_power(struct snd_soc_dapm_context *dapm, + struct snd_kcontrol *kcontrol, int mux, struct soc_enum *e, + struct snd_soc_dapm_update *update); + +/* dapm sys fs - used by the core */ +extern struct attribute *soc_dapm_dev_attrs[]; +void snd_soc_dapm_debugfs_init(struct snd_soc_dapm_context *dapm, struct dentry *parent); + +/* dapm audio pin control and status */ +int snd_soc_dapm_enable_pin(struct snd_soc_dapm_context *dapm, const char *pin); +int snd_soc_dapm_enable_pin_unlocked(struct snd_soc_dapm_context *dapm, const char *pin); +int snd_soc_dapm_disable_pin(struct snd_soc_dapm_context *dapm, const char *pin); +int snd_soc_dapm_disable_pin_unlocked(struct snd_soc_dapm_context *dapm, const char *pin); +int snd_soc_dapm_get_pin_status(struct snd_soc_dapm_context *dapm, const char *pin); +int snd_soc_dapm_sync(struct snd_soc_dapm_context *dapm); +int snd_soc_dapm_sync_unlocked(struct snd_soc_dapm_context *dapm); +int snd_soc_dapm_force_enable_pin(struct snd_soc_dapm_context *dapm, const char *pin); +int snd_soc_dapm_force_enable_pin_unlocked(struct snd_soc_dapm_context *dapm, const char *pin); +int snd_soc_dapm_ignore_suspend(struct snd_soc_dapm_context *dapm, const char *pin); +unsigned int dapm_kcontrol_get_value(const struct snd_kcontrol *kcontrol); +void dapm_mark_endpoints_dirty(struct snd_soc_card *card); + +/* + * Marks the specified pin as being not connected, disabling it along + * any parent or child widgets. At present this is identical to + * snd_soc_dapm_disable_pin[_unlocked]() but in future it will be extended to do + * additional things such as disabling controls which only affect + * paths through the pin. + */ +#define snd_soc_dapm_nc_pin snd_soc_dapm_disable_pin +#define snd_soc_dapm_nc_pin_unlocked snd_soc_dapm_disable_pin_unlocked + +/* dapm path query */ +int snd_soc_dapm_dai_get_connected_widgets(struct snd_soc_dai *dai, int stream, + struct snd_soc_dapm_widget_list **list, + bool (*custom_stop_condition)(struct snd_soc_dapm_widget *, enum snd_soc_dapm_direction)); +void snd_soc_dapm_dai_free_widgets(struct snd_soc_dapm_widget_list **list); + +struct snd_soc_dapm_context *snd_soc_dapm_kcontrol_dapm(struct snd_kcontrol *kcontrol); +struct snd_soc_dapm_widget *snd_soc_dapm_kcontrol_widget(struct snd_kcontrol *kcontrol); + +int snd_soc_dapm_force_bias_level(struct snd_soc_dapm_context *dapm, enum snd_soc_bias_level level); + +#define for_each_dapm_widgets(list, i, widget) \ + for ((i) = 0; \ + (i) < list->num_widgets && (widget = list->widgets[i]); \ + (i)++) + /** * snd_soc_dapm_init_bias_level() - Initialize DAPM bias level * @dapm: The DAPM context to initialize @@ -764,16 +759,6 @@ static inline enum snd_soc_bias_level snd_soc_dapm_get_bias_level( return dapm->bias_level; } -enum snd_soc_dapm_direction { - SND_SOC_DAPM_DIR_IN, - SND_SOC_DAPM_DIR_OUT -}; - -#define SND_SOC_DAPM_DIR_TO_EP(x) BIT(x) - -#define SND_SOC_DAPM_EP_SOURCE SND_SOC_DAPM_DIR_TO_EP(SND_SOC_DAPM_DIR_IN) -#define SND_SOC_DAPM_EP_SINK SND_SOC_DAPM_DIR_TO_EP(SND_SOC_DAPM_DIR_OUT) - /** * snd_soc_dapm_widget_for_each_path - Iterates over all paths in the * specified direction of a widget -- cgit v1.2.3 From 805c019fbb94795e391974f673c5b3e57b825f6d Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 1 Jul 2025 00:11:08 +0000 Subject: ASoC: soc-dapm: add prefix on dapm_mark_endpoints_dirty() dapm_mark_endpoints_dirty() is global function. Let's add snd_soc_ prefix. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87ldp8hisj.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 5aeb0822ce0b..ee77a80765c6 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -700,7 +700,7 @@ int snd_soc_dapm_force_enable_pin(struct snd_soc_dapm_context *dapm, const char int snd_soc_dapm_force_enable_pin_unlocked(struct snd_soc_dapm_context *dapm, const char *pin); int snd_soc_dapm_ignore_suspend(struct snd_soc_dapm_context *dapm, const char *pin); unsigned int dapm_kcontrol_get_value(const struct snd_kcontrol *kcontrol); -void dapm_mark_endpoints_dirty(struct snd_soc_card *card); +void snd_soc_dapm_mark_endpoints_dirty(struct snd_soc_card *card); /* * Marks the specified pin as being not connected, disabling it along -- cgit v1.2.3 From 9d33f9ca4404e532453a0305ce11bf76a9945c5d Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 1 Jul 2025 00:11:15 +0000 Subject: ASoC: soc-dapm: add prefix on dapm_xxx_event() dapm_xxx_event() is global function. Let's add snd_soc_ prefix. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87jz4shisc.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index ee77a80765c6..be5ecc276562 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -295,7 +295,7 @@ struct snd_soc_pcm_runtime; #define SND_SOC_DAPM_CLOCK_SUPPLY(wname) \ (struct snd_soc_dapm_widget) { \ .id = snd_soc_dapm_clock_supply, .name = wname, \ - .reg = SND_SOC_NOPM, .event = dapm_clock_event, \ + .reg = SND_SOC_NOPM, .event = snd_soc_dapm_clock_event, \ .event_flags = SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD } /* generic widgets */ @@ -312,7 +312,7 @@ struct snd_soc_pcm_runtime; #define SND_SOC_DAPM_REGULATOR_SUPPLY(wname, wdelay, wflags) \ (struct snd_soc_dapm_widget) { \ .id = snd_soc_dapm_regulator_supply, .name = wname, \ - .reg = SND_SOC_NOPM, .shift = wdelay, .event = dapm_regulator_event, \ + .reg = SND_SOC_NOPM, .shift = wdelay, .event = snd_soc_dapm_regulator_event, \ .event_flags = SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD, \ .on_val = wflags} #define SND_SOC_DAPM_PINCTRL(wname, active, sleep) \ @@ -320,7 +320,7 @@ struct snd_soc_pcm_runtime; .id = snd_soc_dapm_pinctrl, .name = wname, \ .priv = (&(struct snd_soc_dapm_pinctrl_priv) \ { .active_state = active, .sleep_state = sleep,}), \ - .reg = SND_SOC_NOPM, .event = dapm_pinctrl_event, \ + .reg = SND_SOC_NOPM, .event = snd_soc_dapm_pinctrl_event, \ .event_flags = SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD } @@ -630,9 +630,9 @@ enum snd_soc_dapm_direction { #define SND_SOC_DAPM_EP_SOURCE SND_SOC_DAPM_DIR_TO_EP(SND_SOC_DAPM_DIR_IN) #define SND_SOC_DAPM_EP_SINK SND_SOC_DAPM_DIR_TO_EP(SND_SOC_DAPM_DIR_OUT) -int dapm_regulator_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event); -int dapm_clock_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event); -int dapm_pinctrl_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event); +int snd_soc_dapm_regulator_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event); +int snd_soc_dapm_clock_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event); +int snd_soc_dapm_pinctrl_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event); /* dapm controls */ int snd_soc_dapm_put_volsw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); -- cgit v1.2.3 From 08dc0f5cc26a203e8008c38d9b436c079e7dbb45 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 1 Jul 2025 00:11:23 +0000 Subject: ASoC: soc-dapm: add prefix on soc_dapm_dev_attrs soc_dapm_dev_attrs is global variable. Let's add snd_soc_ prefix. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87ikkchis6.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index be5ecc276562..0b5c7e6a90c8 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -685,7 +685,7 @@ int snd_soc_dapm_mux_update_power(struct snd_soc_dapm_context *dapm, struct snd_soc_dapm_update *update); /* dapm sys fs - used by the core */ -extern struct attribute *soc_dapm_dev_attrs[]; +extern struct attribute *snd_soc_dapm_dev_attrs[]; void snd_soc_dapm_debugfs_init(struct snd_soc_dapm_context *dapm, struct dentry *parent); /* dapm audio pin control and status */ -- cgit v1.2.3 From c4aa454c64ae022e5a9d55b3c31e9b8dd8a1544f Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 4 Jul 2025 16:03:53 -0700 Subject: bpf: support for void/primitive __arg_untrusted global func params Allow specifying __arg_untrusted for void */char */int */long * parameters. Treat such parameters as PTR_TO_MEM|MEM_RDONLY|PTR_UNTRUSTED of size zero. Intended usage is as follows: int memcmp(char *a __arg_untrusted, char *b __arg_untrusted, size_t n) { bpf_for(i, 0, n) { if (a[i] - b[i]) // load at any offset is allowed return a[i] - b[i]; } return 0; } Allocate register id for ARG_PTR_TO_MEM parameters only when PTR_MAYBE_NULL is set. Register id for PTR_TO_MEM is used only to propagate non-null status after conditionals. Suggested-by: Alexei Starovoitov Acked-by: Kumar Kartikeya Dwivedi Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250704230354.1323244-8-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index a40beb9cf160..9eda6b113f9b 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -223,6 +223,7 @@ u32 btf_nr_types(const struct btf *btf); struct btf *btf_base_btf(const struct btf *btf); bool btf_type_is_i32(const struct btf_type *t); bool btf_type_is_i64(const struct btf_type *t); +bool btf_type_is_primitive(const struct btf_type *t); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); -- cgit v1.2.3 From 4cdf1bdd45ac78a088773722f009883af30ad318 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Fri, 4 Jul 2025 11:21:34 +0200 Subject: block: reject bs > ps block devices when THP is disabled If THP is disabled and when a block device with logical block size > page size is present, the following null ptr deref panic happens during boot: [ [13.2 mK AOSAN: null-ptr-deref in range [0x0000000000000000-0x0000000000K0 0 0[07] [ 13.017749] RIP: 0010:create_empty_buffers+0x3b/0x380 [ 13.025448] Call Trace: [ 13.025692] [ 13.025895] block_read_full_folio+0x610/0x780 [ 13.026379] ? __pfx_blkdev_get_block+0x10/0x10 [ 13.027008] ? __folio_batch_add_and_move+0x1fa/0x2b0 [ 13.027548] ? __pfx_blkdev_read_folio+0x10/0x10 [ 13.028080] filemap_read_folio+0x9b/0x200 [ 13.028526] ? __pfx_filemap_read_folio+0x10/0x10 [ 13.029030] ? __filemap_get_folio+0x43/0x620 [ 13.029497] do_read_cache_folio+0x155/0x3b0 [ 13.029962] ? __pfx_blkdev_read_folio+0x10/0x10 [ 13.030381] read_part_sector+0xb7/0x2a0 [ 13.030805] read_lba+0x174/0x2c0 [ 13.045348] nvme_scan_ns+0x684/0x850 [nvme_core] [ 13.045858] ? __pfx_nvme_scan_ns+0x10/0x10 [nvme_core] [ 13.046414] ? _raw_spin_unlock+0x15/0x40 [ 13.046843] ? __switch_to+0x523/0x10a0 [ 13.047253] ? kvm_clock_get_cycles+0x14/0x30 [ 13.047742] ? __pfx_nvme_scan_ns_async+0x10/0x10 [nvme_core] [ 13.048353] async_run_entry_fn+0x96/0x4f0 [ 13.048787] process_one_work+0x667/0x10a0 [ 13.049219] worker_thread+0x63c/0xf60 As large folio support depends on THP, only allow bs > ps block devices if THP is enabled. Fixes: 47dd67532303 ("block/bdev: lift block size restrictions to 64k") Signed-off-by: Pankaj Raghav Reviewed-by: Luis Chamberlain Link: https://lore.kernel.org/r/20250704092134.289491-1-p.raghav@samsung.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 332b56f323d9..369a8e63c865 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -269,11 +269,16 @@ static inline dev_t disk_devt(struct gendisk *disk) return MKDEV(disk->major, disk->first_minor); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * We should strive for 1 << (PAGE_SHIFT + MAX_PAGECACHE_ORDER) * however we constrain this to what we can validate and test. */ #define BLK_MAX_BLOCK_SIZE SZ_64K +#else +#define BLK_MAX_BLOCK_SIZE PAGE_SIZE +#endif + /* blk_validate_limits() validates bsize, so drivers don't usually need to */ static inline int blk_validate_block_size(unsigned long bsize) -- cgit v1.2.3 From d42c7c6fd66a6e2a78ae1da666c5df6c2fde8389 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 3 Jul 2025 14:27:06 +0300 Subject: PM: domains: Add flags to specify power on attach/detach Calling dev_pm_domain_attach()/dev_pm_domain_detach() in bus driver probe/remove functions can affect system behavior when the drivers attached to the bus use devres-managed resources. Since devres actions may need to access device registers, calling dev_pm_domain_detach() too early, i.e., before these actions complete, can cause failures on some systems. One such example is Renesas RZ/G3S SoC-based platforms. If the device clocks are managed via PM domains, invoking dev_pm_domain_detach() in the bus driver's remove function removes the device's clocks from the PM domain, preventing any subsequent pm_runtime_resume*() calls from enabling those clocks. The second argument of dev_pm_domain_attach() specifies whether the PM domain should be powered on during attachment. Likewise, the second argument of dev_pm_domain_detach() indicates whether the domain should be powered off during detachment. Upcoming changes address the issue described above (initially for the platform bus only) by deferring the call to dev_pm_domain_detach() until after devres_release_all() in device_unbind_cleanup(). The detach_power_off field in struct dev_pm_info stores the detach power off info from the second argument of dev_pm_domain_attach(). Because there are cases where the device's PM domain power-on/off behavior must be conditional (e.g., in i2c_device_probe()), the patch introduces PD_FLAG_ATTACH_POWER_ON and PD_FLAG_DETACH_POWER_OFF flags to be passed to dev_pm_domain_attach(). Finally, dev_pm_domain_attach() and its users are updated to use the newly introduced PD_FLAG_ATTACH_POWER_ON and PD_FLAG_DETACH_POWER_OFF macros. This change is preparatory. Signed-off-by: Claudiu Beznea Reviewed-by: Mathieu Poirier Acked-by: Wolfram Sang # I2C Reviewed-by: Ulf Hansson Link: https://patch.msgid.link/20250703112708.1621607-2-claudiu.beznea.uj@bp.renesas.com [ rjw: Changelog adjustments ] Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 0b18160901a2..62a35a78ce9b 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -36,10 +36,16 @@ * isn't specified, the index just follows the * index for the attached PM domain. * + * PD_FLAG_ATTACH_POWER_ON: Power on the domain during attach. + * + * PD_FLAG_DETACH_POWER_OFF: Power off the domain during detach. + * */ #define PD_FLAG_NO_DEV_LINK BIT(0) #define PD_FLAG_DEV_LINK_ON BIT(1) #define PD_FLAG_REQUIRED_OPP BIT(2) +#define PD_FLAG_ATTACH_POWER_ON BIT(3) +#define PD_FLAG_DETACH_POWER_OFF BIT(4) struct dev_pm_domain_attach_data { const char * const *pd_names; @@ -501,7 +507,7 @@ struct generic_pm_domain *of_genpd_remove_last(struct device_node *np) #endif /* CONFIG_PM_GENERIC_DOMAINS_OF */ #ifdef CONFIG_PM -int dev_pm_domain_attach(struct device *dev, bool power_on); +int dev_pm_domain_attach(struct device *dev, u32 flags); struct device *dev_pm_domain_attach_by_id(struct device *dev, unsigned int index); struct device *dev_pm_domain_attach_by_name(struct device *dev, @@ -518,7 +524,7 @@ int dev_pm_domain_start(struct device *dev); void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd); int dev_pm_domain_set_performance_state(struct device *dev, unsigned int state); #else -static inline int dev_pm_domain_attach(struct device *dev, bool power_on) +static inline int dev_pm_domain_attach(struct device *dev, u32 flags) { return 0; } -- cgit v1.2.3 From f99508074e78fea17f06d753d9ef453b174ec98e Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 3 Jul 2025 14:27:07 +0300 Subject: PM: domains: Detach on device_unbind_cleanup() The dev_pm_domain_attach() function is typically used in bus code alongside dev_pm_domain_detach(), often following patterns like: static int bus_probe(struct device *_dev) { struct bus_driver *drv = to_bus_driver(dev->driver); struct bus_device *dev = to_bus_device(_dev); int ret; // ... ret = dev_pm_domain_attach(_dev, true); if (ret) return ret; if (drv->probe) ret = drv->probe(dev); // ... } static void bus_remove(struct device *_dev) { struct bus_driver *drv = to_bus_driver(dev->driver); struct bus_device *dev = to_bus_device(_dev); if (drv->remove) drv->remove(dev); dev_pm_domain_detach(_dev); } When the driver's probe function uses devres-managed resources that depend on the power domain state, those resources are released later during device_unbind_cleanup(). Releasing devres-managed resources that depend on the power domain state after detaching the device from its PM domain can cause failures. For example, if the driver uses devm_pm_runtime_enable() in its probe function, and the device's clocks are managed by the PM domain, then during removal the runtime PM is disabled in device_unbind_cleanup() after the clocks have been removed from the PM domain. It may happen that the devm_pm_runtime_enable() action causes the device to be runtime- resumed. If the driver specific runtime PM APIs access registers directly, this will lead to accessing device registers without clocks being enabled. Similar issues may occur with other devres actions that access device registers. Add detach_power_off member to struct dev_pm_info, to be used later in device_unbind_cleanup() as the power_off argument for dev_pm_domain_detach(). This is a preparatory step toward removing dev_pm_domain_detach() calls from bus remove functions. Since the current PM domain detach functions (genpd_dev_pm_detach() and acpi_dev_pm_detach()) already set dev->pm_domain = NULL, there should be no issues with bus drivers that still call dev_pm_domain_detach() in their remove functions. Signed-off-by: Claudiu Beznea Reviewed-by: Ulf Hansson Link: https://patch.msgid.link/20250703112708.1621607-3-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index 938b1b446a5d..14e8370887e3 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -721,6 +721,7 @@ struct dev_pm_info { struct pm_subsys_data *subsys_data; /* Owned by the subsystem. */ void (*set_latency_tolerance)(struct device *, s32); struct dev_pm_qos *qos; + bool detach_power_off:1; /* Owned by the driver core */ }; extern int dev_pm_get_subsys_data(struct device *dev); -- cgit v1.2.3 From 59f44c9ccc3bb68aa3b062b8e57ce0e1ee2fca75 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 2 Jul 2025 17:50:34 +0200 Subject: net: openvswitch: allow providing upcall pid for the 'execute' command When a packet enters OVS datapath and there is no flow to handle it, packet goes to userspace through a MISS upcall. With per-CPU upcall dispatch mechanism, we're using the current CPU id to select the Netlink PID on which to send this packet. This allows us to send packets from the same traffic flow through the same handler. The handler will process the packet, install required flow into the kernel and re-inject the original packet via OVS_PACKET_CMD_EXECUTE. While handling OVS_PACKET_CMD_EXECUTE, however, we may hit a recirculation action that will pass the (likely modified) packet through the flow lookup again. And if the flow is not found, the packet will be sent to userspace again through another MISS upcall. However, the handler thread in userspace is likely running on a different CPU core, and the OVS_PACKET_CMD_EXECUTE request is handled in the syscall context of that thread. So, when the time comes to send the packet through another upcall, the per-CPU dispatch will choose a different Netlink PID, and this packet will end up processed by a different handler thread on a different CPU. The process continues as long as there are new recirculations, each time the packet goes to a different handler thread before it is sent out of the OVS datapath to the destination port. In real setups the number of recirculations can go up to 4 or 5, sometimes more. There is always a chance to re-order packets while processing upcalls, because userspace will first install the flow and then re-inject the original packet. So, there is a race window when the flow is already installed and the second packet can match it and be forwarded to the destination before the first packet is re-injected. But the fact that packets are going through multiple upcalls handled by different userspace threads makes the reordering noticeably more likely, because we not only have a race between the kernel and a userspace handler (which is hard to avoid), but also between multiple userspace handlers. For example, let's assume that 10 packets got enqueued through a MISS upcall for handler-1, it will start processing them, will install the flow into the kernel and start re-injecting packets back, from where they will go through another MISS to handler-2. Handler-2 will install the flow into the kernel and start re-injecting the packets, while handler-1 continues to re-inject the last of the 10 packets, they will hit the flow installed by handler-2 and be forwarded without going to the handler-2, while handler-2 still re-injects the first of these 10 packets. Given multiple recirculations and misses, these 10 packets may end up completely mixed up on the output from the datapath. Let's allow userspace to specify on which Netlink PID the packets should be upcalled while processing OVS_PACKET_CMD_EXECUTE. This makes it possible to ensure that all the packets are processed by the same handler thread in the userspace even with them being upcalled multiple times in the process. Packets will remain in order since they will be enqueued to the same socket and re-injected in the same order. This doesn't eliminate re-ordering as stated above, since we still have a race between kernel and the userspace thread, but it allows to eliminate races between multiple userspace threads. Userspace knows the PID of the socket on which the original upcall is received, so there is no need to send it up from the kernel. Solution requires storing the value somewhere for the duration of the packet processing. There are two potential places for this: our skb extension or the per-CPU storage. It's not clear which is better, so just following currently used scheme of storing this kind of things along the skb. We still have a decent amount of space in the cb. Signed-off-by: Ilya Maximets Acked-by: Flavio Leitner Acked-by: Eelco Chaudron Acked-by: Aaron Conole Link: https://patch.msgid.link/20250702155043.2331772-1-i.maximets@ovn.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/openvswitch.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 3a701bd1f31b..3092c2c6f1d2 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -186,6 +186,11 @@ enum ovs_packet_cmd { * %OVS_PACKET_ATTR_USERSPACE action specify the Maximum received fragment * size. * @OVS_PACKET_ATTR_HASH: Packet hash info (e.g. hash, sw_hash and l4_hash in skb). + * @OVS_PACKET_ATTR_UPCALL_PID: Netlink PID to use for upcalls while + * processing %OVS_PACKET_CMD_EXECUTE. Takes precedence over all other ways + * to determine the Netlink PID including %OVS_USERSPACE_ATTR_PID, + * %OVS_DP_ATTR_UPCALL_PID, %OVS_DP_ATTR_PER_CPU_PIDS and the + * %OVS_VPORT_ATTR_UPCALL_PID. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_PACKET_* commands. @@ -205,6 +210,7 @@ enum ovs_packet_attr { OVS_PACKET_ATTR_MRU, /* Maximum received IP fragment size. */ OVS_PACKET_ATTR_LEN, /* Packet size before truncation. */ OVS_PACKET_ATTR_HASH, /* Packet hash. */ + OVS_PACKET_ATTR_UPCALL_PID, /* u32 Netlink PID. */ __OVS_PACKET_ATTR_MAX }; -- cgit v1.2.3 From f55ce5a6cd33211c8cc5bce0554b6ac710a6a28b Mon Sep 17 00:00:00 2001 From: Ankit Agrawal Date: Sat, 5 Jul 2025 07:17:17 +0000 Subject: KVM: arm64: Expose new KVM cap for cacheable PFNMAP Introduce a new KVM capability to expose to the userspace whether cacheable mapping of PFNMAP is supported. The ability to safely do the cacheable mapping of PFNMAP is contingent on S2FWB and ARM64_HAS_CACHE_DIC. S2FWB allows KVM to avoid flushing the D cache, ARM64_HAS_CACHE_DIC allows KVM to avoid flushing the icache and turns icache_inval_pou() into a NOP. The cap would be false if those requirements are missing and is checked by making use of kvm_arch_supports_cacheable_pfnmap. This capability would allow userspace to discover the support. It could for instance be used by userspace to prevent live-migration across FWB and non-FWB hosts. CC: Catalin Marinas CC: Jason Gunthorpe CC: Oliver Upton CC: David Hildenbrand Suggested-by: Marc Zyngier Reviewed-by: Jason Gunthorpe Tested-by: Donald Dutile Signed-off-by: Ankit Agrawal Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20250705071717.5062-7-ankita@nvidia.com Signed-off-by: Oliver Upton --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 37891580d05d..e4e566ff348b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -956,6 +956,7 @@ struct kvm_enable_cap { #define KVM_CAP_ARM_EL2 240 #define KVM_CAP_ARM_EL2_E2H0 241 #define KVM_CAP_RISCV_MP_STATE_RESET 242 +#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 struct kvm_irq_routing_irqchip { __u32 irqchip; -- cgit v1.2.3 From 1e3b66e326015f77bc4b36976bebeedc2ac0f588 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 3 Jul 2025 13:23:29 +0200 Subject: vsock: fix `vsock_proto` declaration From commit 634f1a7110b4 ("vsock: support sockmap"), `struct proto vsock_proto`, defined in af_vsock.c, is not static anymore, since it's used by vsock_bpf.c. If CONFIG_BPF_SYSCALL is not defined, `make C=2` will print a warning: $ make O=build C=2 W=1 net/vmw_vsock/ ... CC [M] net/vmw_vsock/af_vsock.o CHECK ../net/vmw_vsock/af_vsock.c ../net/vmw_vsock/af_vsock.c:123:14: warning: symbol 'vsock_proto' was not declared. Should it be static? Declare `vsock_proto` regardless of CONFIG_BPF_SYSCALL, since it's defined in af_vsock.c, which is built regardless of CONFIG_BPF_SYSCALL. Fixes: 634f1a7110b4 ("vsock: support sockmap") Signed-off-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Link: https://patch.msgid.link/20250703112329.28365-1-sgarzare@redhat.com Signed-off-by: Jakub Kicinski --- include/net/af_vsock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index d56e6e135158..d40e978126e3 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -243,8 +243,8 @@ int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); -#ifdef CONFIG_BPF_SYSCALL extern struct proto vsock_proto; +#ifdef CONFIG_BPF_SYSCALL int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); void __init vsock_bpf_build_proto(void); #else -- cgit v1.2.3 From 4369d40da2f28ae1d3caadd4eb5d7b7f49a3776f Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Wed, 2 Jul 2025 14:32:55 +0900 Subject: netmem: use _Generic to cover const casting for page_to_netmem() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current page_to_netmem() doesn't cover const casting resulting in trying to cast const struct page * to const netmem_ref fails. To cover the case, change page_to_netmem() to use macro and _Generic. Signed-off-by: Byungchul Park Reviewed-by: Mina Almasry Reviewed-by: Toke Høiland-Jørgensen Reviewed-by: Pavel Begunkov Link: https://patch.msgid.link/20250702053256.4594-5-byungchul@sk.com Signed-off-by: Jakub Kicinski --- include/net/netmem.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netmem.h b/include/net/netmem.h index 7a1dafa3f080..de1d95f04076 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -139,10 +139,9 @@ static inline netmem_ref net_iov_to_netmem(struct net_iov *niov) return (__force netmem_ref)((unsigned long)niov | NET_IOV); } -static inline netmem_ref page_to_netmem(const struct page *page) -{ - return (__force netmem_ref)page; -} +#define page_to_netmem(p) (_Generic((p), \ + const struct page * : (__force const netmem_ref)(p), \ + struct page * : (__force netmem_ref)(p))) /** * virt_to_netmem - convert virtual memory pointer to a netmem reference -- cgit v1.2.3 From d8bf56a0ca10af7936de8bbdd510c33041dacecc Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Wed, 2 Jul 2025 14:32:56 +0900 Subject: page_pool: make page_pool_get_dma_addr() just wrap page_pool_get_dma_addr_netmem() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The page pool members in struct page cannot be removed unless it's not allowed to access any of them via struct page. Do not access 'page->dma_addr' directly in page_pool_get_dma_addr() but just wrap page_pool_get_dma_addr_netmem() safely. Signed-off-by: Byungchul Park Reviewed-by: Mina Almasry Reviewed-by: Ilias Apalodimas Reviewed-by: Toke Høiland-Jørgensen Reviewed-by: Pavel Begunkov Acked-by: Jesper Dangaard Brouer Link: https://patch.msgid.link/20250702053256.4594-6-byungchul@sk.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/helpers.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 773fc65780b5..db180626be06 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -444,12 +444,7 @@ static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem) */ static inline dma_addr_t page_pool_get_dma_addr(const struct page *page) { - dma_addr_t ret = page->dma_addr; - - if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) - ret <<= PAGE_SHIFT; - - return ret; + return page_pool_get_dma_addr_netmem(page_to_netmem(page)); } static inline void __page_pool_dma_sync_for_cpu(const struct page_pool *pool, -- cgit v1.2.3 From a683a5b2ba23598ad343e5ec10a4ef4077497fc9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 2 Jul 2025 06:34:37 +0100 Subject: fold fs_struct->{lock,seq} into a seqlock The combination of spinlock_t lock and seqcount_spinlock_t seq in struct fs_struct is an open-coded seqlock_t (see linux/seqlock_types.h). Combine and switch to equivalent seqlock_t primitives. AFAICS, that does end up with the same sequence of underlying operations in all cases. While we are at it, get_fs_pwd() is open-coded verbatim in get_path_from_fd(); rather than applying conversion to it, replace with the call of get_fs_pwd() there. Not worth splitting the commit for that, IMO... A bit of historical background - conversion of seqlock_t to use of seqcount_spinlock_t happened several months after the same had been done to struct fs_struct; switching fs_struct to seqlock_t could've been done immediately after that, but it looks like nobody had gotten around to that until now. Signed-off-by: Al Viro Link: https://lore.kernel.org/20250702053437.GC1880847@ZenIV Acked-by: Ahmed S. Darwish Acked-by: Peter Zijlstra (Intel) Reviewed-by: Christian Brauner Signed-off-by: Christian Brauner --- include/linux/fs_struct.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 783b48dedb72..baf200ab5c77 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -8,8 +8,7 @@ struct fs_struct { int users; - spinlock_t lock; - seqcount_spinlock_t seq; + seqlock_t seq; int umask; int in_exec; struct path root, pwd; @@ -26,18 +25,18 @@ extern int unshare_fs_struct(void); static inline void get_fs_root(struct fs_struct *fs, struct path *root) { - spin_lock(&fs->lock); + read_seqlock_excl(&fs->seq); *root = fs->root; path_get(root); - spin_unlock(&fs->lock); + read_sequnlock_excl(&fs->seq); } static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd) { - spin_lock(&fs->lock); + read_seqlock_excl(&fs->seq); *pwd = fs->pwd; path_get(pwd); - spin_unlock(&fs->lock); + read_sequnlock_excl(&fs->seq); } extern bool current_chrooted(void); -- cgit v1.2.3 From ce7a381697cb3958ffe0b45e5028ac69444e9288 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Fri, 27 Jun 2025 21:49:28 +0800 Subject: net: bonding: add broadcast_neighbor option for 802.3ad Stacking technology is a type of technology used to expand ports on Ethernet switches. It is widely used as a common access method in large-scale Internet data center architectures. Years of practice have proved that stacking technology has advantages and disadvantages in high-reliability network architecture scenarios. For instance, in stacking networking arch, conventional switch system upgrades require multiple stacked devices to restart at the same time. Therefore, it is inevitable that the business will be interrupted for a while. It is for this reason that "no-stacking" in data centers has become a trend. Additionally, when the stacking link connecting the switches fails or is abnormal, the stack will split. Although it is not common, it still happens in actual operation. The problem is that after the split, it is equivalent to two switches with the same configuration appearing in the network, causing network configuration conflicts and ultimately interrupting the services carried by the stacking system. To improve network stability, "non-stacking" solutions have been increasingly adopted, particularly by public cloud providers and tech companies like Alibaba, Tencent, and Didi. "non-stacking" is a method of mimicing switch stacking that convinces a LACP peer, bonding in this case, connected to a set of "non-stacked" switches that all of its ports are connected to a single switch (i.e., LACP aggregator), as if those switches were stacked. This enables the LACP peer's ports to aggregate together, and requires (a) special switch configuration, described in the linked article, and (b) modifications to the bonding 802.3ad (LACP) mode to send all ARP/ND packets across all ports of the active aggregator. Note that, with multiple aggregators, the current broadcast mode logic will send only packets to the selected aggregator(s). +-----------+ +-----------+ | switch1 | | switch2 | +-----------+ +-----------+ ^ ^ | | +-----------------+ | bond4 lacp | +-----------------+ | | | NIC1 | NIC2 +-----------------+ | server | +-----------------+ - https://www.ruijie.com/fr-fr/support/tech-gallery/de-stack-data-center-network-architecture/ Cc: Jay Vosburgh Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Simon Horman Cc: Jonathan Corbet Cc: Andrew Lunn Cc: Steven Rostedt Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Nikolay Aleksandrov Signed-off-by: Tonghao Zhang Signed-off-by: Zengbing Tu Link: https://patch.msgid.link/84d0a044514157bb856a10b6d03a1028c4883561.1751031306.git.tonghao@bamaicloud.com Signed-off-by: Paolo Abeni --- include/net/bond_options.h | 1 + include/net/bonding.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 18687ccf0638..022b122a9fb6 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -77,6 +77,7 @@ enum { BOND_OPT_NS_TARGETS, BOND_OPT_PRIO, BOND_OPT_COUPLED_CONTROL, + BOND_OPT_BROADCAST_NEIGH, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 95f67b308c19..e06f0d63b2c1 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -115,6 +115,8 @@ static inline int is_netpoll_tx_blocked(struct net_device *dev) #define is_netpoll_tx_blocked(dev) (0) #endif +DECLARE_STATIC_KEY_FALSE(bond_bcast_neigh_enabled); + struct bond_params { int mode; int xmit_policy; @@ -149,6 +151,7 @@ struct bond_params { struct in6_addr ns_targets[BOND_MAX_NS_TARGETS]; #endif int coupled_control; + int broadcast_neighbor; /* 2 bytes of padding : see ether_addr_equal_64bits() */ u8 ad_actor_system[ETH_ALEN + 2]; -- cgit v1.2.3 From 3d98ee52659c3f1d3913ae5b97f7743c5247752c Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Fri, 27 Jun 2025 21:49:29 +0800 Subject: net: bonding: add broadcast_neighbor netlink option User can config or display the bonding broadcast_neighbor option via iproute2/netlink. Cc: Jay Vosburgh Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Simon Horman Cc: Jonathan Corbet Cc: Andrew Lunn Cc: Steven Rostedt Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Nikolay Aleksandrov Signed-off-by: Tonghao Zhang Signed-off-by: Zengbing Tu Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/76b90700ba5b98027dfb51a2f3c5cfea0440a21b.1751031306.git.tonghao@bamaicloud.com Signed-off-by: Paolo Abeni --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 873c285996fe..784ace3a519c 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1535,6 +1535,7 @@ enum { IFLA_BOND_MISSED_MAX, IFLA_BOND_NS_IP6_TARGET, IFLA_BOND_COUPLED_CONTROL, + IFLA_BOND_BROADCAST_NEIGH, __IFLA_BOND_MAX, }; -- cgit v1.2.3 From 269936db5eb3962fe290b1dc4dbf1859cd5a04dd Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 2 Jul 2025 14:20:03 +0800 Subject: net: mctp: separate routing database from routing operations This change adds a struct mctp_dst, representing the result of a routing lookup. This decouples the struct mctp_route from the actual implementation of a routing operation. This will allow for future routing changes which may require more involved lookup logic, such as gateway routing - which may require multiple traversals of the routing table. Since we only use the struct mctp_route at lookup time, we no longer hold routes over a routing operation, as we only need it to populate the dst. However, we do hold the dev while the dst is active. This requires some changes to the route test infrastructure, as we no longer have a mock route to handle the route output operation, and transient dsts are created by the routing code, so we can't override them as easily. Instead, we use kunit->priv to stash a packet queue, and a custom dst_output function queues into that packet queue, which we can use for later expectations. Signed-off-by: Jeremy Kerr Link: https://patch.msgid.link/20250702-dev-forwarding-v5-3-1468191da8a4@codeconstruct.com.au Signed-off-by: Paolo Abeni --- include/net/mctp.h | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/mctp.h b/include/net/mctp.h index 07d458990113..6c9c5c48f59a 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -222,6 +222,8 @@ struct mctp_flow { struct mctp_sk_key *key; }; +struct mctp_dst; + /* Route definition. * * These are held in the pernet->mctp.routes list, with RCU protection for @@ -229,8 +231,7 @@ struct mctp_flow { * dropped on NETDEV_UNREGISTER events. * * Updates to the route table are performed under rtnl; all reads under RCU, - * so routes cannot be referenced over a RCU grace period. Specifically: A - * caller cannot block between mctp_route_lookup and mctp_route_release() + * so routes cannot be referenced over a RCU grace period. */ struct mctp_route { mctp_eid_t min, max; @@ -238,7 +239,7 @@ struct mctp_route { unsigned char type; unsigned int mtu; struct mctp_dev *dev; - int (*output)(struct mctp_route *route, + int (*output)(struct mctp_dst *dst, struct sk_buff *skb); struct list_head list; @@ -246,12 +247,34 @@ struct mctp_route { struct rcu_head rcu; }; +/* Route lookup result: dst. Represents the results of a routing decision, + * but is only held over the individual routing operation. + * + * Will typically be stored on the caller stack, and must be released after + * usage. + */ +struct mctp_dst { + struct mctp_dev *dev; + unsigned int mtu; + + /* set for direct addressing */ + unsigned char halen; + unsigned char haddr[MAX_ADDR_LEN]; + + int (*output)(struct mctp_dst *dst, struct sk_buff *skb); +}; + +int mctp_dst_from_extaddr(struct mctp_dst *dst, struct net *net, int ifindex, + unsigned char halen, const unsigned char *haddr); + /* route interfaces */ -struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, - mctp_eid_t daddr); +int mctp_route_lookup(struct net *net, unsigned int dnet, + mctp_eid_t daddr, struct mctp_dst *dst); + +void mctp_dst_release(struct mctp_dst *dst); /* always takes ownership of skb */ -int mctp_local_output(struct sock *sk, struct mctp_route *rt, +int mctp_local_output(struct sock *sk, struct mctp_dst *dst, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); void mctp_key_unref(struct mctp_sk_key *key); -- cgit v1.2.3 From 3007f90ec0385304ab5794e9585427b73f40e32f Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 2 Jul 2025 14:20:04 +0800 Subject: net: mctp: separate cb from direct-addressing routing Now that we have the dst->haddr populated by sendmsg (when extended addressing is in use), we no longer need to stash the link-layer address in the skb->cb. Instead, only use skb->cb for incoming lladdr data. While we're at it: remove cb->src, as was never used. Signed-off-by: Jeremy Kerr Link: https://patch.msgid.link/20250702-dev-forwarding-v5-4-1468191da8a4@codeconstruct.com.au Signed-off-by: Paolo Abeni --- include/net/mctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mctp.h b/include/net/mctp.h index 6c9c5c48f59a..b3af0690f607 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -183,8 +183,8 @@ struct mctp_sk_key { struct mctp_skb_cb { unsigned int magic; unsigned int net; - int ifindex; /* extended/direct addressing if set */ - mctp_eid_t src; + /* fields below provide extended addressing for ingress to recvmsg() */ + int ifindex; unsigned char halen; unsigned char haddr[MAX_ADDR_LEN]; }; -- cgit v1.2.3 From ad39c12fcee34b8980a80ad5c803bca9906fbd4e Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 2 Jul 2025 14:20:13 +0800 Subject: net: mctp: add gateway routing support This change allows for gateway routing, where a route table entry may reference a routable endpoint (by network and EID), instead of routing directly to a netdevice. We add support for a RTM_GATEWAY attribute for netlink route updates, with an attribute format of: struct mctp_fq_addr { unsigned int net; mctp_eid_t eid; } - we need the net here to uniquely identify the target EID, as we no longer have the device reference directly (which would provide the net id in the case of direct routes). This makes route lookups recursive, as a route lookup that returns a gateway route must be resolved into a direct route (ie, to a device) eventually. We provide a limit to the route lookups, to prevent infinite loop routing. The route lookup populates a new 'nexthop' field in the dst structure, which now specifies the key for the neighbour table lookup on device output, rather than using the packet destination address directly. Signed-off-by: Jeremy Kerr Link: https://patch.msgid.link/20250702-dev-forwarding-v5-13-1468191da8a4@codeconstruct.com.au Signed-off-by: Paolo Abeni --- include/net/mctp.h | 13 ++++++++++++- include/uapi/linux/mctp.h | 8 ++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mctp.h b/include/net/mctp.h index b3af0690f607..ac4f4ecdfc24 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -237,8 +237,18 @@ struct mctp_route { mctp_eid_t min, max; unsigned char type; + unsigned int mtu; - struct mctp_dev *dev; + + enum { + MCTP_ROUTE_DIRECT, + MCTP_ROUTE_GATEWAY, + } dst_type; + union { + struct mctp_dev *dev; + struct mctp_fq_addr gateway; + }; + int (*output)(struct mctp_dst *dst, struct sk_buff *skb); @@ -256,6 +266,7 @@ struct mctp_route { struct mctp_dst { struct mctp_dev *dev; unsigned int mtu; + mctp_eid_t nexthop; /* set for direct addressing */ unsigned char halen; diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index e1db65df9359..19ad12a0cd4b 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -37,6 +37,14 @@ struct sockaddr_mctp_ext { __u8 smctp_haddr[MAX_ADDR_LEN]; }; +/* A "fully qualified" MCTP address, which includes the system-local network ID, + * required to uniquely resolve a routable EID. + */ +struct mctp_fq_addr { + unsigned int net; + mctp_eid_t eid; +}; + #define MCTP_NET_ANY 0x0 #define MCTP_ADDR_NULL 0x00 -- cgit v1.2.3 From 84a7d6797e6a03705e6b48c613fa424662049d87 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Jul 2025 07:12:30 +0000 Subject: net/sched: acp_api: no longer acquire RTNL in tc_action_net_exit() tc_action_net_exit() got an rtnl exclusion in commit a159d3c4b829 ("net_sched: acquire RTNL in tc_action_net_exit()") Since then, commit 16af6067392c ("net: sched: implement reference counted action release") made this RTNL exclusion obsolete for most cases. Only tcf_action_offload_del() might still require it. Move the rtnl locking into tcf_idrinfo_destroy() when an offload action is found. Most netns do not have actions, yet deleting them is adding a lot of pressure on RTNL, which is for many the most contended mutex in the kernel. We are moving to a per-netns 'rtnl', so tc_action_net_exit() will not be able to grab 'rtnl' a single time for a batch of netns. Before the patch: perf probe -a rtnl_lock perf record -e probe:rtnl_lock -a /bin/bash -c 'unshare -n "/bin/true"; sleep 1' [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.305 MB perf.data (25 samples) ] After the patch: perf record -e probe:rtnl_lock -a /bin/bash -c 'unshare -n "/bin/true"; sleep 1' [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.304 MB perf.data (9 samples) ] Signed-off-by: Eric Dumazet Cc: Vlad Buslov Cc: Jiri Pirko Cc: Marcelo Ricardo Leitner Link: https://patch.msgid.link/20250702071230.1892674-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/net/act_api.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 404df8557f6a..04781c92b43d 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -170,14 +170,12 @@ static inline void tc_action_net_exit(struct list_head *net_list, { struct net *net; - rtnl_lock(); list_for_each_entry(net, net_list, exit_list) { struct tc_action_net *tn = net_generic(net, id); tcf_idrinfo_destroy(tn->ops, tn->idrinfo); kfree(tn->idrinfo); } - rtnl_unlock(); } int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, -- cgit v1.2.3 From b441cf3f8c4b8576639d20c8eb4aa32917602ecd Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 4 Jul 2025 16:54:33 +0200 Subject: xfrm: delete x->tunnel as we delete x The ipcomp fallback tunnels currently get deleted (from the various lists and hashtables) as the last user state that needed that fallback is destroyed (not deleted). If a reference to that user state still exists, the fallback state will remain on the hashtables/lists, triggering the WARN in xfrm_state_fini. Because of those remaining references, the fix in commit f75a2804da39 ("xfrm: destroy xfrm_state synchronously on net exit path") is not complete. We recently fixed one such situation in TCP due to defered freeing of skbs (commit 9b6412e6979f ("tcp: drop secpath at the same time as we currently drop dst")). This can also happen due to IP reassembly: skbs with a secpath remain on the reassembly queue until netns destruction. If we can't guarantee that the queues are flushed by the time xfrm_state_fini runs, there may still be references to a (user) xfrm_state, preventing the timely deletion of the corresponding fallback state. Instead of chasing each instance of skbs holding a secpath one by one, this patch fixes the issue directly within xfrm, by deleting the fallback state as soon as the last user state depending on it has been deleted. Destruction will still happen when the final reference is dropped. A separate lockdep class for the fallback state is required since we're going to lock x->tunnel while x is locked. Fixes: 9d4139c76905 ("netns xfrm: per-netns xfrm_state_all list") Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e45a275fca26..91d52a380e37 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -441,7 +441,6 @@ int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo); int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo); void xfrm_flush_gc(void); -void xfrm_state_delete_tunnel(struct xfrm_state *x); struct xfrm_type { struct module *owner; -- cgit v1.2.3 From 2a198bbec6913ae1c90ec963750003c6213668c7 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 4 Jul 2025 16:54:34 +0200 Subject: Revert "xfrm: destroy xfrm_state synchronously on net exit path" This reverts commit f75a2804da391571563c4b6b29e7797787332673. With all states (whether user or kern) removed from the hashtables during deletion, there's no need for synchronous destruction of states. xfrm6_tunnel states still need to have been destroyed (which will be the case when its last user is deleted (not destroyed)) so that xfrm6_tunnel_free_spi removes it from the per-netns hashtable before the netns is destroyed. This has the benefit of skipping one synchronize_rcu per state (in __xfrm_state_destroy(sync=true)) when we exit a netns. Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 91d52a380e37..f3014e4f54fc 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -915,7 +915,7 @@ static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) xfrm_pol_put(pols[i]); } -void __xfrm_state_destroy(struct xfrm_state *, bool); +void __xfrm_state_destroy(struct xfrm_state *); static inline void __xfrm_state_put(struct xfrm_state *x) { @@ -925,13 +925,7 @@ static inline void __xfrm_state_put(struct xfrm_state *x) static inline void xfrm_state_put(struct xfrm_state *x) { if (refcount_dec_and_test(&x->refcnt)) - __xfrm_state_destroy(x, false); -} - -static inline void xfrm_state_put_sync(struct xfrm_state *x) -{ - if (refcount_dec_and_test(&x->refcnt)) - __xfrm_state_destroy(x, true); + __xfrm_state_destroy(x); } static inline void xfrm_state_hold(struct xfrm_state *x) @@ -1769,7 +1763,7 @@ struct xfrmk_spdinfo { struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num); int xfrm_state_delete(struct xfrm_state *x); -int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync); +int xfrm_state_flush(struct net *net, u8 proto, bool task_valid); int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid); int xfrm_dev_policy_flush(struct net *net, struct net_device *dev, bool task_valid); -- cgit v1.2.3 From e22da4685013922ade8e7b5e727ac6804fe5b51e Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 1 Jul 2025 16:46:57 +0200 Subject: net/handshake: Add new parameter 'HANDSHAKE_A_ACCEPT_KEYRING' Add a new netlink parameter 'HANDSHAKE_A_ACCEPT_KEYRING' to provide the serial number of the keyring to use. Signed-off-by: Hannes Reinecke Reviewed-by: Chuck Lever Acked-by: Jakub Kicinski Link: https://patch.msgid.link/20250701144657.104401-1-hare@kernel.org Signed-off-by: Paolo Abeni --- include/uapi/linux/handshake.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/handshake.h b/include/uapi/linux/handshake.h index 3d7ea58778c9..662e7de46c54 100644 --- a/include/uapi/linux/handshake.h +++ b/include/uapi/linux/handshake.h @@ -45,6 +45,7 @@ enum { HANDSHAKE_A_ACCEPT_PEER_IDENTITY, HANDSHAKE_A_ACCEPT_CERTIFICATE, HANDSHAKE_A_ACCEPT_PEERNAME, + HANDSHAKE_A_ACCEPT_KEYRING, __HANDSHAKE_A_ACCEPT_MAX, HANDSHAKE_A_ACCEPT_MAX = (__HANDSHAKE_A_ACCEPT_MAX - 1) -- cgit v1.2.3 From 70b9c0c11e55167b9552ef395bc00f4920299177 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 30 Jun 2025 15:02:18 +0200 Subject: uapi: bitops: use UAPI-safe variant of BITS_PER_LONG again (2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BITS_PER_LONG does not exist in UAPI headers, so can't be used by the UAPI __GENMASK(). Instead __BITS_PER_LONG needs to be used. When __GENMASK() was introduced in commit 3c7a8e190bc5 ("uapi: introduce uapi-friendly macros for GENMASK"), the code was fine. A broken revert in 1e7933a575ed ("uapi: Revert "bitops: avoid integer overflow in GENMASK(_ULL)"") introduced the incorrect usage of BITS_PER_LONG. That was fixed in commit 11fcf368506d ("uapi: bitops: use UAPI-safe variant of BITS_PER_LONG again"). But a broken sync of the kernel headers with the tools/ headers in commit fc92099902fb ("tools headers: Synchronize linux/bits.h with the kernel sources") undid the fix. Reapply the fix and while at it also fix the tools header. Fixes: fc92099902fb ("tools headers: Synchronize linux/bits.h with the kernel sources") Signed-off-by: Thomas Weißschuh Acked-by: Yury Norov (NVIDIA) Signed-off-by: Yury Norov (NVIDIA) --- include/uapi/linux/bits.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h index 682b406e1067..a04afef9efca 100644 --- a/include/uapi/linux/bits.h +++ b/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) -- cgit v1.2.3 From 25489a4f556414445d342951615178368ee45cde Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Wed, 2 Jul 2025 15:38:08 +0200 Subject: net: splice: Drop unused @gfp Since its introduction in commit 2e910b95329c ("net: Add a function to splice pages into an skbuff for MSG_SPLICE_PAGES"), skb_splice_from_iter() never used the @gfp argument. Remove it and adapt callers. No functional change intended. Reviewed-by: Simon Horman Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20250702-splice-drop-unused-v3-2-55f68b60d2b7@rbox.co Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4f6dcb37bae8..b8b06e71b73e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -5265,7 +5265,7 @@ static inline void skb_mark_for_recycle(struct sk_buff *skb) } ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter, - ssize_t maxsize, gfp_t gfp); + ssize_t maxsize); #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- cgit v1.2.3 From c523058713abac66b0d83ae12a0574d76cd7df2b Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 3 Jul 2025 07:55:52 +0200 Subject: net: phy: declare package-related struct members only if CONFIG_PHY_PACKAGE is enabled Now that we have an own config symbol for the PHY package module, we can use it to reduce size of these structs if it isn't enabled. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/f0daefa4-406a-4a06-a4f0-7e31309f82bc@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 74c1bcf64b3c..543a94751a6b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -409,8 +409,10 @@ struct mii_bus { /** @shared_lock: protect access to the shared element */ struct mutex shared_lock; +#if IS_ENABLED(CONFIG_PHY_PACKAGE) /** @shared: shared state across different PHYs */ struct phy_package_shared *shared[PHY_MAX_ADDR]; +#endif }; #define to_mii_bus(d) container_of(d, struct mii_bus, dev) @@ -718,9 +720,11 @@ struct phy_device { /* For use by PHYs to maintain extra state */ void *priv; +#if IS_ENABLED(CONFIG_PHY_PACKAGE) /* shared data pointer */ /* For use by PHYs inside the same package that need a shared state. */ struct phy_package_shared *shared; +#endif /* Reporting cable test results */ struct sk_buff *skb; -- cgit v1.2.3 From e7d4c1c5a54648fd5b787a4a0f81521ec7260bcd Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 8 Jul 2025 17:54:49 +0200 Subject: virtio: introduce extended features The virtio specifications allows for up to 128 bits for the device features. Soon we are going to use some of the 'extended' bits features (above 64) for the virtio_net driver. Introduce extended features as a fixed size array of u64. To minimize the diffstat allows legacy driver to access the low 64 bits via a transparent union. Introduce an extended get_extended_features configuration callback that devices supporting the extended features range must implement in place of the traditional one. Note that legacy and transport features don't need any change, as they are always in the low 64 bit range. Acked-by: Jason Wang Signed-off-by: Paolo Abeni --- include/linux/virtio.h | 9 +++-- include/linux/virtio_config.h | 43 ++++++++++---------- include/linux/virtio_features.h | 88 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 25 deletions(-) create mode 100644 include/linux/virtio_features.h (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 64cb4b04be7a..04b90c88d164 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -11,6 +11,7 @@ #include #include #include +#include /** * struct virtqueue - a queue to register buffers for sending or receiving. @@ -141,7 +142,9 @@ struct virtio_admin_cmd { * @config: the configuration ops for this device. * @vringh_config: configuration ops for host vrings. * @vqs: the list of virtqueues for this device. - * @features: the features supported by both driver and device. + * @features: the 64 lower features supported by both driver and device. + * @features_array: the full features space supported by both driver and + * device. * @priv: private pointer for the driver's use. * @debugfs_dir: debugfs directory entry. * @debugfs_filter_features: features to be filtered set by debugfs. @@ -159,11 +162,11 @@ struct virtio_device { const struct virtio_config_ops *config; const struct vringh_config_ops *vringh_config; struct list_head vqs; - u64 features; + VIRTIO_DECLARE_FEATURES(features); void *priv; #ifdef CONFIG_VIRTIO_DEBUG struct dentry *debugfs_dir; - u64 debugfs_filter_features; + u64 debugfs_filter_features[VIRTIO_FEATURES_DWORDS]; #endif }; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index b3e1d30c765b..918cf25cd3c6 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -77,7 +77,11 @@ struct virtqueue_info { * vdev: the virtio_device * @get_features: get the array of feature bits for this device. * vdev: the virtio_device - * Returns the first 64 feature bits (all we currently need). + * Returns the first 64 feature bits. + * @get_extended_features: + * vdev: the virtio_device + * Returns the first VIRTIO_FEATURES_MAX feature bits (all we currently + * need). * @finalize_features: confirm what device features we'll be using. * vdev: the virtio_device * This sends the driver feature bits to the device: it can change @@ -121,6 +125,8 @@ struct virtio_config_ops { void (*del_vqs)(struct virtio_device *); void (*synchronize_cbs)(struct virtio_device *); u64 (*get_features)(struct virtio_device *vdev); + void (*get_extended_features)(struct virtio_device *vdev, + u64 *features); int (*finalize_features)(struct virtio_device *vdev); const char *(*bus_name)(struct virtio_device *vdev); int (*set_vq_affinity)(struct virtqueue *vq, @@ -147,13 +153,7 @@ void virtio_check_driver_offered_feature(const struct virtio_device *vdev, static inline bool __virtio_test_bit(const struct virtio_device *vdev, unsigned int fbit) { - /* Did you forget to fix assumptions on max features? */ - if (__builtin_constant_p(fbit)) - BUILD_BUG_ON(fbit >= 64); - else - BUG_ON(fbit >= 64); - - return vdev->features & BIT_ULL(fbit); + return virtio_features_test_bit(vdev->features_array, fbit); } /** @@ -164,13 +164,7 @@ static inline bool __virtio_test_bit(const struct virtio_device *vdev, static inline void __virtio_set_bit(struct virtio_device *vdev, unsigned int fbit) { - /* Did you forget to fix assumptions on max features? */ - if (__builtin_constant_p(fbit)) - BUILD_BUG_ON(fbit >= 64); - else - BUG_ON(fbit >= 64); - - vdev->features |= BIT_ULL(fbit); + virtio_features_set_bit(vdev->features_array, fbit); } /** @@ -181,13 +175,7 @@ static inline void __virtio_set_bit(struct virtio_device *vdev, static inline void __virtio_clear_bit(struct virtio_device *vdev, unsigned int fbit) { - /* Did you forget to fix assumptions on max features? */ - if (__builtin_constant_p(fbit)) - BUILD_BUG_ON(fbit >= 64); - else - BUG_ON(fbit >= 64); - - vdev->features &= ~BIT_ULL(fbit); + virtio_features_clear_bit(vdev->features_array, fbit); } /** @@ -204,6 +192,17 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev, return __virtio_test_bit(vdev, fbit); } +static inline void virtio_get_features(struct virtio_device *vdev, + u64 *features) +{ + if (vdev->config->get_extended_features) { + vdev->config->get_extended_features(vdev, features); + return; + } + + virtio_features_from_u64(features, vdev->config->get_features(vdev)); +} + /** * virtio_has_dma_quirk - determine whether this device has the DMA quirk * @vdev: the device diff --git a/include/linux/virtio_features.h b/include/linux/virtio_features.h new file mode 100644 index 000000000000..f748f2f87de8 --- /dev/null +++ b/include/linux/virtio_features.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VIRTIO_FEATURES_H +#define _LINUX_VIRTIO_FEATURES_H + +#include + +#define VIRTIO_FEATURES_DWORDS 2 +#define VIRTIO_FEATURES_MAX (VIRTIO_FEATURES_DWORDS * 64) +#define VIRTIO_FEATURES_WORDS (VIRTIO_FEATURES_DWORDS * 2) +#define VIRTIO_BIT(b) BIT_ULL((b) & 0x3f) +#define VIRTIO_DWORD(b) ((b) >> 6) +#define VIRTIO_DECLARE_FEATURES(name) \ + union { \ + u64 name; \ + u64 name##_array[VIRTIO_FEATURES_DWORDS];\ + } + +static inline bool virtio_features_chk_bit(unsigned int bit) +{ + if (__builtin_constant_p(bit)) { + /* + * Don't care returning the correct value: the build + * will fail before any bad features access + */ + BUILD_BUG_ON(bit >= VIRTIO_FEATURES_MAX); + } else { + if (WARN_ON_ONCE(bit >= VIRTIO_FEATURES_MAX)) + return false; + } + return true; +} + +static inline bool virtio_features_test_bit(const u64 *features, + unsigned int bit) +{ + return virtio_features_chk_bit(bit) && + !!(features[VIRTIO_DWORD(bit)] & VIRTIO_BIT(bit)); +} + +static inline void virtio_features_set_bit(u64 *features, + unsigned int bit) +{ + if (virtio_features_chk_bit(bit)) + features[VIRTIO_DWORD(bit)] |= VIRTIO_BIT(bit); +} + +static inline void virtio_features_clear_bit(u64 *features, + unsigned int bit) +{ + if (virtio_features_chk_bit(bit)) + features[VIRTIO_DWORD(bit)] &= ~VIRTIO_BIT(bit); +} + +static inline void virtio_features_zero(u64 *features) +{ + memset(features, 0, sizeof(features[0]) * VIRTIO_FEATURES_DWORDS); +} + +static inline void virtio_features_from_u64(u64 *features, u64 from) +{ + virtio_features_zero(features); + features[0] = from; +} + +static inline bool virtio_features_equal(const u64 *f1, const u64 *f2) +{ + int i; + + for (i = 0; i < VIRTIO_FEATURES_DWORDS; ++i) + if (f1[i] != f2[i]) + return false; + return true; +} + +static inline void virtio_features_copy(u64 *to, const u64 *from) +{ + memcpy(to, from, sizeof(to[0]) * VIRTIO_FEATURES_DWORDS); +} + +static inline void virtio_features_andnot(u64 *to, const u64 *f1, const u64 *f2) +{ + int i; + + for (i = 0; i < VIRTIO_FEATURES_DWORDS; i++) + to[i] = f1[i] & ~f2[i]; +} + +#endif -- cgit v1.2.3 From 69b9461512246599ed80cf13358e7e6aff7285f9 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 8 Jul 2025 17:54:52 +0200 Subject: virtio_pci_modern: allow configuring extended features The virtio specifications allows for up to 128 bits for the device features. Soon we are going to use some of the 'extended' bits features (above 64) for the virtio_net driver. Extend the virtio pci modern driver to support configuring the full virtio features range, replacing the unrolled loops reading and writing the features space with explicit one bounded to the actual features space size in word and implementing the get_extended_features callback. Note that in vp_finalize_features() we only need to cache the lower 64 features bits, to process the transport features. Acked-by: Jason Wang Signed-off-by: Paolo Abeni --- include/linux/virtio_pci_modern.h | 43 +++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index c0b1b1ca1163..48bc12d1045b 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -3,6 +3,7 @@ #define _LINUX_VIRTIO_PCI_MODERN_H #include +#include #include /** @@ -95,10 +96,44 @@ static inline void vp_iowrite64_twopart(u64 val, vp_iowrite32(val >> 32, hi); } -u64 vp_modern_get_features(struct virtio_pci_modern_device *mdev); -u64 vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev); -void vp_modern_set_features(struct virtio_pci_modern_device *mdev, - u64 features); +void +vp_modern_get_driver_extended_features(struct virtio_pci_modern_device *mdev, + u64 *features); +void vp_modern_get_extended_features(struct virtio_pci_modern_device *mdev, + u64 *features); +void vp_modern_set_extended_features(struct virtio_pci_modern_device *mdev, + const u64 *features); + +static inline u64 +vp_modern_get_features(struct virtio_pci_modern_device *mdev) +{ + u64 features_array[VIRTIO_FEATURES_DWORDS]; + + vp_modern_get_extended_features(mdev, features_array); + return features_array[0]; +} + +static inline u64 +vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev) +{ + u64 features_array[VIRTIO_FEATURES_DWORDS]; + int i; + + vp_modern_get_driver_extended_features(mdev, features_array); + for (i = 1; i < VIRTIO_FEATURES_DWORDS; ++i) + WARN_ON_ONCE(features_array[i]); + return features_array[0]; +} + +static inline void +vp_modern_set_features(struct virtio_pci_modern_device *mdev, u64 features) +{ + u64 features_array[VIRTIO_FEATURES_DWORDS]; + + virtio_features_from_u64(features_array, features); + vp_modern_set_extended_features(mdev, features_array); +} + u32 vp_modern_generation(struct virtio_pci_modern_device *mdev); u8 vp_modern_get_status(struct virtio_pci_modern_device *mdev); void vp_modern_set_status(struct virtio_pci_modern_device *mdev, -- cgit v1.2.3 From 333c515d189657c934470c9b0b8a8fedb016ce6f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 8 Jul 2025 17:54:54 +0200 Subject: vhost-net: allow configuring extended features Use the extended feature type for 'acked_features' and implement two new ioctls operation allowing the user-space to set/query an unbounded amount of features. The actual number of processed features is limited by VIRTIO_FEATURES_MAX and attempts to set features above such limit fail with EOPNOTSUPP. Note that: the legacy ioctls implicitly truncate the negotiated features to the lower 64 bits range and the 'acked_backend_features' field don't need conversion, as the only negotiated feature there is in the low 64 bit range. Acked-by: Jason Wang Signed-off-by: Paolo Abeni --- include/uapi/linux/vhost.h | 7 +++++++ include/uapi/linux/vhost_types.h | 5 +++++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index d4b3e2ae1314..d6ad01fbb8d2 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -235,4 +235,11 @@ */ #define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \ struct vhost_vring_state) + +/* Extended features manipulation */ +#define VHOST_GET_FEATURES_ARRAY _IOR(VHOST_VIRTIO, 0x83, \ + struct vhost_features_array) +#define VHOST_SET_FEATURES_ARRAY _IOW(VHOST_VIRTIO, 0x83, \ + struct vhost_features_array) + #endif diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index d7656908f730..1c39cc5f5a31 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -110,6 +110,11 @@ struct vhost_msg_v2 { }; }; +struct vhost_features_array { + __u64 count; /* number of entries present in features array */ + __u64 features[] __counted_by(count); +}; + struct vhost_memory_region { __u64 guest_phys_addr; __u64 memory_size; /* bytes */ -- cgit v1.2.3 From a2fb4bc4e2a6a031683910d85b278c1d25ae5420 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 8 Jul 2025 17:54:59 +0200 Subject: net: implement virtio helpers to handle UDP GSO tunneling. The virtio specification are introducing support for GSO over UDP tunnel. This patch brings in the needed defines and the additional virtio hdr parsing/building helpers. The UDP tunnel support uses additional fields in the virtio hdr, and such fields location can change depending on other negotiated features - specifically VIRTIO_NET_F_HASH_REPORT. Try to be as conservative as possible with the new field validation. Existing implementation for plain GSO offloads allow for invalid/ self-contradictory values of such fields. With GSO over UDP tunnel we can be more strict, with no need to deal with legacy implementation. Since the checksum-related field validation is asymmetric in the driver and in the device, introduce a separate helper to implement the new checks (to be used only on the driver side). Note that while the feature space exceeds the 64-bit boundaries, the guest offload space is fixed by the specification of the VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET command to a 64-bit size. Prior to the UDP tunnel GSO support, each guest offload bit corresponded to the feature bit with the same value and vice versa. Due to the limited 'guest offload' space, relevant features in the high 64 bits are 'mapped' to free bits in the lower range. That is simpler than defining a new command (and associated features) to exchange an extended guest offloads set. As a consequence, the uAPIs also specify the mapped guest offload value corresponding to the UDP tunnel GSO features. Acked-by: Jason Wang Signed-off-by: Paolo Abeni -- v4 -> v5: - avoid lines above 80 chars v3 -> v4: - fixed offset for UDP GSO tunnel, update accordingly the helpers - tried to clarified vlan_hlen semantic - virtio_net_chk_data_valid() -> virtio_net_handle_csum_offload() v2 -> v3: - add definitions for possible vnet hdr layouts with tunnel support v1 -> v2: - 'relay' -> 'rely' typo - less unclear comment WRT enforced inner GSO checks - inner header fields are allowed only with 'modern' virtio, thus are always le - clarified in the commit message the need for 'mapped features' defines - assume little_endian is true when UDP GSO is enabled. - fix inner proto type value --- include/linux/virtio_net.h | 197 ++++++++++++++++++++++++++++++++++++++-- include/uapi/linux/virtio_net.h | 33 +++++++ 2 files changed, 222 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 02a9f4dc594d..20e0584db1dd 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -47,9 +47,9 @@ static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, return 0; } -static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, - const struct virtio_net_hdr *hdr, - bool little_endian) +static inline int __virtio_net_hdr_to_skb(struct sk_buff *skb, + const struct virtio_net_hdr *hdr, + bool little_endian, u8 hdr_gso_type) { unsigned int nh_min_len = sizeof(struct iphdr); unsigned int gso_type = 0; @@ -57,8 +57,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, unsigned int p_off = 0; unsigned int ip_proto; - if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { - switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + if (hdr_gso_type != VIRTIO_NET_HDR_GSO_NONE) { + switch (hdr_gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: gso_type = SKB_GSO_TCPV4; ip_proto = IPPROTO_TCP; @@ -84,7 +84,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, return -EINVAL; } - if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) + if (hdr_gso_type & VIRTIO_NET_HDR_GSO_ECN) gso_type |= SKB_GSO_TCP_ECN; if (hdr->gso_size == 0) @@ -122,7 +122,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!protocol) virtio_net_hdr_set_proto(skb, hdr); - else if (!virtio_net_hdr_match_proto(protocol, hdr->gso_type)) + else if (!virtio_net_hdr_match_proto(protocol, + hdr_gso_type)) return -EINVAL; else skb->protocol = protocol; @@ -153,7 +154,7 @@ retry: } } - if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + if (hdr_gso_type != VIRTIO_NET_HDR_GSO_NONE) { u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); unsigned int nh_off = p_off; struct skb_shared_info *shinfo = skb_shinfo(skb); @@ -199,6 +200,13 @@ retry: return 0; } +static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, + const struct virtio_net_hdr *hdr, + bool little_endian) +{ + return __virtio_net_hdr_to_skb(skb, hdr, little_endian, hdr->gso_type); +} + static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, struct virtio_net_hdr *hdr, bool little_endian, @@ -242,4 +250,177 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, return 0; } +static inline unsigned int virtio_l3min(bool is_ipv6) +{ + return is_ipv6 ? sizeof(struct ipv6hdr) : sizeof(struct iphdr); +} + +static inline int +virtio_net_hdr_tnl_to_skb(struct sk_buff *skb, + const struct virtio_net_hdr_v1_hash_tunnel *vhdr, + bool tnl_hdr_negotiated, + bool tnl_csum_negotiated, + bool little_endian) +{ + const struct virtio_net_hdr *hdr = (const struct virtio_net_hdr *)vhdr; + unsigned int inner_nh, outer_th, inner_th; + unsigned int inner_l3min, outer_l3min; + u8 gso_inner_type, gso_tunnel_type; + bool outer_isv6, inner_isv6; + int ret; + + gso_tunnel_type = hdr->gso_type & VIRTIO_NET_HDR_GSO_UDP_TUNNEL; + if (!gso_tunnel_type) + return virtio_net_hdr_to_skb(skb, hdr, little_endian); + + /* Tunnel not supported/negotiated, but the hdr asks for it. */ + if (!tnl_hdr_negotiated) + return -EINVAL; + + /* Either ipv4 or ipv6. */ + if (gso_tunnel_type == VIRTIO_NET_HDR_GSO_UDP_TUNNEL) + return -EINVAL; + + /* The UDP tunnel must carry a GSO packet, but no UFO. */ + gso_inner_type = hdr->gso_type & ~(VIRTIO_NET_HDR_GSO_ECN | + VIRTIO_NET_HDR_GSO_UDP_TUNNEL); + if (!gso_inner_type || gso_inner_type == VIRTIO_NET_HDR_GSO_UDP) + return -EINVAL; + + /* Rely on csum being present. */ + if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) + return -EINVAL; + + /* Validate offsets. */ + outer_isv6 = gso_tunnel_type & VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6; + inner_isv6 = gso_inner_type == VIRTIO_NET_HDR_GSO_TCPV6; + inner_l3min = virtio_l3min(inner_isv6); + outer_l3min = ETH_HLEN + virtio_l3min(outer_isv6); + + inner_th = __virtio16_to_cpu(little_endian, hdr->csum_start); + inner_nh = le16_to_cpu(vhdr->inner_nh_offset); + outer_th = le16_to_cpu(vhdr->outer_th_offset); + if (outer_th < outer_l3min || + inner_nh < outer_th + sizeof(struct udphdr) || + inner_th < inner_nh + inner_l3min) + return -EINVAL; + + /* Let the basic parsing deal with plain GSO features. */ + ret = __virtio_net_hdr_to_skb(skb, hdr, true, + hdr->gso_type & ~gso_tunnel_type); + if (ret) + return ret; + + /* In case of USO, the inner protocol is still unknown and + * `inner_isv6` is just a guess, additional parsing is needed. + * The previous validation ensures that accessing an ipv4 inner + * network header is safe. + */ + if (gso_inner_type == VIRTIO_NET_HDR_GSO_UDP_L4) { + struct iphdr *iphdr = (struct iphdr *)(skb->data + inner_nh); + + inner_isv6 = iphdr->version == 6; + inner_l3min = virtio_l3min(inner_isv6); + if (inner_th < inner_nh + inner_l3min) + return -EINVAL; + } + + skb_set_inner_protocol(skb, inner_isv6 ? htons(ETH_P_IPV6) : + htons(ETH_P_IP)); + if (hdr->flags & VIRTIO_NET_HDR_F_UDP_TUNNEL_CSUM) { + if (!tnl_csum_negotiated) + return -EINVAL; + + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; + } else { + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; + } + + skb->inner_transport_header = inner_th + skb_headroom(skb); + skb->inner_network_header = inner_nh + skb_headroom(skb); + skb->inner_mac_header = inner_nh + skb_headroom(skb); + skb->transport_header = outer_th + skb_headroom(skb); + skb->encapsulation = 1; + return 0; +} + +/* Checksum-related fields validation for the driver */ +static inline int virtio_net_handle_csum_offload(struct sk_buff *skb, + struct virtio_net_hdr *hdr, + bool tnl_csum_negotiated) +{ + if (!(hdr->gso_type & VIRTIO_NET_HDR_GSO_UDP_TUNNEL)) { + if (!(hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID)) + return 0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + if (!(hdr->flags & VIRTIO_NET_HDR_F_UDP_TUNNEL_CSUM)) + return 0; + + /* tunnel csum packets are invalid when the related + * feature has not been negotiated + */ + if (!tnl_csum_negotiated) + return -EINVAL; + skb->csum_level = 1; + return 0; + } + + /* DATA_VALID is mutually exclusive with NEEDS_CSUM, and GSO + * over UDP tunnel requires the latter + */ + if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID) + return -EINVAL; + return 0; +} + +/* + * vlan_hlen always refers to the outermost MAC header. That also + * means it refers to the only MAC header, if the packet does not carry + * any encapsulation. + */ +static inline int +virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, + struct virtio_net_hdr_v1_hash_tunnel *vhdr, + bool tnl_hdr_negotiated, + bool little_endian, + int vlan_hlen) +{ + struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)vhdr; + unsigned int inner_nh, outer_th; + int tnl_gso_type; + int ret; + + tnl_gso_type = skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM); + if (!tnl_gso_type) + return virtio_net_hdr_from_skb(skb, hdr, little_endian, false, + vlan_hlen); + + /* Tunnel support not negotiated but skb ask for it. */ + if (!tnl_hdr_negotiated) + return -EINVAL; + + /* Let the basic parsing deal with plain GSO features. */ + skb_shinfo(skb)->gso_type &= ~tnl_gso_type; + ret = virtio_net_hdr_from_skb(skb, hdr, true, false, vlan_hlen); + skb_shinfo(skb)->gso_type |= tnl_gso_type; + if (ret) + return ret; + + if (skb->protocol == htons(ETH_P_IPV6)) + hdr->gso_type |= VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6; + else + hdr->gso_type |= VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4; + + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) + hdr->flags |= VIRTIO_NET_HDR_F_UDP_TUNNEL_CSUM; + + inner_nh = skb->inner_network_header - skb_headroom(skb); + outer_th = skb->transport_header - skb_headroom(skb); + vhdr->inner_nh_offset = cpu_to_le16(inner_nh); + vhdr->outer_th_offset = cpu_to_le16(outer_th); + return 0; +} + #endif /* _LINUX_VIRTIO_NET_H */ diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 963540deae66..8bf27ab8bcb4 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -70,6 +70,28 @@ * with the same MAC. */ #define VIRTIO_NET_F_SPEED_DUPLEX 63 /* Device set linkspeed and duplex */ +#define VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO 65 /* Driver can receive + * GSO-over-UDP-tunnel packets + */ +#define VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM 66 /* Driver handles + * GSO-over-UDP-tunnel + * packets with partial csum + * for the outer header + */ +#define VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO 67 /* Device can receive + * GSO-over-UDP-tunnel packets + */ +#define VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM 68 /* Device handles + * GSO-over-UDP-tunnel + * packets with partial csum + * for the outer header + */ + +/* Offloads bits corresponding to VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO{,_CSUM} + * features + */ +#define VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED 46 +#define VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED 47 #ifndef VIRTIO_NET_NO_LEGACY #define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */ @@ -131,12 +153,17 @@ struct virtio_net_hdr_v1 { #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */ #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ #define VIRTIO_NET_HDR_F_RSC_INFO 4 /* rsc info in csum_ fields */ +#define VIRTIO_NET_HDR_F_UDP_TUNNEL_CSUM 8 /* UDP tunnel csum offload */ __u8 flags; #define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */ #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ #define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */ #define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */ #define VIRTIO_NET_HDR_GSO_UDP_L4 5 /* GSO frame, IPv4& IPv6 UDP (USO) */ +#define VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4 0x20 /* UDPv4 tunnel present */ +#define VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6 0x40 /* UDPv6 tunnel present */ +#define VIRTIO_NET_HDR_GSO_UDP_TUNNEL (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4 | \ + VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6) #define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */ __u8 gso_type; __virtio16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */ @@ -181,6 +208,12 @@ struct virtio_net_hdr_v1_hash { __le16 padding; }; +struct virtio_net_hdr_v1_hash_tunnel { + struct virtio_net_hdr_v1_hash hash_hdr; + __le16 outer_th_offset; + __le16 inner_nh_offset; +}; + #ifndef VIRTIO_NET_NO_LEGACY /* This header comes first in the scatter-gather list. * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must -- cgit v1.2.3 From 288f30435132d2f9e7a29ec9b9745a4f9dc7fd37 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 8 Jul 2025 17:55:30 +0200 Subject: tun: enable gso over UDP tunnel support. Add new tun features to represent the newly introduced virtio GSO over UDP tunnel offload. Allows detection and selection of such features via the existing TUNSETOFFLOAD ioctl and compute the expected virtio header size and tunnel header offset using the current netdev features, so that we can plug almost seamless the newly introduced virtio helpers to serialize the extended virtio header. Acked-by: Jason Wang Signed-off-by: Paolo Abeni --- v6 -> v7: - rebased v4 -> v5: - encapsulate the guest feature guessing in a tun helper - dropped irrelevant check on xdp buff headroom - do not remove unrelated black line - avoid line len > 80 char v3 -> v4: - virtio tnl-related fields are at fixed offset, cleanup the code accordingly. - use netdev features instead of flags bit to check for the configured offload - drop packet in case of enabled features/configured hdr size mismatch v2 -> v3: - cleaned-up uAPI comments - use explicit struct layout instead of raw buf. --- include/uapi/linux/if_tun.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 287cdc81c939..79d53c7a1ebd 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -93,6 +93,15 @@ #define TUN_F_USO4 0x20 /* I can handle USO for IPv4 packets */ #define TUN_F_USO6 0x40 /* I can handle USO for IPv6 packets */ +/* I can handle TSO/USO for UDP tunneled packets */ +#define TUN_F_UDP_TUNNEL_GSO 0x080 + +/* + * I can handle TSO/USO for UDP tunneled packets requiring csum offload for + * the outer header + */ +#define TUN_F_UDP_TUNNEL_GSO_CSUM 0x100 + /* Protocol info prepended to the packets (when IFF_NO_PI is not set) */ #define TUN_PKT_STRIP 0x0001 struct tun_pi { -- cgit v1.2.3 From 58074a0fce66c6c97b35ce8a28ed4e7b780f9a8f Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 11 Jun 2025 13:01:14 -0500 Subject: perf: arm_pmuv3: Add support for the Branch Record Buffer Extension (BRBE) The ARMv9.2 architecture introduces the optional Branch Record Buffer Extension (BRBE), which records information about branches as they are executed into set of branch record registers. BRBE is similar to x86's Last Branch Record (LBR) and PowerPC's Branch History Rolling Buffer (BHRB). BRBE supports filtering by exception level and can filter just the source or target address if excluded to avoid leaking privileged addresses. The h/w filter would be sufficient except when there are multiple events with disjoint filtering requirements. In this case, BRBE is configured with a union of all the events' desired branches, and then the recorded branches are filtered based on each event's filter. For example, with one event capturing kernel events and another event capturing user events, BRBE will be configured to capture both kernel and user branches. When handling event overflow, the branch records have to be filtered by software to only include kernel or user branch addresses for that event. In contrast, x86 simply configures LBR using the last installed event which seems broken. It is possible on x86 to configure branch filter such that no branches are ever recorded (e.g. -j save_type). For BRBE, events with a configuration that will result in no samples are rejected. Recording branches in KVM guests is not supported like x86. However, perf on x86 allows requesting branch recording in guests. The guest events are recorded, but the resulting branches are all from the host. For BRBE, events with branch recording and "exclude_host" set are rejected. Requiring "exclude_guest" to be set did not work. The default for the perf tool does set "exclude_guest" if no exception level options are specified. However, specifying kernel or user events defaults to including both host and guest. In this case, only host branches are recorded. BRBE can support some additional exception branch types compared to x86. On x86, all exceptions other than syscalls are recorded as IRQ. With BRBE, it is possible to better categorize these exceptions. One limitation relative to x86 is we cannot distinguish a syscall return from other exception returns. So all exception returns are recorded as ERET type. The FIQ branch type is omitted as the only FIQ user is Apple platforms which don't support BRBE. The debug branch types are omitted as there is no clear need for them. BRBE records are invalidated whenever events are reconfigured, a new task is scheduled in, or after recording is paused (and the records have been recorded for the event). The architecture allows branch records to be invalidated by the PE under implementation defined conditions. It is expected that these conditions are rare. Cc: Catalin Marinas Co-developed-by: Anshuman Khandual Signed-off-by: Anshuman Khandual Co-developed-by: Mark Rutland Signed-off-by: Mark Rutland Tested-by: James Clark Signed-off-by: Rob Herring (Arm) tested-by: Adam Young Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250611-arm-brbe-v19-v23-4-e7775563036e@kernel.org [will: Fix sparse warnings about mixed declarations and code. Fix C99 comment syntax.] Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 6dc5e0cd76ca..93c9a26492fc 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -70,6 +70,11 @@ struct pmu_hw_events { struct arm_pmu *percpu_pmu; int irq; + + struct perf_branch_stack *branch_stack; + + /* Active events requesting branch records */ + unsigned int branch_users; }; enum armpmu_attr_groups { @@ -115,6 +120,7 @@ struct arm_pmu { /* PMUv3 only */ int pmuver; u64 reg_pmmir; + u64 reg_brbidr; #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); #define ARMV8_PMUV3_EXT_COMMON_EVENT_BASE 0x4000 @@ -126,6 +132,8 @@ struct arm_pmu { #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) +DECLARE_PER_CPU(struct arm_pmu *, cpu_armpmu); + u64 armpmu_event_update(struct perf_event *event); int armpmu_event_set_period(struct perf_event *event); -- cgit v1.2.3 From fc582cd26e888b0652bc1494f252329453fd3b23 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 Jul 2025 11:00:32 -0600 Subject: io_uring/msg_ring: ensure io_kiocb freeing is deferred for RCU syzbot reports that defer/local task_work adding via msg_ring can hit a request that has been freed: CPU: 1 UID: 0 PID: 19356 Comm: iou-wrk-19354 Not tainted 6.16.0-rc4-syzkaller-00108-g17bbde2e1716 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/07/2025 Call Trace: dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0xd2/0x2b0 mm/kasan/report.c:521 kasan_report+0x118/0x150 mm/kasan/report.c:634 io_req_local_work_add io_uring/io_uring.c:1184 [inline] __io_req_task_work_add+0x589/0x950 io_uring/io_uring.c:1252 io_msg_remote_post io_uring/msg_ring.c:103 [inline] io_msg_data_remote io_uring/msg_ring.c:133 [inline] __io_msg_ring_data+0x820/0xaa0 io_uring/msg_ring.c:151 io_msg_ring_data io_uring/msg_ring.c:173 [inline] io_msg_ring+0x134/0xa00 io_uring/msg_ring.c:314 __io_issue_sqe+0x17e/0x4b0 io_uring/io_uring.c:1739 io_issue_sqe+0x165/0xfd0 io_uring/io_uring.c:1762 io_wq_submit_work+0x6e9/0xb90 io_uring/io_uring.c:1874 io_worker_handle_work+0x7cd/0x1180 io_uring/io-wq.c:642 io_wq_worker+0x42f/0xeb0 io_uring/io-wq.c:696 ret_from_fork+0x3fc/0x770 arch/x86/kernel/process.c:148 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 which is supposed to be safe with how requests are allocated. But msg ring requests alloc and free on their own, and hence must defer freeing to a sane time. Add an rcu_head and use kfree_rcu() in both spots where requests are freed. Only the one in io_msg_tw_complete() is strictly required as it has been visible on the other ring, but use it consistently in the other spot as well. This should not cause any other issues outside of KASAN rightfully complaining about it. Link: https://lore.kernel.org/io-uring/686cd2ea.a00a0220.338033.0007.GAE@google.com/ Reported-by: syzbot+54cbbfb4db9145d26fc2@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Fixes: 0617bb500bfa ("io_uring/msg_ring: improve handling of target CQE posting") Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 2922635986f5..a7efcec2e3d0 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -698,6 +698,8 @@ struct io_kiocb { struct hlist_node hash_node; /* For IOPOLL setup queues, with hybrid polling */ u64 iopoll_start; + /* for private io_kiocb freeing */ + struct rcu_head rcu_head; }; /* internal polling, see IORING_FEAT_FAST_POLL */ struct async_poll *apoll; -- cgit v1.2.3 From 7ec80fb3f025825e860b433685fb801d6de34bf3 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 3 Jul 2025 12:25:10 +0200 Subject: irqchip/gic-v5: Add GICv5 PPI support The GICv5 CPU interface implements support for PE-Private Peripheral Interrupts (PPI), that are handled (enabled/prioritized/delivered) entirely within the CPU interface hardware. To enable PPI interrupts, implement the baseline GICv5 host kernel driver infrastructure required to handle interrupts on a GICv5 system. Add the exception handling code path and definitions for GICv5 instructions. Add GICv5 PPI handling code as a specific IRQ domain to: - Set-up PPI priority - Manage PPI configuration and state - Manage IRQ flow handler - IRQs allocation/free - Hook-up a PPI specific IRQchip to provide the relevant methods PPI IRQ priority is chosen as the minimum allowed priority by the system design (after probing the number of priority bits implemented by the CPU interface). Co-developed-by: Sascha Bischoff Signed-off-by: Sascha Bischoff Co-developed-by: Timothy Hayes Signed-off-by: Timothy Hayes Signed-off-by: Lorenzo Pieralisi Reviewed-by: Marc Zyngier Cc: Will Deacon Cc: Thomas Gleixner Cc: Catalin Marinas Cc: Marc Zyngier Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20250703-gicv5-host-v7-20-12e71f1b3528@kernel.org Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v5.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/linux/irqchip/arm-gic-v5.h (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h new file mode 100644 index 000000000000..b08ec0308a9b --- /dev/null +++ b/include/linux/irqchip/arm-gic-v5.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2025 ARM Limited, All Rights Reserved. + */ +#ifndef __LINUX_IRQCHIP_ARM_GIC_V5_H +#define __LINUX_IRQCHIP_ARM_GIC_V5_H + +#include + +/* + * INTID handling + */ +#define GICV5_HWIRQ_ID GENMASK(23, 0) +#define GICV5_HWIRQ_TYPE GENMASK(31, 29) +#define GICV5_HWIRQ_INTID GENMASK_ULL(31, 0) + +#define GICV5_HWIRQ_TYPE_PPI UL(0x1) + +#endif -- cgit v1.2.3 From 5cb1b6dab2def316671ea2565291a86ad58b884c Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 3 Jul 2025 12:25:11 +0200 Subject: irqchip/gic-v5: Add GICv5 IRS/SPI support The GICv5 Interrupt Routing Service (IRS) component implements interrupt management and routing in the GICv5 architecture. A GICv5 system comprises one or more IRSes, that together handle the interrupt routing and state for the system. An IRS supports Shared Peripheral Interrupts (SPIs), that are interrupt sources directly connected to the IRS; they do not rely on memory for storage. The number of supported SPIs is fixed for a given implementation and can be probed through IRS IDR registers. SPI interrupt state and routing are managed through GICv5 instructions. Each core (PE in GICv5 terms) in a GICv5 system is identified with an Interrupt AFFinity ID (IAFFID). An IRS manages a set of cores that are connected to it. Firmware provides a topology description that the driver uses to detect to which IRS a CPU (ie an IAFFID) is associated with. Use probeable information and firmware description to initialize the IRSes and implement GICv5 IRS SPIs support through an SPI-specific IRQ domain. The GICv5 IRS driver: - Probes IRSes in the system to detect SPI ranges - Associates an IRS with a set of cores connected to it - Adds an IRQchip structure for SPI handling SPIs priority is set to a value corresponding to the lowest permissible priority in the system (taking into account the implemented priority bits of the IRS and CPU interface). Since all IRQs are set to the same priority value, the value itself does not matter as long as it is a valid one. Co-developed-by: Sascha Bischoff Signed-off-by: Sascha Bischoff Co-developed-by: Timothy Hayes Signed-off-by: Timothy Hayes Signed-off-by: Lorenzo Pieralisi Reviewed-by: Marc Zyngier Cc: Will Deacon Cc: Thomas Gleixner Cc: Catalin Marinas Cc: Marc Zyngier Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20250703-gicv5-host-v7-21-12e71f1b3528@kernel.org Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v5.h | 140 +++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h index b08ec0308a9b..1064a69ab33f 100644 --- a/include/linux/irqchip/arm-gic-v5.h +++ b/include/linux/irqchip/arm-gic-v5.h @@ -5,6 +5,8 @@ #ifndef __LINUX_IRQCHIP_ARM_GIC_V5_H #define __LINUX_IRQCHIP_ARM_GIC_V5_H +#include + #include /* @@ -15,5 +17,143 @@ #define GICV5_HWIRQ_INTID GENMASK_ULL(31, 0) #define GICV5_HWIRQ_TYPE_PPI UL(0x1) +#define GICV5_HWIRQ_TYPE_SPI UL(0x3) + +/* + * Tables attributes + */ +#define GICV5_NO_READ_ALLOC 0b0 +#define GICV5_READ_ALLOC 0b1 +#define GICV5_NO_WRITE_ALLOC 0b0 +#define GICV5_WRITE_ALLOC 0b1 + +#define GICV5_NON_CACHE 0b00 +#define GICV5_WB_CACHE 0b01 +#define GICV5_WT_CACHE 0b10 + +#define GICV5_NON_SHARE 0b00 +#define GICV5_OUTER_SHARE 0b10 +#define GICV5_INNER_SHARE 0b11 + +/* + * IRS registers + */ +#define GICV5_IRS_IDR1 0x0004 +#define GICV5_IRS_IDR2 0x0008 +#define GICV5_IRS_IDR5 0x0014 +#define GICV5_IRS_IDR6 0x0018 +#define GICV5_IRS_IDR7 0x001c +#define GICV5_IRS_CR0 0x0080 +#define GICV5_IRS_CR1 0x0084 +#define GICV5_IRS_SPI_SELR 0x0108 +#define GICV5_IRS_SPI_CFGR 0x0114 +#define GICV5_IRS_SPI_STATUSR 0x0118 +#define GICV5_IRS_PE_SELR 0x0140 +#define GICV5_IRS_PE_STATUSR 0x0144 +#define GICV5_IRS_PE_CR0 0x0148 + +#define GICV5_IRS_IDR1_PRIORITY_BITS GENMASK(22, 20) +#define GICV5_IRS_IDR1_IAFFID_BITS GENMASK(19, 16) + +#define GICV5_IRS_IDR1_PRIORITY_BITS_1BITS 0b000 +#define GICV5_IRS_IDR1_PRIORITY_BITS_2BITS 0b001 +#define GICV5_IRS_IDR1_PRIORITY_BITS_3BITS 0b010 +#define GICV5_IRS_IDR1_PRIORITY_BITS_4BITS 0b011 +#define GICV5_IRS_IDR1_PRIORITY_BITS_5BITS 0b100 + +#define GICV5_IRS_IDR2_ISTMD_SZ GENMASK(19, 15) +#define GICV5_IRS_IDR2_ISTMD BIT(14) +#define GICV5_IRS_IDR2_IST_L2SZ GENMASK(13, 11) +#define GICV5_IRS_IDR2_IST_LEVELS BIT(10) +#define GICV5_IRS_IDR2_MIN_LPI_ID_BITS GENMASK(9, 6) +#define GICV5_IRS_IDR2_LPI BIT(5) +#define GICV5_IRS_IDR2_ID_BITS GENMASK(4, 0) + +#define GICV5_IRS_IDR5_SPI_RANGE GENMASK(24, 0) +#define GICV5_IRS_IDR6_SPI_IRS_RANGE GENMASK(24, 0) +#define GICV5_IRS_IDR7_SPI_BASE GENMASK(23, 0) +#define GICV5_IRS_CR0_IDLE BIT(1) +#define GICV5_IRS_CR0_IRSEN BIT(0) + +#define GICV5_IRS_CR1_VPED_WA BIT(15) +#define GICV5_IRS_CR1_VPED_RA BIT(14) +#define GICV5_IRS_CR1_VMD_WA BIT(13) +#define GICV5_IRS_CR1_VMD_RA BIT(12) +#define GICV5_IRS_CR1_VPET_WA BIT(11) +#define GICV5_IRS_CR1_VPET_RA BIT(10) +#define GICV5_IRS_CR1_VMT_WA BIT(9) +#define GICV5_IRS_CR1_VMT_RA BIT(8) +#define GICV5_IRS_CR1_IST_WA BIT(7) +#define GICV5_IRS_CR1_IST_RA BIT(6) +#define GICV5_IRS_CR1_IC GENMASK(5, 4) +#define GICV5_IRS_CR1_OC GENMASK(3, 2) +#define GICV5_IRS_CR1_SH GENMASK(1, 0) + +#define GICV5_IRS_SPI_STATUSR_V BIT(1) +#define GICV5_IRS_SPI_STATUSR_IDLE BIT(0) + +#define GICV5_IRS_SPI_SELR_ID GENMASK(23, 0) + +#define GICV5_IRS_SPI_CFGR_TM BIT(0) + +#define GICV5_IRS_PE_SELR_IAFFID GENMASK(15, 0) + +#define GICV5_IRS_PE_STATUSR_V BIT(1) +#define GICV5_IRS_PE_STATUSR_IDLE BIT(0) + +#define GICV5_IRS_PE_CR0_DPS BIT(0) + +/* + * Global Data structures and functions + */ +struct gicv5_chip_data { + struct fwnode_handle *fwnode; + struct irq_domain *ppi_domain; + struct irq_domain *spi_domain; + u32 global_spi_count; + u8 cpuif_pri_bits; + u8 irs_pri_bits; +}; + +extern struct gicv5_chip_data gicv5_global_data __read_mostly; + +struct gicv5_irs_chip_data { + struct list_head entry; + struct fwnode_handle *fwnode; + void __iomem *irs_base; + u32 flags; + u32 spi_min; + u32 spi_range; + raw_spinlock_t spi_config_lock; +}; + +static inline int gicv5_wait_for_op_s_atomic(void __iomem *addr, u32 offset, + const char *reg_s, u32 mask, + u32 *val) +{ + void __iomem *reg = addr + offset; + u32 tmp; + int ret; + + ret = readl_poll_timeout_atomic(reg, tmp, tmp & mask, 1, 10 * USEC_PER_MSEC); + if (unlikely(ret == -ETIMEDOUT)) { + pr_err_ratelimited("%s timeout...\n", reg_s); + return ret; + } + + if (val) + *val = tmp; + + return 0; +} + +#define gicv5_wait_for_op_atomic(base, reg, mask, val) \ + gicv5_wait_for_op_s_atomic(base, reg, #reg, mask, val) +int gicv5_irs_of_probe(struct device_node *parent); +void gicv5_irs_remove(void); +int gicv5_irs_register_cpu(int cpuid); +int gicv5_irs_cpu_to_iaffid(int cpu_id, u16 *iaffid); +struct gicv5_irs_chip_data *gicv5_irs_lookup_by_spi_id(u32 spi_id); +int gicv5_spi_irq_set_type(struct irq_data *d, unsigned int type); #endif -- cgit v1.2.3 From 0f0101325876859eb463792d4df3fd22b6539d29 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 3 Jul 2025 12:25:12 +0200 Subject: irqchip/gic-v5: Add GICv5 LPI/IPI support An IRS supports Logical Peripheral Interrupts (LPIs) and implement Linux IPIs on top of it. LPIs are used for interrupt signals that are translated by a GICv5 ITS (Interrupt Translation Service) but also for software generated IRQs - namely interrupts that are not driven by a HW signal, ie IPIs. LPIs rely on memory storage for interrupt routing and state. LPIs state and routing information is kept in the Interrupt State Table (IST). IRSes provide support for 1- or 2-level IST tables configured to support a maximum number of interrupts that depend on the OS configuration and the HW capabilities. On systems that provide 2-level IST support, always allow the maximum number of LPIs; On systems with only 1-level support, limit the number of LPIs to 2^12 to prevent wasting memory (presumably a system that supports a 1-level only IST is not expecting a large number of interrupts). On a 2-level IST system, L2 entries are allocated on demand. The IST table memory is allocated using the kmalloc() interface; the allocation required may be smaller than a page and must be made up of contiguous physical pages if larger than a page. On systems where the IRS is not cache-coherent with the CPUs, cache mainteinance operations are executed to clean and invalidate the allocated memory to the point of coherency making it visible to the IRS components. On GICv5 systems, IPIs are implemented using LPIs. Add an LPI IRQ domain and implement an IPI-specific IRQ domain created as a child/subdomain of the LPI domain to allocate the required number of LPIs needed to implement the IPIs. IPIs are backed by LPIs, add LPIs allocation/de-allocation functions. The LPI INTID namespace is managed using an IDA to alloc/free LPI INTIDs. Associate an IPI irqchip with IPI IRQ descriptors to provide core code with the irqchip.ipi_send_single() method required to raise an IPI. Co-developed-by: Sascha Bischoff Signed-off-by: Sascha Bischoff Co-developed-by: Timothy Hayes Signed-off-by: Timothy Hayes Signed-off-by: Lorenzo Pieralisi Reviewed-by: Marc Zyngier Cc: Will Deacon Cc: Thomas Gleixner Cc: Catalin Marinas Cc: Marc Zyngier Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20250703-gicv5-host-v7-22-12e71f1b3528@kernel.org Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v5.h | 63 +++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h index 1064a69ab33f..680eed794a35 100644 --- a/include/linux/irqchip/arm-gic-v5.h +++ b/include/linux/irqchip/arm-gic-v5.h @@ -7,8 +7,12 @@ #include +#include +#include #include +#define GICV5_IPIS_PER_CPU MAX_IPI + /* * INTID handling */ @@ -17,6 +21,7 @@ #define GICV5_HWIRQ_INTID GENMASK_ULL(31, 0) #define GICV5_HWIRQ_TYPE_PPI UL(0x1) +#define GICV5_HWIRQ_TYPE_LPI UL(0x2) #define GICV5_HWIRQ_TYPE_SPI UL(0x3) /* @@ -36,7 +41,7 @@ #define GICV5_INNER_SHARE 0b11 /* - * IRS registers + * IRS registers and tables structures */ #define GICV5_IRS_IDR1 0x0004 #define GICV5_IRS_IDR2 0x0008 @@ -51,6 +56,10 @@ #define GICV5_IRS_PE_SELR 0x0140 #define GICV5_IRS_PE_STATUSR 0x0144 #define GICV5_IRS_PE_CR0 0x0148 +#define GICV5_IRS_IST_BASER 0x0180 +#define GICV5_IRS_IST_CFGR 0x0190 +#define GICV5_IRS_IST_STATUSR 0x0194 +#define GICV5_IRS_MAP_L2_ISTR 0x01c0 #define GICV5_IRS_IDR1_PRIORITY_BITS GENMASK(22, 20) #define GICV5_IRS_IDR1_IAFFID_BITS GENMASK(19, 16) @@ -72,6 +81,11 @@ #define GICV5_IRS_IDR5_SPI_RANGE GENMASK(24, 0) #define GICV5_IRS_IDR6_SPI_IRS_RANGE GENMASK(24, 0) #define GICV5_IRS_IDR7_SPI_BASE GENMASK(23, 0) + +#define GICV5_IRS_IST_L2SZ_SUPPORT_4KB(r) FIELD_GET(BIT(11), (r)) +#define GICV5_IRS_IST_L2SZ_SUPPORT_16KB(r) FIELD_GET(BIT(12), (r)) +#define GICV5_IRS_IST_L2SZ_SUPPORT_64KB(r) FIELD_GET(BIT(13), (r)) + #define GICV5_IRS_CR0_IDLE BIT(1) #define GICV5_IRS_CR0_IRSEN BIT(0) @@ -103,6 +117,33 @@ #define GICV5_IRS_PE_CR0_DPS BIT(0) +#define GICV5_IRS_IST_STATUSR_IDLE BIT(0) + +#define GICV5_IRS_IST_CFGR_STRUCTURE BIT(16) +#define GICV5_IRS_IST_CFGR_ISTSZ GENMASK(8, 7) +#define GICV5_IRS_IST_CFGR_L2SZ GENMASK(6, 5) +#define GICV5_IRS_IST_CFGR_LPI_ID_BITS GENMASK(4, 0) + +#define GICV5_IRS_IST_CFGR_STRUCTURE_LINEAR 0b0 +#define GICV5_IRS_IST_CFGR_STRUCTURE_TWO_LEVEL 0b1 + +#define GICV5_IRS_IST_CFGR_ISTSZ_4 0b00 +#define GICV5_IRS_IST_CFGR_ISTSZ_8 0b01 +#define GICV5_IRS_IST_CFGR_ISTSZ_16 0b10 + +#define GICV5_IRS_IST_CFGR_L2SZ_4K 0b00 +#define GICV5_IRS_IST_CFGR_L2SZ_16K 0b01 +#define GICV5_IRS_IST_CFGR_L2SZ_64K 0b10 + +#define GICV5_IRS_IST_BASER_ADDR_MASK GENMASK_ULL(55, 6) +#define GICV5_IRS_IST_BASER_VALID BIT_ULL(0) + +#define GICV5_IRS_MAP_L2_ISTR_ID GENMASK(23, 0) + +#define GICV5_ISTL1E_VALID BIT_ULL(0) + +#define GICV5_ISTL1E_L2_ADDR_MASK GENMASK_ULL(55, 12) + /* * Global Data structures and functions */ @@ -110,9 +151,18 @@ struct gicv5_chip_data { struct fwnode_handle *fwnode; struct irq_domain *ppi_domain; struct irq_domain *spi_domain; + struct irq_domain *lpi_domain; + struct irq_domain *ipi_domain; u32 global_spi_count; u8 cpuif_pri_bits; + u8 cpuif_id_bits; u8 irs_pri_bits; + struct { + __le64 *l1ist_addr; + u32 l2_size; + u8 l2_bits; + bool l2; + } ist; }; extern struct gicv5_chip_data gicv5_global_data __read_mostly; @@ -150,10 +200,21 @@ static inline int gicv5_wait_for_op_s_atomic(void __iomem *addr, u32 offset, #define gicv5_wait_for_op_atomic(base, reg, mask, val) \ gicv5_wait_for_op_s_atomic(base, reg, #reg, mask, val) +void __init gicv5_init_lpi_domain(void); +void __init gicv5_free_lpi_domain(void); + int gicv5_irs_of_probe(struct device_node *parent); void gicv5_irs_remove(void); +int gicv5_irs_enable(void); int gicv5_irs_register_cpu(int cpuid); int gicv5_irs_cpu_to_iaffid(int cpu_id, u16 *iaffid); struct gicv5_irs_chip_data *gicv5_irs_lookup_by_spi_id(u32 spi_id); int gicv5_spi_irq_set_type(struct irq_data *d, unsigned int type); +int gicv5_irs_iste_alloc(u32 lpi); + +void gicv5_init_lpis(u32 max); +void gicv5_deinit_lpis(void); + +int gicv5_alloc_lpi(void); +void gicv5_free_lpi(u32 lpi); #endif -- cgit v1.2.3 From 31fd3becb920e0b31b99cf202ace637f75dd7e78 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 3 Jul 2025 12:25:14 +0200 Subject: of/irq: Add of_msi_xlate() helper function Add an of_msi_xlate() helper that maps a device ID and returns the device node of the MSI controller the device ID is mapped to. Required by core functions that need an MSI controller device node pointer at the same time as a mapped device ID, of_msi_map_id() is not sufficient for that purpose. Signed-off-by: Lorenzo Pieralisi Reviewed-by: Marc Zyngier Cc: Rob Herring Cc: Marc Zyngier Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250703-gicv5-host-v7-24-12e71f1b3528@kernel.org Signed-off-by: Marc Zyngier --- include/linux/of_irq.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 6337ad4e5fe8..a480063c9cb1 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -54,6 +54,7 @@ extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 id, u32 bus_token); extern void of_msi_configure(struct device *dev, const struct device_node *np); +extern u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, u32 id_in); u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in); #else static inline void of_irq_init(const struct of_device_id *matches) @@ -100,6 +101,10 @@ static inline struct irq_domain *of_msi_map_get_device_domain(struct device *dev static inline void of_msi_configure(struct device *dev, struct device_node *np) { } +static inline u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, u32 id_in) +{ + return id_in; +} static inline u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in) { -- cgit v1.2.3 From cd0ec59affb1f31347170bbafadb9c5cc716bca9 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 3 Jul 2025 12:25:15 +0200 Subject: PCI/MSI: Add pci_msi_map_rid_ctlr_node() helper function IRQchip drivers need a PCI/MSI function to map a RID to a MSI controller deviceID namespace and at the same time retrieve the struct device_node pointer of the MSI controller the RID is mapped to. Add pci_msi_map_rid_ctlr_node() to achieve this purpose. Cc Bjorn Helgaas Signed-off-by: Lorenzo Pieralisi Reviewed-by: Marc Zyngier Cc: Thomas Gleixner Cc: Marc Zyngier Link: https://lore.kernel.org/r/20250703-gicv5-host-v7-25-12e71f1b3528@kernel.org Signed-off-by: Marc Zyngier --- include/linux/msi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 6863540f4b71..a418e2695b05 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -705,6 +705,7 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev); +u32 pci_msi_map_rid_ctlr_node(struct pci_dev *pdev, struct device_node **node); struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev); #else /* CONFIG_PCI_MSI */ static inline struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev) -- cgit v1.2.3 From 8b65db1e93a227ad9cc1b67cb221b06869f0b35f Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 3 Jul 2025 12:25:17 +0200 Subject: irqchip/msi-lib: Add IRQ_DOMAIN_FLAG_FWNODE_PARENT handling In some irqchip implementations the fwnode representing the IRQdomain and the MSI controller fwnode do not match; in particular the IRQdomain fwnode is the MSI controller fwnode parent. To support selecting such IRQ domains, add a flag in core IRQ domain code that explicitly tells the MSI lib to use the parent fwnode while carrying out IRQ domain selection. Update the msi-lib select callback with the resulting logic. Signed-off-by: Lorenzo Pieralisi Reviewed-by: Marc Zyngier Cc: Thomas Gleixner Cc: Marc Zyngier Link: https://lore.kernel.org/r/20250703-gicv5-host-v7-27-12e71f1b3528@kernel.org Signed-off-by: Marc Zyngier --- include/linux/irqdomain.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 7387d183029b..25c7cbeed393 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -212,6 +212,9 @@ enum { /* Address and data pair is mutable when irq_set_affinity() */ IRQ_DOMAIN_FLAG_MSI_IMMUTABLE = (1 << 11), + /* IRQ domain requires parent fwnode matching */ + IRQ_DOMAIN_FLAG_FWNODE_PARENT = (1 << 12), + /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the -- cgit v1.2.3 From 57d72196dfc8502b7e376ecdffb11c4f8766f26d Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 3 Jul 2025 12:25:18 +0200 Subject: irqchip/gic-v5: Add GICv5 ITS support The GICv5 architecture implements Interrupt Translation Service (ITS) components in order to translate events coming from peripherals into interrupt events delivered to the connected IRSes. Events (ie MSI memory writes to ITS translate frame), are translated by the ITS using tables kept in memory. ITS translation tables for peripherals is kept in memory storage (device table [DT] and Interrupt Translation Table [ITT]) that is allocated by the driver on boot. Both tables can be 1- or 2-level; the structure is chosen by the driver after probing the ITS HW parameters and checking the allowed table splits and supported {device/event}_IDbits. DT table entries are allocated on demand (ie when a device is probed); the DT table is sized using the number of supported deviceID bits in that that's a system design decision (ie the number of deviceID bits implemented should reflect the number of devices expected in a system) therefore it makes sense to allocate a DT table that can cater for the maximum number of devices. DT and ITT tables are allocated using the kmalloc interface; the allocation size may be smaller than a page or larger, and must provide contiguous memory pages. LPIs INTIDs backing the device events are allocated one-by-one and only upon Linux IRQ allocation; this to avoid preallocating a large number of LPIs to cover the HW device MSI vector size whereas few MSI entries are actually enabled by a device. ITS cacheability/shareability attributes are programmed according to the provided firmware ITS description. The GICv5 partially reuses the GICv3 ITS MSI parent infrastructure and adds functions required to retrieve the ITS translate frame addresses out of msi-map and msi-parent properties to implement the GICv5 ITS MSI parent callbacks. Co-developed-by: Sascha Bischoff Signed-off-by: Sascha Bischoff Co-developed-by: Timothy Hayes Signed-off-by: Timothy Hayes Signed-off-by: Lorenzo Pieralisi Reviewed-by: Marc Zyngier Cc: Thomas Gleixner Cc: Marc Zyngier Link: https://lore.kernel.org/r/20250703-gicv5-host-v7-28-12e71f1b3528@kernel.org Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v5.h | 157 +++++++++++++++++++++++++++++++++++++ 1 file changed, 157 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h index 680eed794a35..07b952549bfa 100644 --- a/include/linux/irqchip/arm-gic-v5.h +++ b/include/linux/irqchip/arm-gic-v5.h @@ -50,6 +50,8 @@ #define GICV5_IRS_IDR7 0x001c #define GICV5_IRS_CR0 0x0080 #define GICV5_IRS_CR1 0x0084 +#define GICV5_IRS_SYNCR 0x00c0 +#define GICV5_IRS_SYNC_STATUSR 0x00c4 #define GICV5_IRS_SPI_SELR 0x0108 #define GICV5_IRS_SPI_CFGR 0x0114 #define GICV5_IRS_SPI_STATUSR 0x0118 @@ -103,6 +105,10 @@ #define GICV5_IRS_CR1_OC GENMASK(3, 2) #define GICV5_IRS_CR1_SH GENMASK(1, 0) +#define GICV5_IRS_SYNCR_SYNC BIT(31) + +#define GICV5_IRS_SYNC_STATUSR_IDLE BIT(0) + #define GICV5_IRS_SPI_STATUSR_V BIT(1) #define GICV5_IRS_SPI_STATUSR_IDLE BIT(0) @@ -144,6 +150,104 @@ #define GICV5_ISTL1E_L2_ADDR_MASK GENMASK_ULL(55, 12) +/* + * ITS registers and tables structures + */ +#define GICV5_ITS_IDR1 0x0004 +#define GICV5_ITS_IDR2 0x0008 +#define GICV5_ITS_CR0 0x0080 +#define GICV5_ITS_CR1 0x0084 +#define GICV5_ITS_DT_BASER 0x00c0 +#define GICV5_ITS_DT_CFGR 0x00d0 +#define GICV5_ITS_DIDR 0x0100 +#define GICV5_ITS_EIDR 0x0108 +#define GICV5_ITS_INV_EVENTR 0x010c +#define GICV5_ITS_INV_DEVICER 0x0110 +#define GICV5_ITS_STATUSR 0x0120 +#define GICV5_ITS_SYNCR 0x0140 +#define GICV5_ITS_SYNC_STATUSR 0x0148 + +#define GICV5_ITS_IDR1_L2SZ GENMASK(10, 8) +#define GICV5_ITS_IDR1_ITT_LEVELS BIT(7) +#define GICV5_ITS_IDR1_DT_LEVELS BIT(6) +#define GICV5_ITS_IDR1_DEVICEID_BITS GENMASK(5, 0) + +#define GICV5_ITS_IDR1_L2SZ_SUPPORT_4KB(r) FIELD_GET(BIT(8), (r)) +#define GICV5_ITS_IDR1_L2SZ_SUPPORT_16KB(r) FIELD_GET(BIT(9), (r)) +#define GICV5_ITS_IDR1_L2SZ_SUPPORT_64KB(r) FIELD_GET(BIT(10), (r)) + +#define GICV5_ITS_IDR2_XDMN_EVENTs GENMASK(6, 5) +#define GICV5_ITS_IDR2_EVENTID_BITS GENMASK(4, 0) + +#define GICV5_ITS_CR0_IDLE BIT(1) +#define GICV5_ITS_CR0_ITSEN BIT(0) + +#define GICV5_ITS_CR1_ITT_RA BIT(7) +#define GICV5_ITS_CR1_DT_RA BIT(6) +#define GICV5_ITS_CR1_IC GENMASK(5, 4) +#define GICV5_ITS_CR1_OC GENMASK(3, 2) +#define GICV5_ITS_CR1_SH GENMASK(1, 0) + +#define GICV5_ITS_DT_CFGR_STRUCTURE BIT(16) +#define GICV5_ITS_DT_CFGR_L2SZ GENMASK(7, 6) +#define GICV5_ITS_DT_CFGR_DEVICEID_BITS GENMASK(5, 0) + +#define GICV5_ITS_DT_BASER_ADDR_MASK GENMASK_ULL(55, 3) + +#define GICV5_ITS_INV_DEVICER_I BIT(31) +#define GICV5_ITS_INV_DEVICER_EVENTID_BITS GENMASK(5, 1) +#define GICV5_ITS_INV_DEVICER_L1 BIT(0) + +#define GICV5_ITS_DIDR_DEVICEID GENMASK_ULL(31, 0) + +#define GICV5_ITS_EIDR_EVENTID GENMASK(15, 0) + +#define GICV5_ITS_INV_EVENTR_I BIT(31) +#define GICV5_ITS_INV_EVENTR_ITT_L2SZ GENMASK(2, 1) +#define GICV5_ITS_INV_EVENTR_L1 BIT(0) + +#define GICV5_ITS_STATUSR_IDLE BIT(0) + +#define GICV5_ITS_SYNCR_SYNC BIT_ULL(63) +#define GICV5_ITS_SYNCR_SYNCALL BIT_ULL(32) +#define GICV5_ITS_SYNCR_DEVICEID GENMASK_ULL(31, 0) + +#define GICV5_ITS_SYNC_STATUSR_IDLE BIT(0) + +#define GICV5_DTL1E_VALID BIT_ULL(0) +/* Note that there is no shift for the address by design */ +#define GICV5_DTL1E_L2_ADDR_MASK GENMASK_ULL(55, 3) +#define GICV5_DTL1E_SPAN GENMASK_ULL(63, 60) + +#define GICV5_DTL2E_VALID BIT_ULL(0) +#define GICV5_DTL2E_ITT_L2SZ GENMASK_ULL(2, 1) +/* Note that there is no shift for the address by design */ +#define GICV5_DTL2E_ITT_ADDR_MASK GENMASK_ULL(55, 3) +#define GICV5_DTL2E_ITT_DSWE BIT_ULL(57) +#define GICV5_DTL2E_ITT_STRUCTURE BIT_ULL(58) +#define GICV5_DTL2E_EVENT_ID_BITS GENMASK_ULL(63, 59) + +#define GICV5_ITTL1E_VALID BIT_ULL(0) +/* Note that there is no shift for the address by design */ +#define GICV5_ITTL1E_L2_ADDR_MASK GENMASK_ULL(55, 3) +#define GICV5_ITTL1E_SPAN GENMASK_ULL(63, 60) + +#define GICV5_ITTL2E_LPI_ID GENMASK_ULL(23, 0) +#define GICV5_ITTL2E_DAC GENMASK_ULL(29, 28) +#define GICV5_ITTL2E_VIRTUAL BIT_ULL(30) +#define GICV5_ITTL2E_VALID BIT_ULL(31) +#define GICV5_ITTL2E_VM_ID GENMASK_ULL(47, 32) + +#define GICV5_ITS_DT_ITT_CFGR_L2SZ_4k 0b00 +#define GICV5_ITS_DT_ITT_CFGR_L2SZ_16k 0b01 +#define GICV5_ITS_DT_ITT_CFGR_L2SZ_64k 0b10 + +#define GICV5_ITS_DT_ITT_CFGR_STRUCTURE_LINEAR 0 +#define GICV5_ITS_DT_ITT_CFGR_STRUCTURE_TWO_LEVEL 1 + +#define GICV5_ITS_HWIRQ_DEVICE_ID GENMASK_ULL(31, 0) +#define GICV5_ITS_HWIRQ_EVENT_ID GENMASK_ULL(63, 32) + /* * Global Data structures and functions */ @@ -197,24 +301,77 @@ static inline int gicv5_wait_for_op_s_atomic(void __iomem *addr, u32 offset, return 0; } +static inline int gicv5_wait_for_op_s(void __iomem *addr, u32 offset, + const char *reg_s, u32 mask) +{ + void __iomem *reg = addr + offset; + u32 val; + int ret; + + ret = readl_poll_timeout(reg, val, val & mask, 1, 10 * USEC_PER_MSEC); + if (unlikely(ret == -ETIMEDOUT)) { + pr_err_ratelimited("%s timeout...\n", reg_s); + return ret; + } + + return 0; +} + #define gicv5_wait_for_op_atomic(base, reg, mask, val) \ gicv5_wait_for_op_s_atomic(base, reg, #reg, mask, val) +#define gicv5_wait_for_op(base, reg, mask) \ + gicv5_wait_for_op_s(base, reg, #reg, mask) + void __init gicv5_init_lpi_domain(void); void __init gicv5_free_lpi_domain(void); int gicv5_irs_of_probe(struct device_node *parent); void gicv5_irs_remove(void); int gicv5_irs_enable(void); +void gicv5_irs_its_probe(void); int gicv5_irs_register_cpu(int cpuid); int gicv5_irs_cpu_to_iaffid(int cpu_id, u16 *iaffid); struct gicv5_irs_chip_data *gicv5_irs_lookup_by_spi_id(u32 spi_id); int gicv5_spi_irq_set_type(struct irq_data *d, unsigned int type); int gicv5_irs_iste_alloc(u32 lpi); +void gicv5_irs_syncr(void); + +struct gicv5_its_devtab_cfg { + union { + struct { + __le64 *devtab; + } linear; + struct { + __le64 *l1devtab; + __le64 **l2ptrs; + } l2; + }; + u32 cfgr; +}; + +struct gicv5_its_itt_cfg { + union { + struct { + __le64 *itt; + unsigned int num_ents; + } linear; + struct { + __le64 *l1itt; + __le64 **l2ptrs; + unsigned int num_l1_ents; + u8 l2sz; + } l2; + }; + u8 event_id_bits; + bool l2itt; +}; void gicv5_init_lpis(u32 max); void gicv5_deinit_lpis(void); int gicv5_alloc_lpi(void); void gicv5_free_lpi(u32 lpi); + +void __init gicv5_its_of_probe(struct device_node *parent); #endif -- cgit v1.2.3 From 695949d8b16f11f2f172d8d0c7ccc1ae09ed6cb7 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 3 Jul 2025 12:25:19 +0200 Subject: irqchip/gic-v5: Add GICv5 IWB support The GICv5 architecture implements the Interrupt Wire Bridge (IWB) in order to support wired interrupts that cannot be connected directly to an IRS and instead uses the ITS to translate a wire event into an IRQ signal. Add the wired-to-MSI IWB driver to manage IWB wired interrupts. An IWB is connected to an ITS and it has its own deviceID for all interrupt wires that it manages; the IWB input wire number must be exposed to the ITS as an eventID with a 1:1 mapping. This eventID is not programmable and therefore requires a new msi_alloc_info_t flag to make sure the ITS driver does not allocate an eventid for the wire but rather it uses the msi_alloc_info_t.hwirq number to gather the ITS eventID. Co-developed-by: Sascha Bischoff Signed-off-by: Sascha Bischoff Co-developed-by: Timothy Hayes Signed-off-by: Timothy Hayes Signed-off-by: Lorenzo Pieralisi Reviewed-by: Marc Zyngier Cc: Thomas Gleixner Cc: Marc Zyngier Link: https://lore.kernel.org/r/20250703-gicv5-host-v7-29-12e71f1b3528@kernel.org Signed-off-by: Marc Zyngier --- include/asm-generic/msi.h | 1 + include/linux/irqchip/arm-gic-v5.h | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/asm-generic/msi.h b/include/asm-generic/msi.h index 124c734ca5d9..92cca4b23f13 100644 --- a/include/asm-generic/msi.h +++ b/include/asm-generic/msi.h @@ -33,6 +33,7 @@ typedef struct msi_alloc_info { /* Device generating MSIs is proxying for another device */ #define MSI_ALLOC_FLAGS_PROXY_DEVICE (1UL << 0) +#define MSI_ALLOC_FLAGS_FIXED_MSG_DATA (1UL << 1) #define GENERIC_MSI_DOMAIN_OPS 1 diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h index 07b952549bfa..68ddcdb1cec5 100644 --- a/include/linux/irqchip/arm-gic-v5.h +++ b/include/linux/irqchip/arm-gic-v5.h @@ -248,6 +248,23 @@ #define GICV5_ITS_HWIRQ_DEVICE_ID GENMASK_ULL(31, 0) #define GICV5_ITS_HWIRQ_EVENT_ID GENMASK_ULL(63, 32) +/* + * IWB registers + */ +#define GICV5_IWB_IDR0 0x0000 +#define GICV5_IWB_CR0 0x0080 +#define GICV5_IWB_WENABLE_STATUSR 0x00c0 +#define GICV5_IWB_WENABLER 0x2000 +#define GICV5_IWB_WTMR 0x4000 + +#define GICV5_IWB_IDR0_INT_DOMS GENMASK(14, 11) +#define GICV5_IWB_IDR0_IW_RANGE GENMASK(10, 0) + +#define GICV5_IWB_CR0_IDLE BIT(1) +#define GICV5_IWB_CR0_IWBEN BIT(0) + +#define GICV5_IWB_WENABLE_STATUSR_IDLE BIT(0) + /* * Global Data structures and functions */ -- cgit v1.2.3 From 4e655028c29fbc455fdbbd3ca074443361adfa44 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 7 Jul 2025 11:41:14 -0700 Subject: net: ethtool: remove the compat code for _rxfh_context ops All drivers are now converted to dedicated _rxfh_context ops. Remove the use of >set_rxfh() to manage additional contexts. Reviewed-by: Gal Pressman Reviewed-by: Edward Cree Link: https://patch.msgid.link/20250707184115.2285277-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 59877fd2a1d3..de5bd76a400c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -865,9 +865,6 @@ struct kernel_ethtool_ts_info { * @supported_input_xfrm: supported types of input xfrm from %RXH_XFRM_*. * @cap_link_lanes_supported: indicates if the driver supports lanes * parameter. - * @cap_rss_ctx_supported: indicates if the driver supports RSS - * contexts via legacy API, drivers implementing @create_rxfh_context - * do not have to set this bit. * @rxfh_per_ctx_fields: device supports selecting different header fields * for Rx hash calculation and RSS for each additional context. * @rxfh_per_ctx_key: device supports setting different RSS key for each @@ -1100,7 +1097,6 @@ struct kernel_ethtool_ts_info { struct ethtool_ops { u32 supported_input_xfrm:8; u32 cap_link_lanes_supported:1; - u32 cap_rss_ctx_supported:1; u32 rxfh_per_ctx_fields:1; u32 rxfh_per_ctx_key:1; u32 cap_rss_rxnfc_adds:1; -- cgit v1.2.3 From 1ec38ce3d024bebdff2a9ffb526e4d198605204d Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Fri, 27 Jun 2025 10:09:01 +0000 Subject: irqchip/gic-v5: Populate struct gic_kvm_info Populate the gic_kvm_info struct based on support for FEAT_GCIE_LEGACY. The struct is used by KVM to probe for a compatible GIC. Co-authored-by: Timothy Hayes Signed-off-by: Timothy Hayes Signed-off-by: Sascha Bischoff Reviewed-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20250627100847.1022515-3-sascha.bischoff@arm.com Signed-off-by: Oliver Upton --- include/linux/irqchip/arm-vgic-info.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h index a75b2c7de69d..ca1713fac6e3 100644 --- a/include/linux/irqchip/arm-vgic-info.h +++ b/include/linux/irqchip/arm-vgic-info.h @@ -15,6 +15,8 @@ enum gic_type { GIC_V2, /* Full GICv3, optionally with v2 compat */ GIC_V3, + /* Full GICv5, optionally with v3 compat */ + GIC_V5, }; struct gic_kvm_info { @@ -34,6 +36,8 @@ struct gic_kvm_info { bool has_v4_1; /* Deactivation impared, subpar stuff */ bool no_hw_deactivation; + /* v3 compat support (GICv5 hosts, only) */ + bool has_gcie_v3_compat; }; #ifdef CONFIG_KVM -- cgit v1.2.3 From c017e49ed1381001ba7a6521daae8f968b11cf09 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Fri, 27 Jun 2025 10:09:02 +0000 Subject: KVM: arm64: gic-v5: Support GICv3 compat Add support for GICv3 compat mode (FEAT_GCIE_LEGACY) which allows a GICv5 host to run GICv3-based VMs. This change enables the VHE/nVHE/hVHE/protected modes, but does not support nested virtualization. A lazy-disable approach is taken for compat mode; it is enabled on the vgic_v3_load path but not disabled on the vgic_v3_put path. A non-GICv3 VM, i.e., one based on GICv5, is responsible for disabling compat mode on the corresponding vgic_v5_load path. Currently, GICv5 is not supported, and hence compat mode is not disabled again once it is enabled, and this function is intentionally omitted from the code. Co-authored-by: Timothy Hayes Signed-off-by: Timothy Hayes Signed-off-by: Sascha Bischoff Link: https://lore.kernel.org/r/20250627100847.1022515-5-sascha.bischoff@arm.com Signed-off-by: Oliver Upton --- include/kvm/arm_vgic.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 4a34f7f0a864..5c293e0ff5c1 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -38,6 +38,7 @@ enum vgic_type { VGIC_V2, /* Good ol' GICv2 */ VGIC_V3, /* New fancy GICv3 */ + VGIC_V5, /* Newer, fancier GICv5 */ }; /* same for all guests, as depending only on the _host's_ GIC model */ @@ -77,9 +78,12 @@ struct vgic_global { /* Pseudo GICv3 from outer space */ bool no_hw_deactivation; - /* GIC system register CPU interface */ + /* GICv3 system register CPU interface */ struct static_key_false gicv3_cpuif; + /* GICv3 compat mode on a GICv5 host */ + bool has_gcie_v3_compat; + u32 ich_vtr_el2; }; -- cgit v1.2.3 From c56f97c5c71f17d781461d44acb777cd21521b81 Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Thu, 19 Jun 2025 14:26:23 -0400 Subject: bitmap: generalize node_random() Generalize node_random() and make it available to general bitmaps and cpumasks users. Notice, find_first_bit() is generally faster than find_nth_bit(), and we employ it when there's a single set bit in the bitmap. See commit 3e061d924fe9c7b4 ("lib/nodemask: optimize node_random for nodemask with single NUMA node"). CC: Andrew Morton Signed-off-by: "Yury Norov [NVIDIA]" --- include/linux/find.h | 2 ++ include/linux/nodemask.h | 18 +++--------------- 2 files changed, 5 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/find.h b/include/linux/find.h index 5a2c267ea7f9..98c61838002c 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -44,6 +44,8 @@ unsigned long _find_next_bit_le(const unsigned long *addr, unsigned long size, unsigned long offset); #endif +unsigned long find_random_bit(const unsigned long *addr, unsigned long size); + #ifndef find_next_bit /** * find_next_bit - find the next set bit in a memory region diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index f08ae71585fa..7ad1f5c7407e 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -492,21 +492,9 @@ static __always_inline int num_node_state(enum node_states state) static __always_inline int node_random(const nodemask_t *maskp) { #if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1) - int w, bit; - - w = nodes_weight(*maskp); - switch (w) { - case 0: - bit = NUMA_NO_NODE; - break; - case 1: - bit = first_node(*maskp); - break; - default: - bit = find_nth_bit(maskp->bits, MAX_NUMNODES, get_random_u32_below(w)); - break; - } - return bit; + int node = find_random_bit(maskp->bits, MAX_NUMNODES); + + return node < MAX_NUMNODES ? node : NUMA_NO_NODE; #else return 0; #endif -- cgit v1.2.3 From 012b1043420c0bc62e52902499de40b66f37fd6a Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Thu, 19 Jun 2025 14:26:24 -0400 Subject: cpumask: introduce cpumask_random() Propagate find_random_bit() to cpumask API. CC: Andrew Morton Signed-off-by: "Yury Norov [NVIDIA]" --- include/linux/cpumask.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 7ae80a7ca81e..39b71b662da3 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -354,6 +354,18 @@ unsigned int cpumask_next_wrap(int n, const struct cpumask *src) return find_next_bit_wrap(cpumask_bits(src), small_cpumask_bits, n + 1); } +/** + * cpumask_random - get random cpu in *src. + * @src: cpumask pointer + * + * Return: random set bit, or >= nr_cpu_ids if @src is empty. + */ +static __always_inline +unsigned int cpumask_random(const struct cpumask *src) +{ + return find_random_bit(cpumask_bits(src), nr_cpu_ids); +} + /** * for_each_cpu - iterate over every cpu in a mask * @cpu: the (optionally unsigned) integer iterator -- cgit v1.2.3 From f4e1fb04c12384fb1b69a95c33527b515a652a74 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 2 Jul 2025 22:35:16 +0000 Subject: af_unix: Use cached value for SOCK_STREAM in unix_inq_len(). Compared to TCP, ioctl(SIOCINQ) for AF_UNIX SOCK_STREAM socket is more expensive, as unix_inq_len() requires iterating through the receive queue and accumulating skb->len. Let's cache the value for SOCK_STREAM to a new field during sendmsg() and recvmsg(). The field is protected by the receive queue lock. Note that ioctl(SIOCINQ) for SOCK_DGRAM returns the length of the first skb in the queue. SOCK_SEQPACKET still requires iterating through the queue because we do not touch functions shared with unix_dgram_ops. But, if really needed, we can support it by switching __skb_try_recv_datagram() to a custom version. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250702223606.1054680-5-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 1af1841b7601..603f8cd026e5 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -47,6 +47,7 @@ struct unix_sock { #define peer_wait peer_wq.wait wait_queue_entry_t peer_wake; struct scm_stat scm_stat; + int inq_len; #if IS_ENABLED(CONFIG_AF_UNIX_OOB) struct sk_buff *oob_skb; #endif -- cgit v1.2.3 From df30285b3670bf52e1e5512e4d4482bec5e93c16 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 2 Jul 2025 22:35:18 +0000 Subject: af_unix: Introduce SO_INQ. We have an application that uses almost the same code for TCP and AF_UNIX (SOCK_STREAM). TCP can use TCP_INQ, but AF_UNIX doesn't have it and requires an extra syscall, ioctl(SIOCINQ) or getsockopt(SO_MEMINFO) as an alternative. Let's introduce the generic version of TCP_INQ. If SO_INQ is enabled, recvmsg() will put a cmsg of SCM_INQ that contains the exact value of ioctl(SIOCINQ). The cmsg is also included when msg->msg_get_inq is non-zero to make sockets io_uring-friendly. Note that SOCK_CUSTOM_SOCKOPT is flagged only for SOCK_STREAM to override setsockopt() for SOL_SOCKET. By having the flag in struct unix_sock, instead of struct sock, we can later add SO_INQ support for TCP and reuse tcp_sk(sk)->recvmsg_inq. Note also that supporting custom getsockopt() for SOL_SOCKET will need preparation for other SOCK_CUSTOM_SOCKOPT users (UDP, vsock, MPTCP). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250702223606.1054680-7-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 1 + include/uapi/asm-generic/socket.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 603f8cd026e5..34f53dde65ce 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -48,6 +48,7 @@ struct unix_sock { wait_queue_entry_t peer_wake; struct scm_stat scm_stat; int inq_len; + bool recvmsg_inq; #if IS_ENABLED(CONFIG_AF_UNIX_OOB) struct sk_buff *oob_skb; #endif diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index f333a0ac4ee4..53b5a8c002b1 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -147,6 +147,9 @@ #define SO_PASSRIGHTS 83 +#define SO_INQ 84 +#define SCM_INQ SO_INQ + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) -- cgit v1.2.3 From eb1ac9ff6c4a5720b1a1476233be374c5dc44bff Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 2 Jul 2025 16:01:31 -0700 Subject: ipv6: anycast: Don't hold RTNL for IPV6_JOIN_ANYCAST. inet6_sk(sk)->ipv6_ac_list is protected by lock_sock(). In ipv6_sock_ac_join(), only __dev_get_by_index(), __dev_get_by_flags(), and __in6_dev_get() require RTNL. __dev_get_by_flags() is only used by ipv6_sock_ac_join() and can be converted to RCU version. Let's replace RCU version helper and drop RTNL from IPV6_JOIN_ANYCAST. setsockopt_needs_rtnl() will be removed in the next patch. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250702230210.3115355-15-kuni1840@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5847c20994d3..a80d21a14612 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3332,8 +3332,8 @@ int dev_get_iflink(const struct net_device *dev); int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb); int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, struct net_device_path_stack *stack); -struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, - unsigned short mask); +struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags, + unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); -- cgit v1.2.3 From 98269398c02ab20eb9ed6d77416023a2627049d8 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 8 Jul 2025 12:31:39 +0300 Subject: RDMA/uverbs: Add empty rdma_uattrs_has_raw_cap() declaration The call to rdma_uattrs_has_raw_cap() is placed in mlx5 fs.c file, which is compiled without relation to CONFIG_INFINIBAND_USER_ACCESS. Despite the check is used only in flows with CONFIG_INFINIBAND_USER_ACCESS=y|m, the compilers generate the following error for CONFIG_INFINIBAND_USER_ACCESS=n builds. >> ERROR: modpost: "rdma_uattrs_has_raw_cap" [drivers/infiniband/hw/mlx5/mlx5_ib.ko] undefined! Fixes: f458ccd2aa2c ("RDMA/uverbs: Check CAP_NET_RAW in user namespace for flow create") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507080725.bh7xrhpg-lkp@intel.com/ Link: https://patch.msgid.link/72dee6b379bd709255a5d8e8010b576d50e47170.1751967071.git.leon@kernel.org Reviewed-by: Zhu Yanjun Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 010594dc755b..1d123812a1f9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4810,15 +4810,19 @@ struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile); #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs); +bool rdma_uattrs_has_raw_cap(const struct uverbs_attr_bundle *attrs); #else static inline int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) { return 0; } +static inline bool +rdma_uattrs_has_raw_cap(const struct uverbs_attr_bundle *attrs) +{ + return false; +} #endif -bool rdma_uattrs_has_raw_cap(const struct uverbs_attr_bundle *attrs); - struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, -- cgit v1.2.3 From be1ba9ed221ffb95a8bb15f4c83d0694225ba808 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 9 Jun 2025 21:35:13 +0300 Subject: wifi: mac80211: avoid weird state in error path If we get to the error path of ieee80211_prep_connection, for example because of a FW issue, then ieee80211_vif_set_links is called with 0. But the call to drv_change_vif_links from ieee80211_vif_update_links will probably fail as well, for the same reason. In this case, the valid_links and active_links bitmaps will be reverted to the value of the failing connection. Then, in the next connection, due to the logic of ieee80211_set_vif_links_bitmaps, valid_links will be set to the ID of the new connection assoc link, but the active_links will remain with the ID of the old connection's assoc link. If those IDs are different, we get into a weird state of valid_links and active_links being different. One of the consequences of this state is to call drv_change_vif_links with new_links as 0, since the & operation between the bitmaps will be 0. Since a removal of a link should always succeed, ignore the return value of drv_change_vif_links if it was called to only remove links, which is the case for the ieee80211_prep_connection's error path. That way, the bitmaps will not be reverted to have the value from the failing connection and will have 0, so the next connection will have a good state. Signed-off-by: Miri Korenblit Reviewed-by: Johannes Berg Link: https://patch.msgid.link/20250609213231.ba2011fb435f.Id87ff6dab5e1cf757b54094ac2d714c656165059@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dcd5969bb559..a61ffdbf99be 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4477,6 +4477,8 @@ struct ieee80211_prep_tx_info { * new links bitmaps may be 0 if going from/to a non-MLO situation. * The @old array contains pointers to the old bss_conf structures * that were already removed, in case they're needed. + * Note that removal of link should always succeed, so the return value + * will be ignored in a removal only case. * This callback can sleep. * @change_sta_links: Change the valid links of a station, similar to * @change_vif_links. This callback can sleep. -- cgit v1.2.3 From 76164ca0d113e6a9f3033f948c739586fc606ed1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 1 Jul 2025 10:57:57 +0200 Subject: vdso/vsyscall: Split up __arch_update_vsyscall() into __arch_update_vdso_clock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The upcoming auxiliary clocks need this hook, too. To separate the architecture hooks from the timekeeper internals, refactor the hook to only operate on a single vDSO clock. While at it, use a more robust #define for the hook override. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250701-vdso-auxclock-v1-3-df7d9f87b9b8@linutronix.de --- include/asm-generic/vdso/vsyscall.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index b550afa15ecd..7fc0b560007d 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -22,11 +22,11 @@ static __always_inline const struct vdso_rng_data *__arch_get_vdso_u_rng_data(vo #endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ -#ifndef __arch_update_vsyscall -static __always_inline void __arch_update_vsyscall(struct vdso_time_data *vdata) +#ifndef __arch_update_vdso_clock +static __always_inline void __arch_update_vdso_clock(struct vdso_clock *vc) { } -#endif /* __arch_update_vsyscall */ +#endif /* __arch_update_vdso_clock */ #ifndef __arch_sync_vdso_time_data static __always_inline void __arch_sync_vdso_time_data(struct vdso_time_data *vdata) -- cgit v1.2.3 From ad64d71d7409a0602b50ee71c7f9663a3385c286 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 1 Jul 2025 10:57:58 +0200 Subject: vdso/helpers: Add helpers for seqlocks of single vdso_clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Auxiliary clocks will have their vDSO data in a dedicated 'struct vdso_clock', which needs to be synchronized independently. Add a helper to synchronize a single vDSO clock. [ tglx: Move the SMP memory barriers to the call sites and get rid of the confusing first/last arguments and conditional barriers ] Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250701-vdso-auxclock-v1-4-df7d9f87b9b8@linutronix.de --- include/vdso/helpers.h | 50 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/vdso/helpers.h b/include/vdso/helpers.h index 0a98fed550ba..1a5ee9d9052c 100644 --- a/include/vdso/helpers.h +++ b/include/vdso/helpers.h @@ -28,17 +28,47 @@ static __always_inline u32 vdso_read_retry(const struct vdso_clock *vc, return seq != start; } -static __always_inline void vdso_write_begin(struct vdso_time_data *vd) +static __always_inline void vdso_write_seq_begin(struct vdso_clock *vc) { - struct vdso_clock *vc = vd->clock_data; + /* + * WRITE_ONCE() is required otherwise the compiler can validly tear + * updates to vc->seq and it is possible that the value seen by the + * reader is inconsistent. + */ + WRITE_ONCE(vc->seq, vc->seq + 1); +} +static __always_inline void vdso_write_seq_end(struct vdso_clock *vc) +{ /* * WRITE_ONCE() is required otherwise the compiler can validly tear - * updates to vd[x].seq and it is possible that the value seen by the + * updates to vc->seq and it is possible that the value seen by the * reader is inconsistent. */ - WRITE_ONCE(vc[CS_HRES_COARSE].seq, vc[CS_HRES_COARSE].seq + 1); - WRITE_ONCE(vc[CS_RAW].seq, vc[CS_RAW].seq + 1); + WRITE_ONCE(vc->seq, vc->seq + 1); +} + +static __always_inline void vdso_write_begin_clock(struct vdso_clock *vc) +{ + vdso_write_seq_begin(vc); + /* Ensure the sequence invalidation is visible before data is modified */ + smp_wmb(); +} + +static __always_inline void vdso_write_end_clock(struct vdso_clock *vc) +{ + /* Ensure the data update is visible before the sequence is set valid again */ + smp_wmb(); + vdso_write_seq_end(vc); +} + +static __always_inline void vdso_write_begin(struct vdso_time_data *vd) +{ + struct vdso_clock *vc = vd->clock_data; + + vdso_write_seq_begin(&vc[CS_HRES_COARSE]); + vdso_write_seq_begin(&vc[CS_RAW]); + /* Ensure the sequence invalidation is visible before data is modified */ smp_wmb(); } @@ -46,14 +76,10 @@ static __always_inline void vdso_write_end(struct vdso_time_data *vd) { struct vdso_clock *vc = vd->clock_data; + /* Ensure the data update is visible before the sequence is set valid again */ smp_wmb(); - /* - * WRITE_ONCE() is required otherwise the compiler can validly tear - * updates to vd[x].seq and it is possible that the value seen by the - * reader is inconsistent. - */ - WRITE_ONCE(vc[CS_HRES_COARSE].seq, vc[CS_HRES_COARSE].seq + 1); - WRITE_ONCE(vc[CS_RAW].seq, vc[CS_RAW].seq + 1); + vdso_write_seq_end(&vc[CS_HRES_COARSE]); + vdso_write_seq_end(&vc[CS_RAW]); } #endif /* !__ASSEMBLY__ */ -- cgit v1.2.3 From f0df91b6a7120d85c873f5e77bc183fb6eccda16 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 9 Jun 2025 21:35:19 +0300 Subject: wifi: cfg80211: hide scan internals Hide the internal scan fields from mac80211 and drivers, the 'notified' variable is for internal tracking, and the 'info' is output that's passed to cfg80211_scan_done() and stored only for delayed userspace notification. Signed-off-by: Johannes Berg Reviewed-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250609213231.6a62e41858e2.I004f66e9c087cc6e6ae4a24951cf470961ee9466@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4a092da3a9de..5d5ad7926877 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2748,8 +2748,6 @@ struct cfg80211_scan_6ghz_params { * @wiphy: the wiphy this was for * @scan_start: time (in jiffies) when the scan started * @wdev: the wireless device to scan for - * @info: (internal) information about completed scan - * @notified: (internal) scan request was notified as done or aborted * @no_cck: used to send probe requests at non CCK rate in 2GHz band * @mac_addr: MAC address used with randomisation * @mac_addr_mask: MAC address mask used with randomisation, bits that @@ -2780,12 +2778,8 @@ struct cfg80211_scan_request { u8 mac_addr[ETH_ALEN] __aligned(2); u8 mac_addr_mask[ETH_ALEN] __aligned(2); u8 bssid[ETH_ALEN] __aligned(2); - - /* internal */ struct wiphy *wiphy; unsigned long scan_start; - struct cfg80211_scan_info info; - bool notified; bool no_cck; bool scan_6ghz; u32 n_6ghz_params; -- cgit v1.2.3 From afebe192ebfef7f6e0be0a070325995771bcd7e8 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Mon, 9 Jun 2025 21:35:21 +0300 Subject: wifi: cfg80211: only verify part of Extended MLD Capabilities We verify that the Extended MLD Capabilities are matching between links. However, some bits are reserved and in particular the Recommended Max Links subfield may not necessarily match. So only verify the known subfields that can reliably be expected to be the same. More information can be found in Table 9-417o, in IEEE P802.11be/D7.0. Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250609213231.a2fad48dd3e6.Iae1740cd2ac833bc4a64fd2af718e1485158fd42@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 120de474a8bf..e05219a912f9 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -9,7 +9,7 @@ * Copyright (c) 2006, Michael Wu * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright (c) 2016 - 2017 Intel Deutschland GmbH - * Copyright (c) 2018 - 2024 Intel Corporation + * Copyright (c) 2018 - 2025 Intel Corporation */ #ifndef LINUX_IEEE80211_H @@ -5333,6 +5333,13 @@ static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data) return get_unaligned_le16(common); } +/* Defined in Figure 9-1074t in P802.11be_D7.0 */ +#define IEEE80211_EHT_ML_EXT_MLD_CAPA_OP_PARAM_UPDATE 0x0001 +#define IEEE80211_EHT_ML_EXT_MLD_CAPA_OP_RECO_MAX_LINKS_MASK 0x001e +#define IEEE80211_EHT_ML_EXT_MLD_CAPA_NSTR_UPDATE 0x0020 +#define IEEE80211_EHT_ML_EXT_MLD_CAPA_EMLSR_ENA_ON_ONE_LINK 0x0040 +#define IEEE80211_EHT_ML_EXT_MLD_CAPA_BTM_MLD_RECO_MULTI_AP 0x0080 + /** * ieee80211_mle_get_ext_mld_capa_op - returns the extended MLD capabilities * and operations. -- cgit v1.2.3 From 984462751d57047828ff4a799cc7d4670a2cfeb2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 9 Jun 2025 21:35:22 +0300 Subject: wifi: mac80211: remove DISALLOW_PUNCTURING_5GHZ code Since iwlwifi was the only driver using this and no longer does, we can remove all this code. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250609213231.4dff5fb8890f.Ie531f912b252a0042c18c0734db50c3afe1adfb5@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a61ffdbf99be..14a6bd120f25 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2852,8 +2852,6 @@ struct ieee80211_txq { * * @IEEE80211_HW_DISALLOW_PUNCTURING: HW requires disabling puncturing in EHT * and connecting with a lower bandwidth instead - * @IEEE80211_HW_DISALLOW_PUNCTURING_5GHZ: HW requires disabling puncturing in - * EHT in 5 GHz and connecting with a lower bandwidth instead * * @IEEE80211_HW_HANDLES_QUIET_CSA: HW/driver handles quieting for CSA, so * no need to stop queues. This really should be set by a driver that @@ -2923,7 +2921,6 @@ enum ieee80211_hw_flags { IEEE80211_HW_DETECTS_COLOR_COLLISION, IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX, IEEE80211_HW_DISALLOW_PUNCTURING, - IEEE80211_HW_DISALLOW_PUNCTURING_5GHZ, IEEE80211_HW_HANDLES_QUIET_CSA, IEEE80211_HW_STRICT, -- cgit v1.2.3 From 62c57ebb3107842482bc5e3568a0202295a8db0d Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Mon, 9 Jun 2025 21:35:23 +0300 Subject: wifi: cfg80211: add a flag for the first part of a scan When there are no non-6 GHz channels, then the 6 GHz scan is the first part of a split scan. Add a boolean denoting whether the scan is the first part of a scan as it might be useful to drivers for internal bookkeeping. This flag is also set if the scan is not split. Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250609213231.07e5a8a452ec.Ibf18f513e507422078fb31b28947e582a20df87a@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5d5ad7926877..6ec9a8865b8b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2754,7 +2754,10 @@ struct cfg80211_scan_6ghz_params { * are 0 in the mask should be randomised, bits that are 1 should * be taken from the @mac_addr * @scan_6ghz: relevant for split scan request only, - * true if this is the second scan request + * true if this is a 6 GHz scan request + * @first_part: %true if this is the first part of a split scan request or a + * scan that was not split. May be %true for a @scan_6ghz scan if no other + * channels were requested * @n_6ghz_params: number of 6 GHz params * @scan_6ghz_params: 6 GHz params * @bssid: BSSID to scan for (most commonly, the wildcard BSSID) @@ -2782,6 +2785,7 @@ struct cfg80211_scan_request { unsigned long scan_start; bool no_cck; bool scan_6ghz; + bool first_part; u32 n_6ghz_params; struct cfg80211_scan_6ghz_params *scan_6ghz_params; s8 tsf_report_link_id; -- cgit v1.2.3 From a11ec0dc920b2a23da9d88063b5f15d7eada6128 Mon Sep 17 00:00:00 2001 From: Somashekhar Puttagangaiah Date: Mon, 9 Jun 2025 21:35:26 +0300 Subject: wifi: cfg80211/mac80211: implement dot11ExtendedRegInfoSupport Implement dot11ExtendedRegInfoSupport to advertise non-AP station regulatory power capability as part of regulatory connectivity element in (Re)Association request frames so that AP can achieve maximum client connectivity. Control field which was interpreted using value of 3-bits B5 to B3, now uses value of 4-bits B6 to B3 to interpret the type of AP. Hence update IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO to parse 4-bits control field. If older AP still updates only 3-bits value of control field, station can still interpret the value as per section E.2.7 of IEEE 802.11 REVme D7.0 and support the appropriate AP type. Also update IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP as the value of standard power AP is changed to 8 instead of 4 so that AP can support both LPI AP and SP AP to maximize the connectivity with stations. For backward compatibility, keeping value 4 as old AP by limiting it to SP AP only. Signed-off-by: Somashekhar Puttagangaiah Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250609213232.90cdef116aad.I85da390fbee59355e3855691933e6a5e55c47ac4@changeid [fix kernel-doc] Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e05219a912f9..ea95c4a60fa6 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2837,11 +2837,12 @@ static inline bool ieee80211_he_capa_size_ok(const u8 *data, u8 len) #define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR 0x40000000 #define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED 0x80000000 -#define IEEE80211_6GHZ_CTRL_REG_LPI_AP 0 -#define IEEE80211_6GHZ_CTRL_REG_SP_AP 1 -#define IEEE80211_6GHZ_CTRL_REG_VLP_AP 2 -#define IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP 3 -#define IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP 4 +#define IEEE80211_6GHZ_CTRL_REG_LPI_AP 0 +#define IEEE80211_6GHZ_CTRL_REG_SP_AP 1 +#define IEEE80211_6GHZ_CTRL_REG_VLP_AP 2 +#define IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP 3 +#define IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP_OLD 4 +#define IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP 8 /** * struct ieee80211_he_6ghz_oper - HE 6 GHz operation Information field @@ -2859,13 +2860,31 @@ struct ieee80211_he_6ghz_oper { #define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ 2 #define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ 3 #define IEEE80211_HE_6GHZ_OPER_CTRL_DUP_BEACON 0x4 -#define IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO 0x38 +#define IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO 0x78 u8 control; u8 ccfs0; u8 ccfs1; u8 minrate; } __packed; +/** + * enum ieee80211_reg_conn_bits - represents Regulatory connectivity field bits. + * + * This enumeration defines bit flags used to represent regulatory connectivity + * field bits. + * + * @IEEE80211_REG_CONN_LPI_VALID: Indicates whether the LPI bit is valid. + * @IEEE80211_REG_CONN_LPI_VALUE: Represents the value of the LPI bit. + * @IEEE80211_REG_CONN_SP_VALID: Indicates whether the SP bit is valid. + * @IEEE80211_REG_CONN_SP_VALUE: Represents the value of the SP bit. + */ +enum ieee80211_reg_conn_bits { + IEEE80211_REG_CONN_LPI_VALID = BIT(0), + IEEE80211_REG_CONN_LPI_VALUE = BIT(1), + IEEE80211_REG_CONN_SP_VALID = BIT(2), + IEEE80211_REG_CONN_SP_VALUE = BIT(3), +}; + /* transmit power interpretation type of transmit power envelope element */ enum ieee80211_tx_power_intrpt_type { IEEE80211_TPE_LOCAL_EIRP, @@ -3847,6 +3866,7 @@ enum ieee80211_eid_ext { WLAN_EID_EXT_FILS_PUBLIC_KEY = 12, WLAN_EID_EXT_FILS_NONCE = 13, WLAN_EID_EXT_FUTURE_CHAN_GUIDANCE = 14, + WLAN_EID_EXT_DH_PARAMETER = 32, WLAN_EID_EXT_HE_CAPABILITY = 35, WLAN_EID_EXT_HE_OPERATION = 36, WLAN_EID_EXT_UORA = 37, @@ -3870,6 +3890,8 @@ enum ieee80211_eid_ext { WLAN_EID_EXT_EHT_CAPABILITY = 108, WLAN_EID_EXT_TID_TO_LINK_MAPPING = 109, WLAN_EID_EXT_BANDWIDTH_INDICATION = 135, + WLAN_EID_EXT_KNOWN_STA_IDENTIFCATION = 136, + WLAN_EID_EXT_NON_AP_STA_REG_CON = 137, }; /* Action category code */ -- cgit v1.2.3 From 6b04716cdcac37bdbacde34def08bc6fdb5fc4e2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 9 Jun 2025 21:35:27 +0300 Subject: wifi: mac80211: don't complete management TX on SAE commit When SAE commit is sent and received in response, there's no ordering for the SAE confirm messages. As such, don't call drivers to stop listening on the channel when the confirm message is still expected. This fixes an issue if the local confirm is transmitted later than the AP's confirm, for iwlwifi (and possibly mt76) the AP's confirm would then get lost since the device isn't on the channel at the time the AP transmit the confirm. For iwlwifi at least, this also improves the overall timing of the authentication handshake (by about 15ms according to the report), likely since the session protection won't be aborted and rescheduled. Note that even before this, mgd_complete_tx() wasn't always called for each call to mgd_prepare_tx() (e.g. in the case of WEP key shared authentication), and the current drivers that have the complete callback don't seem to mind. Document this as well though. Reported-by: Jan Hendrik Farr Closes: https://lore.kernel.org/all/aB30Ea2kRG24LINR@archlinux/ Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250609213232.12691580e140.I3f1d3127acabcd58348a110ab11044213cf147d3@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 14a6bd120f25..577fd6a8c372 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4310,6 +4310,8 @@ struct ieee80211_prep_tx_info { * @mgd_complete_tx: Notify the driver that the response frame for a previously * transmitted frame announced with @mgd_prepare_tx was received, the data * is filled similarly to @mgd_prepare_tx though the duration is not used. + * Note that this isn't always called for each mgd_prepare_tx() call, for + * example for SAE the 'confirm' messages can be on the air in any order. * * @mgd_protect_tdls_discover: Protect a TDLS discovery session. After sending * a TDLS discovery-request, we expect a reply to arrive on the AP's -- cgit v1.2.3 From 488e6eaab88cfa4b6fd2e2bb72fac9cfdc8c403b Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Fri, 4 Jul 2025 17:57:49 +0800 Subject: usb: core: add dma-noncoherent buffer alloc and free API This will add usb_alloc_noncoherent() and usb_free_noncoherent() functions to support alloc and free buffer in a dma-noncoherent way. To explicit manage the memory ownership for the kernel and device, this will also add usb_dma_noncoherent_sync_for_cpu/device() functions and call it at proper time. The management requires the user save sg_table returned by usb_alloc_noncoherent() to urb->sgt. Signed-off-by: Xu Yang Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20250704095751.73765-2-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index 68166718ab30..535ac37198a1 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1621,6 +1621,7 @@ struct urb { void *transfer_buffer; /* (in) associated data buffer */ dma_addr_t transfer_dma; /* (in) dma addr for transfer_buffer */ struct scatterlist *sg; /* (in) scatter gather buffer list */ + struct sg_table *sgt; /* (in) scatter gather table for noncoherent buffer */ int num_mapped_sgs; /* (internal) mapped sg entries */ int num_sgs; /* (in) number of entries in the sg list */ u32 transfer_buffer_length; /* (in) data buffer length */ @@ -1826,6 +1827,16 @@ void *usb_alloc_coherent(struct usb_device *dev, size_t size, void usb_free_coherent(struct usb_device *dev, size_t size, void *addr, dma_addr_t dma); +enum dma_data_direction; + +void *usb_alloc_noncoherent(struct usb_device *dev, size_t size, + gfp_t mem_flags, dma_addr_t *dma, + enum dma_data_direction dir, + struct sg_table **table); +void usb_free_noncoherent(struct usb_device *dev, size_t size, + void *addr, enum dma_data_direction dir, + struct sg_table *table); + /*-------------------------------------------------------------------* * SYNCHRONOUS CALL SUPPORT * *-------------------------------------------------------------------*/ -- cgit v1.2.3 From f393a761763c542761abcf978252d431269366d6 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 8 Jul 2025 14:56:23 +0200 Subject: efi: add ovmf debug log driver Recent OVMF versions (edk2-stable202508 + newer) can write their debug log to a memory buffer. This driver exposes the log content via sysfs (/sys/firmware/efi/ovmf_debug_log). Signed-off-by: Gerd Hoffmann Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index 7d63d1d75f22..50db7df0efab 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -439,6 +439,7 @@ void efi_native_runtime_setup(void); /* OVMF protocol GUIDs */ #define OVMF_SEV_MEMORY_ACCEPTANCE_PROTOCOL_GUID EFI_GUID(0xc5a010fe, 0x38a7, 0x4531, 0x8a, 0x4a, 0x05, 0x00, 0xd2, 0xfd, 0x16, 0x49) +#define OVMF_MEMORY_LOG_TABLE_GUID EFI_GUID(0x95305139, 0xb20f, 0x4723, 0x84, 0x25, 0x62, 0x7c, 0x88, 0x8f, 0xf1, 0x21) typedef struct { efi_guid_t guid; @@ -642,6 +643,7 @@ extern struct efi { unsigned long esrt; /* ESRT table */ unsigned long tpm_log; /* TPM2 Event Log table */ unsigned long tpm_final_log; /* TPM2 Final Events Log table */ + unsigned long ovmf_debug_log; unsigned long mokvar_table; /* MOK variable config table */ unsigned long coco_secret; /* Confidential computing secret table */ unsigned long unaccepted; /* Unaccepted memory table */ @@ -1344,6 +1346,8 @@ bool efi_config_table_is_usable(const efi_guid_t *guid, unsigned long table) umode_t efi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n); +int ovmf_log_probe(unsigned long ovmf_debug_log_table); + /* * efivar ops event type */ -- cgit v1.2.3 From 6c3b746fd536b7612b23e5c2041365014b85082e Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 1 Jul 2025 13:47:09 +0200 Subject: pmdomain: core: Export a common ->sync_state() helper for genpd providers In some cases the typical platform driver that act as genpd provider, may need its own ->sync_state() callback to manage various things. In this regards, the provider most likely wants to allow its corresponding genpds to be powered-off. For this reason, let's introduce a new genpd helper function, of_genpd_sync_state() that helps genpd provider drivers to achieve this. Suggested-by: Saravana Kannan Reviewed-by: Abel Vesa Tested-by: Hiago De Franco # Colibri iMX8X Tested-by: Tomi Valkeinen # TI AM62A,Xilinx ZynqMP ZCU106 Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20250701114733.636510-8-ulf.hansson@linaro.org --- include/linux/pm_domain.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 0b18160901a2..3578196e6626 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -431,6 +431,7 @@ int of_genpd_remove_subdomain(const struct of_phandle_args *parent_spec, struct generic_pm_domain *of_genpd_remove_last(struct device_node *np); int of_genpd_parse_idle_states(struct device_node *dn, struct genpd_power_state **states, int *n); +void of_genpd_sync_state(struct device_node *np); int genpd_dev_pm_attach(struct device *dev); struct device *genpd_dev_pm_attach_by_id(struct device *dev, @@ -476,6 +477,8 @@ static inline int of_genpd_parse_idle_states(struct device_node *dn, return -ENODEV; } +static inline void of_genpd_sync_state(struct device_node *np) {} + static inline int genpd_dev_pm_attach(struct device *dev) { return 0; -- cgit v1.2.3 From c8c196220ce5eba9b7d4aca37a7fd4bbb965d2ed Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 1 Jul 2025 13:47:10 +0200 Subject: pmdomain: core: Prepare to add the common ->sync_state() support Before we can implement the common ->sync_state() support in genpd, we need to allow a few specific genpd providers to opt out from the new behaviour. Let's introduce GENPD_FLAG_NO_SYNC_STATE as a new genpd config option, to allow providers to opt out. Suggested-by: Saravana Kannan Tested-by: Hiago De Franco # Colibri iMX8X Tested-by: Tomi Valkeinen # TI AM62A,Xilinx ZynqMP ZCU106 Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20250701114733.636510-9-ulf.hansson@linaro.org --- include/linux/pm_domain.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 3578196e6626..9329554b9c4a 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -104,6 +104,11 @@ struct dev_pm_domain_list { * GENPD_FLAG_DEV_NAME_FW: Instructs genpd to generate an unique device name * using ida. It is used by genpd providers which * get their genpd-names directly from FW. + * + * GENPD_FLAG_NO_SYNC_STATE: The ->sync_state() support is implemented in a + * genpd provider specific way, likely through a + * parent device node. This flag makes genpd to + * skip its internal support for this. */ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) @@ -114,6 +119,7 @@ struct dev_pm_domain_list { #define GENPD_FLAG_MIN_RESIDENCY (1U << 6) #define GENPD_FLAG_OPP_TABLE_FW (1U << 7) #define GENPD_FLAG_DEV_NAME_FW (1U << 8) +#define GENPD_FLAG_NO_SYNC_STATE (1U << 9) enum gpd_status { GENPD_STATE_ON = 0, /* PM domain is on */ -- cgit v1.2.3 From 10086a4f391f4b9c969a21e785e7fa0fa6c023e5 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 1 Jul 2025 13:47:17 +0200 Subject: firmware: xilinx: Don't share zynqmp_pm_init_finalize() As there no longer any users outside the zynqmp firmware driver of zynqmp_pm_init_finalize(), let's turn into a local static function. Cc: Michal Simek Tested-by: Hiago De Franco # Colibri iMX8X Tested-by: Tomi Valkeinen # TI AM62A,Xilinx ZynqMP ZCU106 Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20250701114733.636510-16-ulf.hansson@linaro.org --- include/linux/firmware/xlnx-zynqmp.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 6d4dbc196b93..ae48d619c4e0 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -585,7 +585,6 @@ int zynqmp_pm_reset_assert(const u32 reset, int zynqmp_pm_reset_get_status(const u32 reset, u32 *status); unsigned int zynqmp_pm_bootmode_read(u32 *ps_mode); int zynqmp_pm_bootmode_write(u32 ps_mode); -int zynqmp_pm_init_finalize(void); int zynqmp_pm_set_suspend_mode(u32 mode); int zynqmp_pm_request_node(const u32 node, const u32 capabilities, const u32 qos, const enum zynqmp_pm_request_ack ack); @@ -746,11 +745,6 @@ static inline int zynqmp_pm_bootmode_write(u32 ps_mode) return -ENODEV; } -static inline int zynqmp_pm_init_finalize(void) -{ - return -ENODEV; -} - static inline int zynqmp_pm_set_suspend_mode(u32 mode) { return -ENODEV; -- cgit v1.2.3 From 9a4681a485ee1203ac968065490a8eeaa6615503 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 1 Jul 2025 13:47:19 +0200 Subject: driver core: Export get_dev_from_fwnode() It has turned out get_dev_from_fwnode() is useful at a few other places outside of the driver core, as in gpiolib.c for example. Therefore let's make it available as a common helper function. Suggested-by: Saravana Kannan Cc: Greg Kroah-Hartman Tested-by: Hiago De Franco # Colibri iMX8X Tested-by: Tomi Valkeinen # TI AM62A,Xilinx ZynqMP ZCU106 Acked-by: Greg Kroah-Hartman Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20250701114733.636510-18-ulf.hansson@linaro.org --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 4940db137fff..315b00171335 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1048,6 +1048,7 @@ void device_set_node(struct device *dev, struct fwnode_handle *fwnode); int device_add_of_node(struct device *dev, struct device_node *of_node); void device_remove_of_node(struct device *dev); void device_set_of_node_from_dev(struct device *dev, const struct device *dev2); +struct device *get_dev_from_fwnode(struct fwnode_handle *fwnode); static inline struct device_node *dev_of_node(struct device *dev) { -- cgit v1.2.3 From 3b7b8acacf372945b4855a136634775c064a57f8 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 1 Jul 2025 13:47:20 +0200 Subject: pmdomain: core: Add common ->sync_state() support for genpd providers If the genpd provider's fwnode doesn't have an associated struct device with it, we can make use of the generic genpd->dev and it corresponding driver internally in genpd to manage ->sync_state(). More precisely, while adding a genpd OF provider let's check if the fwnode has a device and if not, make the preparation to handle ->sync_state() internally through the genpd_provider_driver and the genpd_provider_bus. Note that, genpd providers may opt out from this behaviour by setting the GENPD_FLAG_NO_SYNC_STATE config options for the genpds in question. Suggested-by: Saravana Kannan Tested-by: Hiago De Franco # Colibri iMX8X Tested-by: Tomi Valkeinen # TI AM62A,Xilinx ZynqMP ZCU106 Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20250701114733.636510-19-ulf.hansson@linaro.org --- include/linux/pm_domain.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 9329554b9c4a..d68e07dadc99 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -133,6 +133,12 @@ enum genpd_notication { GENPD_NOTIFY_ON, }; +enum genpd_sync_state { + GENPD_SYNC_STATE_OFF = 0, + GENPD_SYNC_STATE_SIMPLE, + GENPD_SYNC_STATE_ONECELL, +}; + struct dev_power_governor { bool (*power_down_ok)(struct dev_pm_domain *domain); bool (*suspend_ok)(struct device *dev); @@ -193,6 +199,7 @@ struct generic_pm_domain { unsigned int performance_state; /* Aggregated max performance state */ cpumask_var_t cpus; /* A cpumask of the attached CPUs */ bool synced_poweroff; /* A consumer needs a synced poweroff */ + enum genpd_sync_state sync_state; /* How sync_state is managed. */ int (*power_off)(struct generic_pm_domain *domain); int (*power_on)(struct generic_pm_domain *domain); struct raw_notifier_head power_notifiers; /* Power on/off notifiers */ -- cgit v1.2.3 From 2b5630e9886f9121535d421ebfb240a9535f3f1e Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 1 Jul 2025 13:47:21 +0200 Subject: driver core: Add dev_set_drv_sync_state() This can be used by frameworks to set the sync_state() helper functions for drivers that don't already have them set. Signed-off-by: Saravana Kannan Acked-by: Greg Kroah-Hartman Reviewed-by: Abel Vesa Tested-by: Hiago De Franco # Colibri iMX8X Tested-by: Tomi Valkeinen # TI AM62A,Xilinx ZynqMP ZCU106 Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20250701114733.636510-20-ulf.hansson@linaro.org --- include/linux/device.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 315b00171335..686f2a578fbd 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -917,6 +917,18 @@ static inline bool dev_has_sync_state(struct device *dev) return false; } +static inline int dev_set_drv_sync_state(struct device *dev, + void (*fn)(struct device *dev)) +{ + if (!dev || !dev->driver) + return 0; + if (dev->driver->sync_state && dev->driver->sync_state != fn) + return -EBUSY; + if (!dev->driver->sync_state) + dev->driver->sync_state = fn; + return 0; +} + static inline void dev_set_removable(struct device *dev, enum device_removable removable) { -- cgit v1.2.3 From 13a4b7fb62600e1c0738fdb0b7176555ff05aadf Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 1 Jul 2025 13:47:23 +0200 Subject: pmdomain: core: Leave powered-on genpds on until late_initcall_sync Powering-off a genpd that was on during boot, before all of its consumer devices have been probed, is certainly prone to problems. As a step to improve this situation, let's prevent these genpds from being powered-off until genpd_power_off_unused() gets called, which is a late_initcall_sync(). Note that, this still doesn't guarantee that all the consumer devices has been probed before we allow to power-off the genpds. Yet, this should be a step in the right direction. Suggested-by: Saravana Kannan Tested-by: Hiago De Franco # Colibri iMX8X Tested-by: Tomi Valkeinen # TI AM62A,Xilinx ZynqMP ZCU106 Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20250701114733.636510-22-ulf.hansson@linaro.org --- include/linux/pm_domain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index d68e07dadc99..99556589f45e 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -199,6 +199,7 @@ struct generic_pm_domain { unsigned int performance_state; /* Aggregated max performance state */ cpumask_var_t cpus; /* A cpumask of the attached CPUs */ bool synced_poweroff; /* A consumer needs a synced poweroff */ + bool stay_on; /* Stay powered-on during boot. */ enum genpd_sync_state sync_state; /* How sync_state is managed. */ int (*power_off)(struct generic_pm_domain *domain); int (*power_on)(struct generic_pm_domain *domain); -- cgit v1.2.3 From d9bc88aa54d6aa22ff1e850a86be7a37f0503889 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 2 Jul 2025 22:24:19 +0100 Subject: debugfs: split short and full proxy wrappers, kill debugfs_real_fops() All users outside of fs/debugfs/file.c are gone, in there we can just fully split the wrappers for full and short cases and be done with that. Signed-off-by: Al Viro Link: https://lore.kernel.org/r/20250702212419.GG3406663@ZenIV Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index fa2568b4380d..a420152105d0 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -162,7 +162,6 @@ void debugfs_remove(struct dentry *dentry); void debugfs_lookup_and_remove(const char *name, struct dentry *parent); -const struct file_operations *debugfs_real_fops(const struct file *filp); const void *debugfs_get_aux(const struct file *file); int debugfs_file_get(struct dentry *dentry); @@ -329,7 +328,6 @@ static inline void debugfs_lookup_and_remove(const char *name, struct dentry *parent) { } -const struct file_operations *debugfs_real_fops(const struct file *filp); void *debugfs_get_aux(const struct file *file); static inline int debugfs_file_get(struct dentry *dentry) -- cgit v1.2.3 From 9d3b96be2ee81a7d6ad08cb5094753f06382db1b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 2 Jul 2025 22:26:16 +0100 Subject: debugfs_get_aux(): allow storing non-const void * typechecking is up to users, anyway... Signed-off-by: Al Viro Link: https://lore.kernel.org/r/20250702212616.GI3406663@ZenIV Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index a420152105d0..7cecda29447e 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -162,7 +162,7 @@ void debugfs_remove(struct dentry *dentry); void debugfs_lookup_and_remove(const char *name, struct dentry *parent); -const void *debugfs_get_aux(const struct file *file); +void *debugfs_get_aux(const struct file *file); int debugfs_file_get(struct dentry *dentry); void debugfs_file_put(struct dentry *dentry); -- cgit v1.2.3 From 570c8efd5eb79c3725ba439ce105ed1bedc5acd9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 23 May 2025 17:28:00 +0200 Subject: sched/psi: Optimize psi_group_change() cpu_clock() usage Dietmar reported that commit 3840cbe24cf0 ("sched: psi: fix bogus pressure spikes from aggregation race") caused a regression for him on a high context switch rate benchmark (schbench) due to the now repeating cpu_clock() calls. In particular the problem is that get_recent_times() will extrapolate the current state to 'now'. But if an update uses a timestamp from before the start of the update, it is possible to get two reads with inconsistent results. It is effectively back-dating an update. (note that this all hard-relies on the clock being synchronized across CPUs -- if this is not the case, all bets are off). Combine this problem with the fact that there are per-group-per-cpu seqcounts, the commit in question pushed the clock read into the group iteration, causing tree-depth cpu_clock() calls. On architectures where cpu_clock() has appreciable overhead, this hurts. Instead move to a per-cpu seqcount, which allows us to have a single clock read for all group updates, increasing internal consistency and lowering update overhead. This comes at the cost of a longer update side (proportional to the tree depth) which can cause the read side to retry more often. Fixes: 3840cbe24cf0 ("sched: psi: fix bogus pressure spikes from aggregation race") Reported-by: Dietmar Eggemann Signed-off-by: Peter Zijlstra (Intel) Acked-by: Johannes Weiner Tested-by: Dietmar Eggemann , Link: https://lkml.kernel.org/20250522084844.GC31726@noisy.programming.kicks-ass.net --- include/linux/psi_types.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index f1fd3a8044e0..dd10c22299ab 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -84,11 +84,9 @@ enum psi_aggregators { struct psi_group_cpu { /* 1st cacheline updated by the scheduler */ - /* Aggregator needs to know of concurrent changes */ - seqcount_t seq ____cacheline_aligned_in_smp; - /* States of the tasks belonging to this group */ - unsigned int tasks[NR_PSI_TASK_COUNTS]; + unsigned int tasks[NR_PSI_TASK_COUNTS] + ____cacheline_aligned_in_smp; /* Aggregate pressure state derived from the tasks */ u32 state_mask; -- cgit v1.2.3 From cccb45d7c4295bbfeba616582d0249f2d21e6df5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 20 May 2025 11:19:30 +0200 Subject: sched/deadline: Less agressive dl_server handling Chris reported that commit 5f6bd380c7bd ("sched/rt: Remove default bandwidth control") caused a significant dip in his favourite benchmark of the day. Simply disabling dl_server cured things. His workload hammers the 0->1, 1->0 transitions, and the dl_server_{start,stop}() overhead kills it -- fairly obviously a bad idea in hind sight and all that. Change things around to only disable the dl_server when there has not been a fair task around for a whole period. Since the default period is 1 second, this ensures the benchmark never trips this, overhead gone. Fixes: 557a6bfc662c ("sched/fair: Add trivial fair server") Reported-by: Chris Mason Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Juri Lelli Acked-by: Juri Lelli Link: https://lkml.kernel.org/r/20250702121158.465086194@infradead.org --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index eec6b225e9d1..4802fcf738cd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -698,6 +698,7 @@ struct sched_dl_entity { unsigned int dl_defer : 1; unsigned int dl_defer_armed : 1; unsigned int dl_defer_running : 1; + unsigned int dl_server_idle : 1; /* * Bandwidth enforcement timer. Each -deadline task has its -- cgit v1.2.3 From 74eec63661d46a7153d04c2e0249eeb76cc76d44 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Tue, 8 Jul 2025 18:56:26 +0200 Subject: sched/fair: Fix NO_RUN_TO_PARITY case EEVDF expects the scheduler to allocate a time quantum to the selected entity and then pick a new entity for next quantum. Although this notion of time quantum is not strictly doable in our case, we can ensure a minimum runtime for each task most of the time and pick a new entity after a minimum time has elapsed. Reuse the slice protection of run to parity to ensure such runtime quantum. Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250708165630.1948751-3-vincent.guittot@linaro.org --- include/linux/sched.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4802fcf738cd..55921385927d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -583,7 +583,15 @@ struct sched_entity { u64 sum_exec_runtime; u64 prev_sum_exec_runtime; u64 vruntime; - s64 vlag; + union { + /* + * When !@on_rq this field is vlag. + * When cfs_rq->curr == se (which implies @on_rq) + * this field is vprot. See protect_slice(). + */ + s64 vlag; + u64 vprot; + }; u64 slice; u64 nr_migrations; -- cgit v1.2.3 From f6bfc9afc7510cb5e6fbe0a17c507917b0120280 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 7 Jul 2025 15:11:55 +0200 Subject: drm/framebuffer: Acquire internal references on GEM handles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acquire GEM handles in drm_framebuffer_init() and release them in the corresponding drm_framebuffer_cleanup(). Ties the handle's lifetime to the framebuffer. Not all GEM buffer objects have GEM handles. If not set, no refcounting takes place. This is the case for some fbdev emulation. This is not a problem as these GEM objects do not use dma-bufs and drivers will not release them while fbdev emulation is running. Framebuffer flags keep a bit per color plane of which the framebuffer holds a GEM handle reference. As all drivers use drm_framebuffer_init(), they will now all hold dma-buf references as fixed in commit 5307dce878d4 ("drm/gem: Acquire references on GEM handles for framebuffers"). In the GEM framebuffer helpers, restore the original ref counting on buffer objects. As the helpers for handle refcounting are now no longer called from outside the DRM core, unexport the symbols. v3: - don't mix internal flags with mode flags (Christian) v2: - track framebuffer handle refs by flag - drop gma500 cleanup (Christian) Signed-off-by: Thomas Zimmermann Fixes: 5307dce878d4 ("drm/gem: Acquire references on GEM handles for framebuffers") Reported-by: Bert Karwatzki Closes: https://lore.kernel.org/dri-devel/20250703115915.3096-1-spasswolf@web.de/ Tested-by: Bert Karwatzki Tested-by: Mario Limonciello Tested-by: Borislav Petkov (AMD) Cc: Thomas Zimmermann Cc: Anusha Srivatsa Cc: Christian König Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Sumit Semwal Cc: "Christian König" Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Cc: Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250707131224.249496-1-tzimmermann@suse.de --- include/drm/drm_framebuffer.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h index 668077009fce..38b24fc8978d 100644 --- a/include/drm/drm_framebuffer.h +++ b/include/drm/drm_framebuffer.h @@ -23,6 +23,7 @@ #ifndef __DRM_FRAMEBUFFER_H__ #define __DRM_FRAMEBUFFER_H__ +#include #include #include #include @@ -100,6 +101,8 @@ struct drm_framebuffer_funcs { unsigned num_clips); }; +#define DRM_FRAMEBUFFER_HAS_HANDLE_REF(_i) BIT(0u + (_i)) + /** * struct drm_framebuffer - frame buffer object * @@ -188,6 +191,10 @@ struct drm_framebuffer { * DRM_MODE_FB_MODIFIERS. */ int flags; + /** + * @internal_flags: Framebuffer flags like DRM_FRAMEBUFFER_HAS_HANDLE_REF. + */ + unsigned int internal_flags; /** * @filp_head: Placed on &drm_file.fbs, protected by &drm_file.fbs_lock. */ -- cgit v1.2.3 From bd46cece51a36ef088f22ef0416ac13b0a46d5b0 Mon Sep 17 00:00:00 2001 From: Simona Vetter Date: Mon, 7 Jul 2025 17:18:13 +0200 Subject: drm/gem: Fix race in drm_gem_handle_create_tail() Object creation is a careful dance where we must guarantee that the object is fully constructed before it is visible to other threads, and GEM buffer objects are no difference. Final publishing happens by calling drm_gem_handle_create(). After that the only allowed thing to do is call drm_gem_object_put() because a concurrent call to the GEM_CLOSE ioctl with a correctly guessed id (which is trivial since we have a linear allocator) can already tear down the object again. Luckily most drivers get this right, the very few exceptions I've pinged the relevant maintainers for. Unfortunately we also need drm_gem_handle_create() when creating additional handles for an already existing object (e.g. GETFB ioctl or the various bo import ioctl), and hence we cannot have a drm_gem_handle_create_and_put() as the only exported function to stop these issues from happening. Now unfortunately the implementation of drm_gem_handle_create() isn't living up to standards: It does correctly finishe object initialization at the global level, and hence is safe against a concurrent tear down. But it also sets up the file-private aspects of the handle, and that part goes wrong: We fully register the object in the drm_file.object_idr before calling drm_vma_node_allow() or obj->funcs->open, which opens up races against concurrent removal of that handle in drm_gem_handle_delete(). Fix this with the usual two-stage approach of first reserving the handle id, and then only registering the object after we've completed the file-private setup. Jacek reported this with a testcase of concurrently calling GEM_CLOSE on a freshly-created object (which also destroys the object), but it should be possible to hit this with just additional handles created through import or GETFB without completed destroying the underlying object with the concurrent GEM_CLOSE ioctl calls. Note that the close-side of this race was fixed in f6cd7daecff5 ("drm: Release driver references to handle before making it available again"), which means a cool 9 years have passed until someone noticed that we need to make this symmetry or there's still gaps left :-/ Without the 2-stage close approach we'd still have a race, therefore that's an integral part of this bugfix. More importantly, this means we can have NULL pointers behind allocated id in our drm_file.object_idr. We need to check for that now: - drm_gem_handle_delete() checks for ERR_OR_NULL already - drm_gem.c:object_lookup() also chekcs for NULL - drm_gem_release() should never be called if there's another thread still existing that could call into an IOCTL that creates a new handle, so cannot race. For paranoia I added a NULL check to drm_gem_object_release_handle() though. - most drivers (etnaviv, i915, msm) are find because they use idr_find(), which maps both ENOENT and NULL to NULL. - drivers using idr_for_each_entry() should also be fine, because idr_get_next does filter out NULL entries and continues the iteration. - The same holds for drm_show_memory_stats(). v2: Use drm_WARN_ON (Thomas) Reported-by: Jacek Lawrynowicz Tested-by: Jacek Lawrynowicz Reviewed-by: Thomas Zimmermann Cc: stable@vger.kernel.org Cc: Jacek Lawrynowicz Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Simona Vetter Signed-off-by: Simona Vetter Signed-off-by: Simona Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20250707151814.603897-1-simona.vetter@ffwll.ch --- include/drm/drm_file.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index 5c3b2aa3e69d..d344d41e6cfe 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -300,6 +300,9 @@ struct drm_file { * * Mapping of mm object handles to object pointers. Used by the GEM * subsystem. Protected by @table_lock. + * + * Note that allocated entries might be NULL as a transient state when + * creating or deleting a handle. */ struct idr object_idr; -- cgit v1.2.3 From a7cec20845a67ff4f3c924255519341f37d993f9 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 25 Jun 2025 17:12:22 -0700 Subject: KVM: x86: Provide a capability to disable APERF/MPERF read intercepts Allow a guest to read the physical IA32_APERF and IA32_MPERF MSRs without interception. The IA32_APERF and IA32_MPERF MSRs are not virtualized. Writes are not handled at all. The MSR values are not zeroed on vCPU creation, saved on suspend, or restored on resume. No accommodation is made for processor migration or for sharing a logical processor with other tasks. No adjustments are made for non-unit TSC multipliers. The MSRs do not account for time the same way as the comparable PMU events, whether the PMU is virtualized by the traditional emulation method or the new mediated pass-through approach. Nonetheless, in a properly constrained environment, this capability can be combined with a guest CPUID table that advertises support for CPUID.6:ECX.APERFMPERF[bit 0] to induce a Linux guest to report the effective physical CPU frequency in /proc/cpuinfo. Moreover, there is no performance cost for this capability. Signed-off-by: Jim Mattson Link: https://lore.kernel.org/r/20250530185239.2335185-3-jmattson@google.com Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250626001225.744268-3-seanjc@google.com Signed-off-by: Sean Christopherson --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7a4c35ff03fe..aeb2ca10b190 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -644,6 +644,7 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) +#define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { -- cgit v1.2.3 From 0af3ecdde58676f6c42eeec07d6816d5bf87ff88 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 9 Jul 2025 21:21:13 +0200 Subject: printk: Make vprintk_deferred() public vprintk_deferred() is useful for implementing runtime verification reactors. Make it public. Signed-off-by: Nam Cao Reviewed-by: Petr Mladek Signed-off-by: Steven Rostedt (Google) --- include/linux/printk.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index 5b462029d03c..5d22b803f51e 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -154,6 +154,8 @@ int vprintk_emit(int facility, int level, asmlinkage __printf(1, 0) int vprintk(const char *fmt, va_list args); +__printf(1, 0) +int vprintk_deferred(const char *fmt, va_list args); asmlinkage __printf(1, 2) __cold int _printk(const char *fmt, ...); @@ -214,6 +216,11 @@ int vprintk(const char *s, va_list args) { return 0; } +static inline __printf(1, 0) +int vprintk_deferred(const char *fmt, va_list args) +{ + return 0; +} static inline __printf(1, 2) __cold int _printk(const char *s, ...) { -- cgit v1.2.3 From 3f045de7f557850ca6b3632c6d45c2cdaf948694 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 9 Jul 2025 21:21:14 +0200 Subject: panic: Add vpanic() vpanic() is useful for implementing runtime verification reactors. Add it. Signed-off-by: Nam Cao Reviewed-by: Petr Mladek Signed-off-by: Steven Rostedt (Google) --- include/linux/panic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/panic.h b/include/linux/panic.h index 4adc65766935..0332c6d6771f 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -3,6 +3,7 @@ #define _LINUX_PANIC_H #include +#include #include struct pt_regs; @@ -10,6 +11,8 @@ struct pt_regs; extern long (*panic_blink)(int state); __printf(1, 2) void panic(const char *fmt, ...) __noreturn __cold; +__printf(1, 0) +void vpanic(const char *fmt, va_list args) __noreturn __cold; void nmi_panic(struct pt_regs *regs, const char *msg); void check_panic_on_warn(const char *origin); extern void oops_enter(void); -- cgit v1.2.3 From ff4e233d8ab70fe6ae460ecc8c0e5b24dd0fedb0 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 9 Jul 2025 21:21:15 +0200 Subject: rv: Let the reactors take care of buffers Each RV monitor has one static buffer to send to the reactors. If multiple errors are detected simultaneously, the one buffer could be overwritten. Instead, leave it to the reactors to handle buffering. Reviewed-by: Gabriele Monaco Signed-off-by: Nam Cao Signed-off-by: Steven Rostedt (Google) --- include/linux/rv.h | 9 +++++++-- include/rv/da_monitor.h | 45 ++++++++++----------------------------------- 2 files changed, 17 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/linux/rv.h b/include/linux/rv.h index 3452b5e4b29e..9428e62eb8e9 100644 --- a/include/linux/rv.h +++ b/include/linux/rv.h @@ -38,7 +38,7 @@ union rv_task_monitor { struct rv_reactor { const char *name; const char *description; - void (*react)(char *msg); + __printf(1, 2) void (*react)(const char *msg, ...); }; #endif @@ -50,7 +50,7 @@ struct rv_monitor { void (*disable)(void); void (*reset)(void); #ifdef CONFIG_RV_REACTORS - void (*react)(char *msg); + __printf(1, 2) void (*react)(const char *msg, ...); #endif }; @@ -64,6 +64,11 @@ void rv_put_task_monitor_slot(int slot); bool rv_reacting_on(void); int rv_unregister_reactor(struct rv_reactor *reactor); int rv_register_reactor(struct rv_reactor *reactor); +#else +static inline bool rv_reacting_on(void) +{ + return false; +} #endif /* CONFIG_RV_REACTORS */ #endif /* CONFIG_RV */ diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h index 510c88bfabd4..15f9ed4e4bb6 100644 --- a/include/rv/da_monitor.h +++ b/include/rv/da_monitor.h @@ -19,45 +19,22 @@ #ifdef CONFIG_RV_REACTORS #define DECLARE_RV_REACTING_HELPERS(name, type) \ -static char REACT_MSG_##name[1024]; \ - \ -static inline char *format_react_msg_##name(type curr_state, type event) \ -{ \ - snprintf(REACT_MSG_##name, 1024, \ - "rv: monitor %s does not allow event %s on state %s\n", \ - #name, \ - model_get_event_name_##name(event), \ - model_get_state_name_##name(curr_state)); \ - return REACT_MSG_##name; \ -} \ - \ -static void cond_react_##name(char *msg) \ +static void cond_react_##name(type curr_state, type event) \ { \ - if (rv_##name.react) \ - rv_##name.react(msg); \ -} \ - \ -static bool rv_reacting_on_##name(void) \ -{ \ - return rv_reacting_on(); \ + if (!rv_reacting_on() || !rv_##name.react) \ + return; \ + rv_##name.react("rv: monitor %s does not allow event %s on state %s\n", \ + #name, \ + model_get_event_name_##name(event), \ + model_get_state_name_##name(curr_state)); \ } #else /* CONFIG_RV_REACTOR */ #define DECLARE_RV_REACTING_HELPERS(name, type) \ -static inline char *format_react_msg_##name(type curr_state, type event) \ -{ \ - return NULL; \ -} \ - \ -static void cond_react_##name(char *msg) \ +static void cond_react_##name(type curr_state, type event) \ { \ return; \ -} \ - \ -static bool rv_reacting_on_##name(void) \ -{ \ - return 0; \ } #endif @@ -170,8 +147,7 @@ da_event_##name(struct da_monitor *da_mon, enum events_##name event) \ return true; \ } \ \ - if (rv_reacting_on_##name()) \ - cond_react_##name(format_react_msg_##name(curr_state, event)); \ + cond_react_##name(curr_state, event); \ \ trace_error_##name(model_get_state_name_##name(curr_state), \ model_get_event_name_##name(event)); \ @@ -202,8 +178,7 @@ static inline bool da_event_##name(struct da_monitor *da_mon, struct task_struct return true; \ } \ \ - if (rv_reacting_on_##name()) \ - cond_react_##name(format_react_msg_##name(curr_state, event)); \ + cond_react_##name(curr_state, event); \ \ trace_error_##name(tsk->pid, \ model_get_state_name_##name(curr_state), \ -- cgit v1.2.3 From a9769a5b987838f03f3dd57b097794cd4c691098 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 9 Jul 2025 21:21:17 +0200 Subject: rv: Add support for LTL monitors While attempting to implement DA monitors for some complex specifications, deterministic automaton is found to be inappropriate as the specification language. The automaton is complicated, hard to understand, and error-prone. For these cases, linear temporal logic is more suitable as the specification language. Add support for linear temporal logic runtime verification monitor. Cc: John Ogness Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Gabriele Monaco Link: https://lore.kernel.org/d366c1fed60ed4e8f6451f3c15a99755f2740b5f.1752088709.git.namcao@linutronix.de Signed-off-by: Nam Cao Signed-off-by: Steven Rostedt (Google) --- include/linux/rv.h | 63 ++++++++++++++-- include/rv/ltl_monitor.h | 184 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 243 insertions(+), 4 deletions(-) create mode 100644 include/rv/ltl_monitor.h (limited to 'include') diff --git a/include/linux/rv.h b/include/linux/rv.h index 9428e62eb8e9..1d5579f9b75a 100644 --- a/include/linux/rv.h +++ b/include/linux/rv.h @@ -10,6 +10,10 @@ #define MAX_DA_NAME_LEN 32 #ifdef CONFIG_RV +#include +#include +#include + /* * Deterministic automaton per-object variables. */ @@ -18,6 +22,59 @@ struct da_monitor { unsigned int curr_state; }; +#ifdef CONFIG_RV_LTL_MONITOR + +/* + * In the future, if the number of atomic propositions or the size of Buchi + * automaton is larger, we can switch to dynamic allocation. For now, the code + * is simpler this way. + */ +#define RV_MAX_LTL_ATOM 32 +#define RV_MAX_BA_STATES 32 + +/** + * struct ltl_monitor - A linear temporal logic runtime verification monitor + * @states: States in the Buchi automaton. As Buchi automaton is a + * non-deterministic state machine, the monitor can be in multiple + * states simultaneously. This is a bitmask of all possible states. + * If this is zero, that means either: + * - The monitor has not started yet (e.g. because not all + * atomic propositions are known). + * - There is no possible state to be in. In other words, a + * violation of the LTL property is detected. + * @atoms: The values of atomic propositions. + * @unknown_atoms: Atomic propositions which are still unknown. + */ +struct ltl_monitor { + DECLARE_BITMAP(states, RV_MAX_BA_STATES); + DECLARE_BITMAP(atoms, RV_MAX_LTL_ATOM); + DECLARE_BITMAP(unknown_atoms, RV_MAX_LTL_ATOM); +}; + +static inline bool rv_ltl_valid_state(struct ltl_monitor *mon) +{ + for (int i = 0; i < ARRAY_SIZE(mon->states); ++i) { + if (mon->states[i]) + return true; + } + return false; +} + +static inline bool rv_ltl_all_atoms_known(struct ltl_monitor *mon) +{ + for (int i = 0; i < ARRAY_SIZE(mon->unknown_atoms); ++i) { + if (mon->unknown_atoms[i]) + return false; + } + return true; +} + +#else + +struct ltl_monitor {}; + +#endif /* CONFIG_RV_LTL_MONITOR */ + /* * Per-task RV monitors count. Nowadays fixed in RV_PER_TASK_MONITORS. * If we find justification for more monitors, we can think about @@ -27,11 +84,9 @@ struct da_monitor { #define RV_PER_TASK_MONITORS 1 #define RV_PER_TASK_MONITOR_INIT (RV_PER_TASK_MONITORS) -/* - * Futher monitor types are expected, so make this a union. - */ union rv_task_monitor { - struct da_monitor da_mon; + struct da_monitor da_mon; + struct ltl_monitor ltl_mon; }; #ifdef CONFIG_RV_REACTORS diff --git a/include/rv/ltl_monitor.h b/include/rv/ltl_monitor.h new file mode 100644 index 000000000000..9a583125b566 --- /dev/null +++ b/include/rv/ltl_monitor.h @@ -0,0 +1,184 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/** + * This file must be combined with the $(MODEL_NAME).h file generated by + * tools/verification/rvgen. + */ + +#include +#include +#include +#include +#include +#include +#include + +#ifndef MONITOR_NAME +#error "Please include $(MODEL_NAME).h generated by rvgen" +#endif + +#ifdef CONFIG_RV_REACTORS +#define RV_MONITOR_NAME CONCATENATE(rv_, MONITOR_NAME) +static struct rv_monitor RV_MONITOR_NAME; + +static void rv_cond_react(struct task_struct *task) +{ + if (!rv_reacting_on() || !RV_MONITOR_NAME.react) + return; + RV_MONITOR_NAME.react("rv: "__stringify(MONITOR_NAME)": %s[%d]: violation detected\n", + task->comm, task->pid); +} +#else +static void rv_cond_react(struct task_struct *task) +{ +} +#endif + +static int ltl_monitor_slot = RV_PER_TASK_MONITOR_INIT; + +static void ltl_atoms_fetch(struct task_struct *task, struct ltl_monitor *mon); +static void ltl_atoms_init(struct task_struct *task, struct ltl_monitor *mon, bool task_creation); + +static struct ltl_monitor *ltl_get_monitor(struct task_struct *task) +{ + return &task->rv[ltl_monitor_slot].ltl_mon; +} + +static void ltl_task_init(struct task_struct *task, bool task_creation) +{ + struct ltl_monitor *mon = ltl_get_monitor(task); + + memset(&mon->states, 0, sizeof(mon->states)); + + for (int i = 0; i < LTL_NUM_ATOM; ++i) + __set_bit(i, mon->unknown_atoms); + + ltl_atoms_init(task, mon, task_creation); + ltl_atoms_fetch(task, mon); +} + +static void handle_task_newtask(void *data, struct task_struct *task, unsigned long flags) +{ + ltl_task_init(task, true); +} + +static int ltl_monitor_init(void) +{ + struct task_struct *g, *p; + int ret, cpu; + + ret = rv_get_task_monitor_slot(); + if (ret < 0) + return ret; + + ltl_monitor_slot = ret; + + rv_attach_trace_probe(name, task_newtask, handle_task_newtask); + + read_lock(&tasklist_lock); + + for_each_process_thread(g, p) + ltl_task_init(p, false); + + for_each_present_cpu(cpu) + ltl_task_init(idle_task(cpu), false); + + read_unlock(&tasklist_lock); + + return 0; +} + +static void ltl_monitor_destroy(void) +{ + rv_detach_trace_probe(name, task_newtask, handle_task_newtask); + + rv_put_task_monitor_slot(ltl_monitor_slot); + ltl_monitor_slot = RV_PER_TASK_MONITOR_INIT; +} + +static void ltl_illegal_state(struct task_struct *task, struct ltl_monitor *mon) +{ + CONCATENATE(trace_error_, MONITOR_NAME)(task); + rv_cond_react(task); +} + +static void ltl_attempt_start(struct task_struct *task, struct ltl_monitor *mon) +{ + if (rv_ltl_all_atoms_known(mon)) + ltl_start(task, mon); +} + +static inline void ltl_atom_set(struct ltl_monitor *mon, enum ltl_atom atom, bool value) +{ + __clear_bit(atom, mon->unknown_atoms); + if (value) + __set_bit(atom, mon->atoms); + else + __clear_bit(atom, mon->atoms); +} + +static void +ltl_trace_event(struct task_struct *task, struct ltl_monitor *mon, unsigned long *next_state) +{ + const char *format_str = "%s"; + DECLARE_SEQ_BUF(atoms, 64); + char states[32], next[32]; + int i; + + if (!CONCATENATE(CONCATENATE(trace_event_, MONITOR_NAME), _enabled)()) + return; + + snprintf(states, sizeof(states), "%*pbl", RV_MAX_BA_STATES, mon->states); + snprintf(next, sizeof(next), "%*pbl", RV_MAX_BA_STATES, next_state); + + for (i = 0; i < LTL_NUM_ATOM; ++i) { + if (test_bit(i, mon->atoms)) { + seq_buf_printf(&atoms, format_str, ltl_atom_str(i)); + format_str = ",%s"; + } + } + + CONCATENATE(trace_event_, MONITOR_NAME)(task, states, atoms.buffer, next); +} + +static void ltl_validate(struct task_struct *task, struct ltl_monitor *mon) +{ + DECLARE_BITMAP(next_states, RV_MAX_BA_STATES) = {0}; + + if (!rv_ltl_valid_state(mon)) + return; + + for (unsigned int i = 0; i < RV_NUM_BA_STATES; ++i) { + if (test_bit(i, mon->states)) + ltl_possible_next_states(mon, i, next_states); + } + + ltl_trace_event(task, mon, next_states); + + memcpy(mon->states, next_states, sizeof(next_states)); + + if (!rv_ltl_valid_state(mon)) + ltl_illegal_state(task, mon); +} + +static void ltl_atom_update(struct task_struct *task, enum ltl_atom atom, bool value) +{ + struct ltl_monitor *mon = ltl_get_monitor(task); + + ltl_atom_set(mon, atom, value); + ltl_atoms_fetch(task, mon); + + if (!rv_ltl_valid_state(mon)) + ltl_attempt_start(task, mon); + + ltl_validate(task, mon); +} + +static void __maybe_unused ltl_atom_pulse(struct task_struct *task, enum ltl_atom atom, bool value) +{ + struct ltl_monitor *mon = ltl_get_monitor(task); + + ltl_atom_update(task, atom, value); + + ltl_atom_set(mon, atom, !value); + ltl_validate(task, mon); +} -- cgit v1.2.3 From fac5493251a680cb74343895d0e76843624a90d8 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 9 Jul 2025 21:21:23 +0200 Subject: rv: Allow to configure the number of per-task monitor Now that there are 2 monitors for real-time applications, users may want to enable both of them simultaneously. Make the number of per-task monitor configurable. Default it to 2 for now. Cc: John Ogness Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/93e83313fc4ba7f6e66f4abe80ca5f5494d658d0.1752088709.git.namcao@linutronix.de Reviewed-by: Gabriele Monaco Signed-off-by: Nam Cao Signed-off-by: Steven Rostedt (Google) --- include/linux/rv.h | 9 +-------- include/linux/sched.h | 8 +++----- 2 files changed, 4 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/rv.h b/include/linux/rv.h index 1d5579f9b75a..97baf58d88b2 100644 --- a/include/linux/rv.h +++ b/include/linux/rv.h @@ -75,14 +75,7 @@ struct ltl_monitor {}; #endif /* CONFIG_RV_LTL_MONITOR */ -/* - * Per-task RV monitors count. Nowadays fixed in RV_PER_TASK_MONITORS. - * If we find justification for more monitors, we can think about - * adding more or developing a dynamic method. So far, none of - * these are justified. - */ -#define RV_PER_TASK_MONITORS 1 -#define RV_PER_TASK_MONITOR_INIT (RV_PER_TASK_MONITORS) +#define RV_PER_TASK_MONITOR_INIT (CONFIG_RV_PER_TASK_MONITORS) union rv_task_monitor { struct da_monitor da_mon; diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f78a64beb52..fabd7fe1a07a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1642,12 +1642,10 @@ struct task_struct { #ifdef CONFIG_RV /* - * Per-task RV monitor. Nowadays fixed in RV_PER_TASK_MONITORS. - * If we find justification for more monitors, we can think - * about adding more or developing a dynamic method. So far, - * none of these are justified. + * Per-task RV monitor, fixed in CONFIG_RV_PER_TASK_MONITORS. + * If memory becomes a concern, we can think about a dynamic method. */ - union rv_task_monitor rv[RV_PER_TASK_MONITORS]; + union rv_task_monitor rv[CONFIG_RV_PER_TASK_MONITORS]; #endif #ifdef CONFIG_USER_EVENTS -- cgit v1.2.3 From 878e1e94a8aafb2f93a333a1aaed5e1c5f17e339 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 20 Jun 2025 17:30:20 -0700 Subject: tracing/sched: Remove obsolete comment on suffixes Commit ac01fa73f530 ("tracepoint: Have tracepoints created with DECLARE_ TRACE() have _tp suffix") makes it unnecessary to manually add a suffix. Remove a now obsolete comment. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Ricardo Neri Link: https://lore.kernel.org/20250620-rneri-tp-comment-fix-v1-1-e0f6495ac33c@linux.intel.com Signed-off-by: Ricardo Neri Signed-off-by: Steven Rostedt (Google) --- include/trace/events/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 4e6b2910cec3..f24c373bcc44 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -829,8 +829,6 @@ TRACE_EVENT(sched_wake_idle_without_ipi, /* * Following tracepoints are not exported in tracefs and provide hooking * mechanisms only for testing and debugging purposes. - * - * Postfixed with _tp to make them easily identifiable in the code. */ DECLARE_TRACE(pelt_cfs, TP_PROTO(struct cfs_rq *cfs_rq), -- cgit v1.2.3 From c0ef1446959101d23fdf1b1bdefc6613a83dba03 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 4 Jul 2025 20:21:53 +0200 Subject: devlink: Add support for u64 parameters Only 8, 16 and 32-bit integers are supported for numeric devlink parameters. The subsequent patch adds support for DPLL clock ID that is defined as 64-bit number. Add support for u64 parameter type. Signed-off-by: Ivan Vecera Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20250704182202.1641943-4-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index d0ce5a7e984c..4a5896b846a4 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -425,6 +425,7 @@ enum devlink_param_type { DEVLINK_PARAM_TYPE_U8 = DEVLINK_VAR_ATTR_TYPE_U8, DEVLINK_PARAM_TYPE_U16 = DEVLINK_VAR_ATTR_TYPE_U16, DEVLINK_PARAM_TYPE_U32 = DEVLINK_VAR_ATTR_TYPE_U32, + DEVLINK_PARAM_TYPE_U64 = DEVLINK_VAR_ATTR_TYPE_U64, DEVLINK_PARAM_TYPE_STRING = DEVLINK_VAR_ATTR_TYPE_STRING, DEVLINK_PARAM_TYPE_BOOL = DEVLINK_VAR_ATTR_TYPE_FLAG, }; @@ -433,6 +434,7 @@ union devlink_param_value { u8 vu8; u16 vu16; u32 vu32; + u64 vu64; char vstr[__DEVLINK_PARAM_MAX_STRING_VALUE]; bool vbool; }; -- cgit v1.2.3 From de9ccf2296ac323a571e442b5730ca9cc259fbf0 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 4 Jul 2025 20:21:54 +0200 Subject: devlink: Add new "clock_id" generic device param Add a new device generic parameter to specify clock ID that should be used by the device for registering DPLL devices and pins. Signed-off-by: Ivan Vecera Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20250704182202.1641943-5-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 4a5896b846a4..93640a29427c 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -525,6 +525,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, DEVLINK_PARAM_GENERIC_ID_ENABLE_PHC, + DEVLINK_PARAM_GENERIC_ID_CLOCK_ID, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -586,6 +587,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_ENABLE_PHC_NAME "enable_phc" #define DEVLINK_PARAM_GENERIC_ENABLE_PHC_TYPE DEVLINK_PARAM_TYPE_BOOL +#define DEVLINK_PARAM_GENERIC_CLOCK_ID_NAME "clock_id" +#define DEVLINK_PARAM_GENERIC_CLOCK_ID_TYPE DEVLINK_PARAM_TYPE_U64 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ -- cgit v1.2.3 From 67c0170566b55b1f6ee3567c94ff679104277e2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Hor=C3=A1k=20-=202N?= Date: Tue, 8 Jul 2025 11:01:37 +0200 Subject: net: phy: MII-Lite PHY interface mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some Broadcom PHYs are capable to operate in simplified MII mode, without TXER, RXER, CRS and COL signals as defined for the MII. The MII-Lite mode can be used on most Ethernet controllers with full MII interface by just leaving the input signals (RXER, CRS, COL) inactive. The absence of COL signal makes half-duplex link modes impossible but does not interfere with BroadR-Reach link modes on Broadcom PHYs, because they are all full-duplex only. Add MII-Lite interface mode, especially for Broadcom two-wire PHYs. Signed-off-by: Kamil Horák - 2N Reviewed-by: Maxime Chevallier Reviewed-by: Florian Fainelli Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20250708090140.61355-2-kamilh@axis.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 543a94751a6b..4c2b8b6e7187 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -106,6 +106,7 @@ extern const int phy_basic_ports_array[3]; * @PHY_INTERFACE_MODE_50GBASER: 50GBase-R - with Clause 134 FEC * @PHY_INTERFACE_MODE_LAUI: 50 Gigabit Attachment Unit Interface * @PHY_INTERFACE_MODE_100GBASEP: 100GBase-P - with Clause 134 FEC + * @PHY_INTERFACE_MODE_MIILITE: MII-Lite - MII without RXER TXER CRS COL * @PHY_INTERFACE_MODE_MAX: Book keeping * * Describes the interface between the MAC and PHY. @@ -150,6 +151,7 @@ typedef enum { PHY_INTERFACE_MODE_50GBASER, PHY_INTERFACE_MODE_LAUI, PHY_INTERFACE_MODE_100GBASEP, + PHY_INTERFACE_MODE_MIILITE, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -272,6 +274,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "laui"; case PHY_INTERFACE_MODE_100GBASEP: return "100gbase-p"; + case PHY_INTERFACE_MODE_MIILITE: + return "mii-lite"; default: return "unknown"; } -- cgit v1.2.3 From 34bf222824f6c9ac03620ee18aaf93d3b6138db3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Hor=C3=A1k=20-=202N?= Date: Tue, 8 Jul 2025 11:01:39 +0200 Subject: net: phy: bcm5481x: MII-Lite activation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Broadcom PHYs featuring the BroadR-Reach two-wire link mode are usually capable to operate in simplified MII mode, without TXER, RXER, CRS and COL signals as defined for the MII. The absence of COL signal makes half-duplex link modes impossible, however, the BroadR-Reach modes are all full-duplex only. Depending on the IC encapsulation, there exist MII-Lite-only PHYs such as bcm54811 in MLP. The PHY itself is hardware-strapped to select among multiple RGMII and MII-Lite modes, but the MII-Lite mode must be also activated by software. Add MII-Lite activation for bcm5481x PHYs. Signed-off-by: Kamil Horák - 2N Reviewed-by: Florian Fainelli Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20250708090140.61355-4-kamilh@axis.com Signed-off-by: Jakub Kicinski --- include/linux/brcmphy.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 028b3e00378e..15c35655f482 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -182,6 +182,12 @@ #define BCM_LED_MULTICOLOR_ACT 0x9 #define BCM_LED_MULTICOLOR_PROGRAM 0xa +/* + * Broadcom Synchronous Ethernet Controls (expansion register 0x0E) + */ +#define BCM_EXP_SYNC_ETHERNET (MII_BCM54XX_EXP_SEL_ER + 0x0E) +#define BCM_EXP_SYNC_ETHERNET_MII_LITE BIT(11) + /* * BCM5482: Shadow registers * Shadow values go into bits [14:10] of register 0x1c to select a shadow -- cgit v1.2.3 From 82241a83cd15aaaf28200a40ad1a8b480012edaf Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 5 Jun 2025 20:58:29 +0800 Subject: mm: fix the inaccurate memory statistics issue for users On some large machines with a high number of CPUs running a 64K pagesize kernel, we found that the 'RES' field is always 0 displayed by the top command for some processes, which will cause a lot of confusion for users. PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 875525 root 20 0 12480 0 0 R 0.3 0.0 0:00.08 top 1 root 20 0 172800 0 0 S 0.0 0.0 0:04.52 systemd The main reason is that the batch size of the percpu counter is quite large on these machines, caching a significant percpu value, since converting mm's rss stats into percpu_counter by commit f1a7941243c1 ("mm: convert mm's rss stats into percpu_counter"). Intuitively, the batch number should be optimized, but on some paths, performance may take precedence over statistical accuracy. Therefore, introducing a new interface to add the percpu statistical count and display it to users, which can remove the confusion. In addition, this change is not expected to be on a performance-critical path, so the modification should be acceptable. In addition, the 'mm->rss_stat' is updated by using add_mm_counter() and dec/inc_mm_counter(), which are all wrappers around percpu_counter_add_batch(). In percpu_counter_add_batch(), there is percpu batch caching to avoid 'fbc->lock' contention. This patch changes task_mem() and task_statm() to get the accurate mm counters under the 'fbc->lock', but this should not exacerbate kernel 'mm->rss_stat' lock contention due to the percpu batch caching of the mm counters. The following test also confirm the theoretical analysis. I run the stress-ng that stresses anon page faults in 32 threads on my 32 cores machine, while simultaneously running a script that starts 32 threads to busy-loop pread each stress-ng thread's /proc/pid/status interface. From the following data, I did not observe any obvious impact of this patch on the stress-ng tests. w/o patch: stress-ng: info: [6848] 4,399,219,085,152 CPU Cycles 67.327 B/sec stress-ng: info: [6848] 1,616,524,844,832 Instructions 24.740 B/sec (0.367 instr. per cycle) stress-ng: info: [6848] 39,529,792 Page Faults Total 0.605 M/sec stress-ng: info: [6848] 39,529,792 Page Faults Minor 0.605 M/sec w/patch: stress-ng: info: [2485] 4,462,440,381,856 CPU Cycles 68.382 B/sec stress-ng: info: [2485] 1,615,101,503,296 Instructions 24.750 B/sec (0.362 instr. per cycle) stress-ng: info: [2485] 39,439,232 Page Faults Total 0.604 M/sec stress-ng: info: [2485] 39,439,232 Page Faults Minor 0.604 M/sec On comparing a very simple app which just allocates & touches some memory against v6.1 (which doesn't have f1a7941243c1) and latest Linus tree (4c06e63b9203) I can see that on latest Linus tree the values for VmRSS, RssAnon and RssFile from /proc/self/status are all zeroes while they do report values on v6.1 and a Linus tree with this patch. Link: https://lkml.kernel.org/r/f4586b17f66f97c174f7fd1f8647374fdb53de1c.1749119050.git.baolin.wang@linux.alibaba.com Fixes: f1a7941243c1 ("mm: convert mm's rss stats into percpu_counter") Signed-off-by: Baolin Wang Reviewed-by: Aboorva Devarajan Tested-by: Aboorva Devarajan Tested-by Donet Tom Acked-by: Shakeel Butt Acked-by: SeongJae Park Acked-by: Michal Hocko Reviewed-by: Vlastimil Babka Cc: David Hildenbrand Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- include/linux/mm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0ef2ba0c667a..fa538feaa8d9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2568,6 +2568,11 @@ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) return percpu_counter_read_positive(&mm->rss_stat[member]); } +static inline unsigned long get_mm_counter_sum(struct mm_struct *mm, int member) +{ + return percpu_counter_sum_positive(&mm->rss_stat[member]); +} + void mm_trace_rss_stat(struct mm_struct *mm, int member); static inline void add_mm_counter(struct mm_struct *mm, int member, long value) -- cgit v1.2.3 From db6cc3f4ac2e6cdc898fc9cbc8b32ae1bf56bdad Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 4 Jul 2025 21:56:20 +0800 Subject: Revert "sched/numa: add statistics of numa balance task" This reverts commit ad6b26b6a0a79166b53209df2ca1cf8636296382. This commit introduces per-memcg/task NUMA balance statistics, but unfortunately it introduced a NULL pointer exception due to the following race condition: After a swap task candidate was chosen, its mm_struct pointer was set to NULL due to task exit. Later, when performing the actual task swapping, the p->mm caused the problem. CPU0 CPU1 : ... task_numa_migrate task_numa_find_cpu task_numa_compare # a normal task p is chosen env->best_task = p # p exit: exit_signals(p); p->flags |= PF_EXITING exit_mm p->mm = NULL; migrate_swap_stop __migrate_swap_task((arg->src_task, arg->dst_cpu) count_memcg_event_mm(p->mm, NUMA_TASK_SWAP)# p->mm is NULL task_lock() should be held and the PF_EXITING flag needs to be checked to prevent this from happening. After discussion, the conclusion was that adding a lock is not worthwhile for some statistics calculations. Revert the change and rely on the tracepoint for this purpose. Link: https://lkml.kernel.org/r/20250704135620.685752-1-yu.c.chen@intel.com Link: https://lkml.kernel.org/r/20250708064917.BBD13C4CEED@smtp.kernel.org Fixes: ad6b26b6a0a7 ("sched/numa: add statistics of numa balance task") Signed-off-by: Chen Yu Reported-by: Jirka Hladky Closes: https://lore.kernel.org/all/CAE4VaGBLJxpd=NeRJXpSCuw=REhC5LWJpC29kDy-Zh2ZDyzQZA@mail.gmail.com/ Reported-by: Srikanth Aithal Reported-by: Suneeth D Acked-by: Michal Hocko Cc: Borislav Petkov Cc: Ingo Molnar Cc: Jiri Hladky Cc: Libo Chen Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/sched.h | 4 ---- include/linux/vm_event_item.h | 2 -- 2 files changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f78a64beb52..aa9c5be7a632 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -548,10 +548,6 @@ struct sched_statistics { u64 nr_failed_migrations_running; u64 nr_failed_migrations_hot; u64 nr_forced_migrations; -#ifdef CONFIG_NUMA_BALANCING - u64 numa_task_migrated; - u64 numa_task_swapped; -#endif u64 nr_wakeups; u64 nr_wakeups_sync; diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 91a3ce9a2687..9e15a088ba38 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -66,8 +66,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NUMA_HINT_FAULTS, NUMA_HINT_FAULTS_LOCAL, NUMA_PAGE_MIGRATE, - NUMA_TASK_MIGRATE, - NUMA_TASK_SWAP, #endif #ifdef CONFIG_MIGRATION PGMIGRATE_SUCCESS, PGMIGRATE_FAIL, -- cgit v1.2.3 From de195c67bfcbc770bb6327bbfd3010f1c371216a Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 29 May 2025 18:15:45 +0100 Subject: mm: ksm: have KSM VMA checks not require a VMA pointer Patch series "mm: ksm: prevent KSM from breaking merging of new VMAs", v3. When KSM-by-default is established using prctl(PR_SET_MEMORY_MERGE), this defaults all newly mapped VMAs to having VM_MERGEABLE set, and thus makes them available to KSM for samepage merging. It also sets VM_MERGEABLE in all existing VMAs. However this causes an issue upon mapping of new VMAs - the initial flags will never have VM_MERGEABLE set when attempting a merge with adjacent VMAs (this is set later in the mmap() logic), and adjacent VMAs will ALWAYS have VM_MERGEABLE set. This renders all newly mapped VMAs unmergeable. To avoid this, this series performs the check for PR_SET_MEMORY_MERGE far earlier in the mmap() logic, prior to the merge being attempted. However we run into complexity with the depreciated .mmap() callback - if a driver hooks this, it might change flags which adjust KSM merge eligibility. We have to worry about this because, while KSM is only applicable to private mappings, this includes both anonymous and MAP_PRIVATE-mapped file-backed mappings. This isn't a problem for brk(), where the VMA must be anonymous. However in mmap() we must be conservative - if the VMA is anonymous then we can always proceed, however if not, we permit only shmem mappings (whose .mmap hook does not affect KSM eligibility) and drivers which implement .mmap_prepare() (invoked prior to the KSM eligibility check). If we can't be sure of the driver changing things, then we maintain the same behaviour of performing the KSM check later in the mmap() logic (and thus losing new VMA mergeability). A great many use-cases for this logic will use anonymous mappings any rate, so this change should already cover the majority of actual KSM use-cases. This patch (of 4): In subsequent commits we are going to determine KSM eligibility prior to a VMA being constructed, at which point we will of course not yet have access to a VMA pointer. It is trivial to boil down the check logic to be parameterised on mm_struct, file and VMA flags, so do so. As a part of this change, additionally expose and use file_is_dax() to determine whether a file is being mapped under a DAX inode. Link: https://lkml.kernel.org/r/cover.1748537921.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/36ad13eb50cdbd8aac6dcfba22c65d5031667295.1748537921.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: David Hildenbrand Reviewed-by: Chengming Zhou Reviewed-by: Vlastimil Babka Reviewed-by: Xu Xin Reviewed-by: Liam R. Howlett Cc: Al Viro Cc: Christian Brauner Cc: Jan Kara Cc: Jann Horn Cc: Stefan Roesch Signed-off-by: Andrew Morton --- include/linux/fs.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 040c0036320f..62634af97da6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3726,9 +3726,14 @@ void setattr_copy(struct mnt_idmap *, struct inode *inode, extern int file_update_time(struct file *file); +static inline bool file_is_dax(const struct file *file) +{ + return file && IS_DAX(file->f_mapping->host); +} + static inline bool vma_is_dax(const struct vm_area_struct *vma) { - return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); + return file_is_dax(vma->vm_file); } static inline bool vma_is_fsdax(struct vm_area_struct *vma) -- cgit v1.2.3 From cf7e7a3503df0b71afd68ee84e9a09d4514cc2dd Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 29 May 2025 18:15:47 +0100 Subject: mm: prevent KSM from breaking VMA merging for new VMAs If a user wishes to enable KSM mergeability for an entire process and all fork/exec'd processes that come after it, they use the prctl() PR_SET_MEMORY_MERGE operation. This defaults all newly mapped VMAs to have the VM_MERGEABLE VMA flag set (in order to indicate they are KSM mergeable), as well as setting this flag for all existing VMAs and propagating this across fork/exec. However it also breaks VMA merging for new VMAs, both in the process and all forked (and fork/exec'd) child processes. This is because when a new mapping is proposed, the flags specified will never have VM_MERGEABLE set. However all adjacent VMAs will already have VM_MERGEABLE set, rendering VMAs unmergeable by default. To work around this, we try to set the VM_MERGEABLE flag prior to attempting a merge. In the case of brk() this can always be done. However on mmap() things are more complicated - while KSM is not supported for MAP_SHARED file-backed mappings, it is supported for MAP_PRIVATE file-backed mappings. These mappings may have deprecated .mmap() callbacks specified which could, in theory, adjust flags and thus KSM eligibility. So we check to determine whether this is possible. If not, we set VM_MERGEABLE prior to the merge attempt on mmap(), otherwise we retain the previous behaviour. This fixes VMA merging for all new anonymous mappings, which covers the majority of real-world cases, so we should see a significant improvement in VMA mergeability. For MAP_PRIVATE file-backed mappings, those which implement the .mmap_prepare() hook and shmem are both known to be safe, so we allow these, disallowing all other cases. Also add stubs for newly introduced function invocations to VMA userland testing. [lorenzo.stoakes@oracle.com: correctly invoke late KSM check after mmap hook] Link: https://lkml.kernel.org/r/5861f8f6-cf5a-4d82-a062-139fb3f9cddb@lucifer.local Link: https://lkml.kernel.org/r/3ba660af716d87a18ca5b4e635f2101edeb56340.1748537921.git.lorenzo.stoakes@oracle.com Fixes: d7597f59d1d3 ("mm: add new api to enable ksm per process") # please no backport! Signed-off-by: Lorenzo Stoakes Reviewed-by: Chengming Zhou Acked-by: David Hildenbrand Reviewed-by: Liam R. Howlett Reviewed-by: Vlastimil Babka Reviewed-by: Xu Xin Cc: Al Viro Cc: Christian Brauner Cc: Jan Kara Cc: Jann Horn Cc: Stefan Roesch Signed-off-by: Andrew Morton --- include/linux/ksm.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ksm.h b/include/linux/ksm.h index d73095b5cd96..51787f0b0208 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -17,8 +17,8 @@ #ifdef CONFIG_KSM int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags); - -void ksm_add_vma(struct vm_area_struct *vma); +vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file, + vm_flags_t vm_flags); int ksm_enable_merge_any(struct mm_struct *mm); int ksm_disable_merge_any(struct mm_struct *mm); int ksm_disable(struct mm_struct *mm); @@ -97,8 +97,10 @@ bool ksm_process_mergeable(struct mm_struct *mm); #else /* !CONFIG_KSM */ -static inline void ksm_add_vma(struct vm_area_struct *vma) +static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm, + const struct file *file, vm_flags_t vm_flags) { + return vm_flags; } static inline int ksm_disable(struct mm_struct *mm) -- cgit v1.2.3 From e399a07a8a527dd023ca720a338fc508bedd364f Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 11 Apr 2025 10:17:45 -0600 Subject: mm: remove unused mmap tracepoints The vma_mas_szero and vma_store tracepoints are unused since commit fbcc3104b843 ("mmap: convert __vma_adjust() to use vma iterator"). Remove them so they are no longer listed as available tracepoints. Link: https://lkml.kernel.org/r/20250411161746.1043239-1-csander@purestorage.com Signed-off-by: Caleb Sander Mateos Reported-by: Eric Mueller Reviewed-by: Liam R. Howlett Cc: Jann Horn Cc: Lorenzo Stoakes Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/trace/events/mmap.h | 52 --------------------------------------------- 1 file changed, 52 deletions(-) (limited to 'include') diff --git a/include/trace/events/mmap.h b/include/trace/events/mmap.h index f8d61485de16..ee2843a5daef 100644 --- a/include/trace/events/mmap.h +++ b/include/trace/events/mmap.h @@ -43,58 +43,6 @@ TRACE_EVENT(vm_unmapped_area, __entry->align_offset) ); -TRACE_EVENT(vma_mas_szero, - TP_PROTO(struct maple_tree *mt, unsigned long start, - unsigned long end), - - TP_ARGS(mt, start, end), - - TP_STRUCT__entry( - __field(struct maple_tree *, mt) - __field(unsigned long, start) - __field(unsigned long, end) - ), - - TP_fast_assign( - __entry->mt = mt; - __entry->start = start; - __entry->end = end; - ), - - TP_printk("mt_mod %p, (NULL), SNULL, %lu, %lu,", - __entry->mt, - (unsigned long) __entry->start, - (unsigned long) __entry->end - ) -); - -TRACE_EVENT(vma_store, - TP_PROTO(struct maple_tree *mt, struct vm_area_struct *vma), - - TP_ARGS(mt, vma), - - TP_STRUCT__entry( - __field(struct maple_tree *, mt) - __field(struct vm_area_struct *, vma) - __field(unsigned long, vm_start) - __field(unsigned long, vm_end) - ), - - TP_fast_assign( - __entry->mt = mt; - __entry->vma = vma; - __entry->vm_start = vma->vm_start; - __entry->vm_end = vma->vm_end - 1; - ), - - TP_printk("mt_mod %p, (%p), STORE, %lu, %lu,", - __entry->mt, __entry->vma, - (unsigned long) __entry->vm_start, - (unsigned long) __entry->vm_end - ) -); - - TRACE_EVENT(exit_mmap, TP_PROTO(struct mm_struct *mm), -- cgit v1.2.3 From 792b429db7e0217faf7bce9fe46e7708135cf83c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 4 Jun 2025 16:05:44 +0200 Subject: mm/gup: remove (VM_)BUG_ONs Especially once we hit one of the assertions in sanity_check_pinned_pages(), observing follow-up assertions failing in other code can give good clues about what went wrong, so use VM_WARN_ON_ONCE instead. While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as well. Add one comment for the pfn_valid() check. We have to introduce VM_WARN_ON_ONCE_VMA() to make that fly. Drop the BUG_ON after mmap_read_lock_killable(), if that ever returns something > 0 we're in bigger trouble. Convert the other BUG_ON's into VM_WARN_ON_ONCE as well, they are in a similar domain "should never happen", but more reasonable to check for during early testing. [david@redhat.com: use the _FOLIO variant where possible, per Lorenzo] Link: https://lkml.kernel.org/r/844bd929-a551-48e3-a12e-285cd65ba580@redhat.com Link: https://lkml.kernel.org/r/20250604140544.688711-1-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Vlastimil Babka Reviewed-by: Suren Baghdasaryan Reviewed-by: Lorenzo Stoakes Acked-by: SeongJae Park Reviewed-by: Liam R. Howlett Acked-by: Michal Hocko Reviewed-by: John Hubbard Cc: Mike Rapoport Cc: Jason Gunthorpe Cc: Peter Xu Signed-off-by: Andrew Morton --- include/linux/mmdebug.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index a0a3894900ed..14a45979cccc 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -89,6 +89,17 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi); } \ unlikely(__ret_warn_once); \ }) +#define VM_WARN_ON_ONCE_VMA(cond, vma) ({ \ + static bool __section(".data..once") __warned; \ + int __ret_warn_once = !!(cond); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ + dump_vma(vma); \ + __warned = true; \ + WARN_ON(1); \ + } \ + unlikely(__ret_warn_once); \ +}) #define VM_WARN_ON_VMG(cond, vmg) ({ \ int __ret_warn = !!(cond); \ \ @@ -115,6 +126,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi); #define VM_WARN_ON_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE_MM(cond, mm) BUILD_BUG_ON_INVALID(cond) +#define VM_WARN_ON_ONCE_VMA(cond, vma) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_VMG(cond, vmg) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) -- cgit v1.2.3 From 3800d55250976b7a4bd42c255b267bc242669709 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 4 Jun 2025 17:14:27 -0400 Subject: mm: rename CONFIG_PAGE_BLOCK_ORDER to CONFIG_PAGE_BLOCK_MAX_ORDER The config is in fact an additional upper limit of pageblock_order, so rename it to avoid confusion. Link: https://lkml.kernel.org/r/20250604211427.1590859-1-ziy@nvidia.com Signed-off-by: Zi Yan Acked-by: David Hildenbrand Reviewed-by: Oscar Salvador Acked-by: Juan Yescas Reviewed-by: Anshuman Khandual Acked-by: Vlastimil Babka Cc: "Isaac J. Manjarres" Cc: Kalesh Singh Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Masahiro Yamada Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Suren Baghdasaryan Cc: T.J. Mercier Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 14 +++++++------- include/linux/pageblock-flags.h | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 283913d42d7b..5bec8b1d0e66 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -38,19 +38,19 @@ #define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1) /* Defines the order for the number of pages that have a migrate type. */ -#ifndef CONFIG_PAGE_BLOCK_ORDER -#define PAGE_BLOCK_ORDER MAX_PAGE_ORDER +#ifndef CONFIG_PAGE_BLOCK_MAX_ORDER +#define PAGE_BLOCK_MAX_ORDER MAX_PAGE_ORDER #else -#define PAGE_BLOCK_ORDER CONFIG_PAGE_BLOCK_ORDER -#endif /* CONFIG_PAGE_BLOCK_ORDER */ +#define PAGE_BLOCK_MAX_ORDER CONFIG_PAGE_BLOCK_MAX_ORDER +#endif /* CONFIG_PAGE_BLOCK_MAX_ORDER */ /* * The MAX_PAGE_ORDER, which defines the max order of pages to be allocated - * by the buddy allocator, has to be larger or equal to the PAGE_BLOCK_ORDER, + * by the buddy allocator, has to be larger or equal to the PAGE_BLOCK_MAX_ORDER, * which defines the order for the number of pages that can have a migrate type */ -#if (PAGE_BLOCK_ORDER > MAX_PAGE_ORDER) -#error MAX_PAGE_ORDER must be >= PAGE_BLOCK_ORDER +#if (PAGE_BLOCK_MAX_ORDER > MAX_PAGE_ORDER) +#error MAX_PAGE_ORDER must be >= PAGE_BLOCK_MAX_ORDER #endif /* diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index e73a4292ef02..6297c6343c55 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -41,18 +41,18 @@ extern unsigned int pageblock_order; * Huge pages are a constant size, but don't exceed the maximum allocation * granularity. */ -#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, PAGE_BLOCK_ORDER) +#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, PAGE_BLOCK_MAX_ORDER) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ #elif defined(CONFIG_TRANSPARENT_HUGEPAGE) -#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, PAGE_BLOCK_ORDER) +#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, PAGE_BLOCK_MAX_ORDER) #else /* CONFIG_TRANSPARENT_HUGEPAGE */ -/* If huge pages are not used, group by PAGE_BLOCK_ORDER */ -#define pageblock_order PAGE_BLOCK_ORDER +/* If huge pages are not used, group by PAGE_BLOCK_MAX_ORDER */ +#define pageblock_order PAGE_BLOCK_MAX_ORDER #endif /* CONFIG_HUGETLB_PAGE */ -- cgit v1.2.3 From 44b1b073eb36143ec65a918c0fbaa582f3ec2aa1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Jun 2025 07:49:38 +0200 Subject: mm: stop passing a writeback_control structure to shmem_writeout shmem_writeout only needs the swap_iocb cookie and the split folio list. Pass those explicitly and remove the now unused list member from struct writeback_control. Link: https://lkml.kernel.org/r/20250610054959.2057526-3-hch@lst.de Signed-off-by: Christoph Hellwig Cc: Baolin Wang Cc: Chengming Zhou Cc: Hugh Dickins Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Signed-off-by: Andrew Morton --- include/linux/shmem_fs.h | 5 ++++- include/linux/writeback.h | 3 --- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 5f03a39a26f7..6d0f9c599ff7 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -11,6 +11,8 @@ #include #include +struct swap_iocb; + /* inode in-kernel data */ #ifdef CONFIG_TMPFS_QUOTA @@ -107,7 +109,8 @@ static inline bool shmem_mapping(struct address_space *mapping) void shmem_unlock_mapping(struct address_space *mapping); struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); -int shmem_writeout(struct folio *folio, struct writeback_control *wbc); +int shmem_writeout(struct folio *folio, struct swap_iocb **plug, + struct list_head *folio_list); void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index eda4b62511f7..82f217970092 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -79,9 +79,6 @@ struct writeback_control { */ struct swap_iocb **swap_plug; - /* Target list for splitting a large folio */ - struct list_head *list; - /* internal fields used by the ->writepages implementation: */ struct folio_batch fbatch; pgoff_t index; -- cgit v1.2.3 From 624043dbd5be03cc5a2b9175c3934e6fb0ef7c70 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Jun 2025 07:49:41 +0200 Subject: mm: stop passing a writeback_control structure to swap_writeout swap_writeout only needs the swap_iocb cookie from the writeback_control structure, so pass it explicitly. Link: https://lkml.kernel.org/r/20250610054959.2057526-6-hch@lst.de Signed-off-by: Christoph Hellwig Cc: Baolin Wang Cc: Chengming Zhou Cc: Hugh Dickins Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Signed-off-by: Andrew Morton --- include/linux/writeback.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 82f217970092..9e960f2faf79 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -72,13 +72,6 @@ struct writeback_control { */ unsigned no_cgroup_owner:1; - /* To enable batching of swap writes to non-block-device backends, - * "plug" can be set point to a 'struct swap_iocb *'. When all swap - * writes have been submitted, if with swap_iocb is not NULL, - * swap_write_unplug() should be called. - */ - struct swap_iocb **swap_plug; - /* internal fields used by the ->writepages implementation: */ struct folio_batch fbatch; pgoff_t index; -- cgit v1.2.3 From a8fb49c6abbbe5c71e1a8a888ef2c4b3e341d169 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Jun 2025 07:49:42 +0200 Subject: mm: remove the for_reclaim field from struct writeback_control This field is now only set to one in the i915 gem code that only calls writeback_iter on it, which ignores the flag. All other checks are thuse dead code and the field can be removed. Link: https://lkml.kernel.org/r/20250610054959.2057526-7-hch@lst.de Signed-off-by: Christoph Hellwig Cc: Baolin Wang Cc: Chengming Zhou Cc: Hugh Dickins Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Signed-off-by: Andrew Morton --- include/linux/writeback.h | 1 - include/trace/events/btrfs.h | 7 ++----- include/trace/events/writeback.h | 8 ++------ 3 files changed, 4 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 9e960f2faf79..a2848d731a46 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -59,7 +59,6 @@ struct writeback_control { unsigned for_kupdate:1; /* A kupdate writeback */ unsigned for_background:1; /* A background writeback */ unsigned tagged_writepages:1; /* tag-and-write to avoid livelock */ - unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ unsigned unpinned_netfs_wb:1; /* Cleared I_PINNING_NETFS_WB */ diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index bebc252db865..0adc40f5e72b 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -686,7 +686,6 @@ DECLARE_EVENT_CLASS(btrfs__writepage, __field( loff_t, range_start ) __field( loff_t, range_end ) __field( char, for_kupdate ) - __field( char, for_reclaim ) __field( char, range_cyclic ) __field( unsigned long, writeback_index ) __field( u64, root_objectid ) @@ -700,7 +699,6 @@ DECLARE_EVENT_CLASS(btrfs__writepage, __entry->range_start = wbc->range_start; __entry->range_end = wbc->range_end; __entry->for_kupdate = wbc->for_kupdate; - __entry->for_reclaim = wbc->for_reclaim; __entry->range_cyclic = wbc->range_cyclic; __entry->writeback_index = inode->i_mapping->writeback_index; __entry->root_objectid = btrfs_root_id(BTRFS_I(inode)->root); @@ -709,13 +707,12 @@ DECLARE_EVENT_CLASS(btrfs__writepage, TP_printk_btrfs("root=%llu(%s) ino=%llu page_index=%lu " "nr_to_write=%ld pages_skipped=%ld range_start=%llu " "range_end=%llu for_kupdate=%d " - "for_reclaim=%d range_cyclic=%d writeback_index=%lu", + "range_cyclic=%d writeback_index=%lu", show_root_type(__entry->root_objectid), __entry->ino, __entry->index, __entry->nr_to_write, __entry->pages_skipped, __entry->range_start, __entry->range_end, - __entry->for_kupdate, - __entry->for_reclaim, __entry->range_cyclic, + __entry->for_kupdate, __entry->range_cyclic, __entry->writeback_index) ); diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 0ff388131fc9..1e23919c0da9 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -459,7 +459,6 @@ DECLARE_EVENT_CLASS(wbc_class, __field(int, sync_mode) __field(int, for_kupdate) __field(int, for_background) - __field(int, for_reclaim) __field(int, range_cyclic) __field(long, range_start) __field(long, range_end) @@ -473,23 +472,20 @@ DECLARE_EVENT_CLASS(wbc_class, __entry->sync_mode = wbc->sync_mode; __entry->for_kupdate = wbc->for_kupdate; __entry->for_background = wbc->for_background; - __entry->for_reclaim = wbc->for_reclaim; __entry->range_cyclic = wbc->range_cyclic; __entry->range_start = (long)wbc->range_start; __entry->range_end = (long)wbc->range_end; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); ), - TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d " - "bgrd=%d reclm=%d cyclic=%d " - "start=0x%lx end=0x%lx cgroup_ino=%lu", + TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d bgrd=%d " + "cyclic=%d start=0x%lx end=0x%lx cgroup_ino=%lu", __entry->name, __entry->nr_to_write, __entry->pages_skipped, __entry->sync_mode, __entry->for_kupdate, __entry->for_background, - __entry->for_reclaim, __entry->range_cyclic, __entry->range_start, __entry->range_end, -- cgit v1.2.3 From 4f745def815d86eece3614aa0cd10a25cf94fed4 Mon Sep 17 00:00:00 2001 From: Donet Tom Date: Wed, 28 May 2025 12:18:00 -0500 Subject: drivers/base/node: optimize memory block registration to reduce boot time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "drivers/base/node.c: optimization and cleanups", v7. This patch (of 7) During node device initialization, `memory blocks` are registered under each NUMA node. The `memory blocks` to be registered are identified using the node's start and end PFNs, which are obtained from the node's pg_data However, not all PFNs within this range necessarily belong to the same node—some may belong to other nodes. Additionally, due to the discontiguous nature of physical memory, certain sections within a `memory block` may be absent. As a result, `memory blocks` that fall between a node's start and end PFNs may span across multiple nodes, and some sections within those blocks may be missing. `Memory blocks` have a fixed size, which is architecture dependent. Due to these considerations, the memory block registration is currently performed as follows: for_each_online_node(nid): start_pfn = pgdat->node_start_pfn; end_pfn = pgdat->node_start_pfn + node_spanned_pages; for_each_memory_block_between(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn)) mem_blk = memory_block_id(pfn_to_section_nr(pfn)); pfn_mb_start=section_nr_to_pfn(mem_blk->start_section_nr) pfn_mb_end = pfn_start + memory_block_pfns - 1 for (pfn = pfn_mb_start; pfn < pfn_mb_end; pfn++): if (get_nid_for_pfn(pfn) != nid): continue; else do_register_memory_block_under_node(nid, mem_blk, MEMINIT_EARLY); Here, we derive the start and end PFNs from the node's pg_data, then determine the memory blocks that may belong to the node. For each `memory block` in this range, we inspect all PFNs it contains and check their associated NUMA node ID. If a PFN within the block matches the current node, the memory block is registered under that node. If CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled, get_nid_for_pfn() performs a binary search in the `memblock regions` to determine the NUMA node ID for a given PFN. If it is not enabled, the node ID is retrieved directly from the struct page. On large systems, this process can become time-consuming, especially since we iterate over each `memory block` and all PFNs within it until a match is found. When CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled, the additional overhead of the binary search increases the execution time significantly, potentially leading to soft lockups during boot. In this patch, we iterate over `memblock region` to identify the `memory blocks` that belong to the current NUMA node. `memblock regions` are contiguous memory ranges, each associated with a single NUMA node, and they do not span across multiple nodes. for_each_memory_region(r): // r => region if (!node_online(r->nid)): continue; else for_each_memory_block_between(r->base, r->base + r->size - 1): do_register_memory_block_under_node(r->nid, mem_blk, MEMINIT_EARLY); We iterate over all memblock regions, and if the node associated with the region is online, we calculate the start and end memory blocks based on the region's start and end PFNs. We then register all the memory blocks within that range under the region node. Test Results on My system with 32TB RAM ======================================= 1. Boot time with CONFIG_DEFERRED_STRUCT_PAGE_INIT enabled. Without this patch ------------------ Startup finished in 1min 16.528s (kernel) With this patch --------------- Startup finished in 17.236s (kernel) - 78% Improvement 2. Boot time with CONFIG_DEFERRED_STRUCT_PAGE_INIT disabled. Without this patch ------------------ Startup finished in 28.320s (kernel) With this patch --------------- Startup finished in 15.621s (kernel) - 46% Improvement [donettom@linux.ibm.com: restore removed extra line] Link: https://lkml.kernel.org/r/20250609140354.467908-1-donettom@linux.ibm.com Link: https://lkml.kernel.org/r/2a0a05c2dffc62a742bf1dd030098be4ce99be28.1748452241.git.donettom@linux.ibm.com Link: https://lkml.kernel.org/r/2a0a05c2dffc62a742bf1dd030098be4ce99be28.1748452241.git.donettom@linux.ibm.com Signed-off-by: Donet Tom Acked-by: David Hildenbrand Acked-by: Oscar Salvador Acked-by: Mike Rapoport (Microsoft) Acked-by: Zi Yan Signed-off-by: Andrew Morton --- include/linux/memory.h | 18 ++++++++++++++++++ include/linux/node.h | 3 +++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/memory.h b/include/linux/memory.h index 5ec4e6d209b9..bd4440bc4a57 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -179,12 +179,30 @@ struct memory_group *memory_group_find_by_id(int mgid); typedef int (*walk_memory_groups_func_t)(struct memory_group *, void *); int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, struct memory_group *excluded, void *arg); +struct memory_block *find_memory_block_by_id(unsigned long block_id); #define hotplug_memory_notifier(fn, pri) ({ \ static __meminitdata struct notifier_block fn##_mem_nb =\ { .notifier_call = fn, .priority = pri };\ register_memory_notifier(&fn##_mem_nb); \ }) +extern int sections_per_block; + +static inline unsigned long memory_block_id(unsigned long section_nr) +{ + return section_nr / sections_per_block; +} + +static inline unsigned long pfn_to_block_id(unsigned long pfn) +{ + return memory_block_id(pfn_to_section_nr(pfn)); +} + +static inline unsigned long phys_to_block_id(unsigned long phys) +{ + return pfn_to_block_id(PFN_DOWN(phys)); +} + #ifdef CONFIG_NUMA void memory_block_add_nid(struct memory_block *mem, int nid, enum meminit_context context); diff --git a/include/linux/node.h b/include/linux/node.h index 2b7517892230..485370f3bc17 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -120,6 +120,9 @@ static inline void register_memory_blocks_under_node(int nid, unsigned long star enum meminit_context context) { } +static inline void register_memory_blocks_under_nodes(void) +{ +} #endif extern void unregister_node(struct node *node); -- cgit v1.2.3 From ac24f6cd87d88150fc6c1fef904794571f62dc5e Mon Sep 17 00:00:00 2001 From: Donet Tom Date: Wed, 28 May 2025 12:18:02 -0500 Subject: drivers/base/node: remove register_memory_blocks_under_node() function call from register_one_node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit register_one_node() is now only called via cpu_up() → __try_online_node() during CPU hotplug operations to online a node. At this stage, the node has not yet had any memory added. As a result, there are no memory blocks to walk or register, so calling register_memory_blocks_under_node() is unnecessary. Therefore, the call to register_memory_blocks_under_node() has been removed from register_one_node(). Link: https://lkml.kernel.org/r/ecf07075b1a41015fcf58823997d5c2ed7b8c18f.1748452242.git.donettom@linux.ibm.com Signed-off-by: Donet Tom Acked-by: Oscar Salvador Acked-by: Mike Rapoport (Microsoft) Acked-by: David Hildenbrand Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/node.h | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/node.h b/include/linux/node.h index 485370f3bc17..b15de78e0408 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -134,21 +134,7 @@ extern int __register_one_node(int nid); /* Registers an online node */ static inline int register_one_node(int nid) { - int error = 0; - - if (node_online(nid)) { - struct pglist_data *pgdat = NODE_DATA(nid); - unsigned long start_pfn = pgdat->node_start_pfn; - unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages; - - error = __register_one_node(nid); - if (error) - return error; - register_memory_blocks_under_node(nid, start_pfn, end_pfn, - MEMINIT_EARLY); - } - - return error; + return __register_one_node(nid); } extern void unregister_one_node(int nid); -- cgit v1.2.3 From 10f09d82f8b7c25bbd0b6d5142ff6df6e634132d Mon Sep 17 00:00:00 2001 From: Donet Tom Date: Wed, 28 May 2025 12:18:03 -0500 Subject: drivers/base/node: rename register_memory_blocks_under_node() and remove context argument The function register_memory_blocks_under_node() is now only called from the memory hotplug path, as register_memory_blocks_under_node_early() handles registration during early boot. Therefore, the context argument used to differentiate between early boot and hotplug is no longer needed and was removed. Since the function is only called from the hotplug path, we renamed register_memory_blocks_under_node() to register_memory_blocks_under_node_hotplug() Link: https://lkml.kernel.org/r/907c22292b0ee4975107876efc875c75c11badd9.1748452242.git.donettom@linux.ibm.com Signed-off-by: Donet Tom Acked-by: Oscar Salvador Acked-by: Mike Rapoport (Microsoft) Acked-by: David Hildenbrand Acked-by: Zi Yan Signed-off-by: Andrew Morton --- include/linux/node.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/node.h b/include/linux/node.h index b15de78e0408..75b036a100d2 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -111,13 +111,12 @@ struct memory_block; extern struct node *node_devices[]; #if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA) -void register_memory_blocks_under_node(int nid, unsigned long start_pfn, - unsigned long end_pfn, - enum meminit_context context); +void register_memory_blocks_under_node_hotplug(int nid, unsigned long start_pfn, + unsigned long end_pfn); #else -static inline void register_memory_blocks_under_node(int nid, unsigned long start_pfn, - unsigned long end_pfn, - enum meminit_context context) +static inline void register_memory_blocks_under_node_hotplug(int nid, + unsigned long start_pfn, + unsigned long end_pfn) { } static inline void register_memory_blocks_under_nodes(void) -- cgit v1.2.3 From a5352f8a40a8d1b385abeca0b2cff5d2468e31a1 Mon Sep 17 00:00:00 2001 From: Donet Tom Date: Wed, 28 May 2025 12:18:04 -0500 Subject: drivers/base/node: rename __register_one_node() to register_one_node() The register_one_node() function was a simple wrapper around __register_one_node(). To simplify the code, register_one_node() has been removed, and __register_one_node() has been renamed to register_one_node(). Link: https://lkml.kernel.org/r/8262cd0f44eeb048a1fcd3ac8382760d7f7dea60.1748452242.git.donettom@linux.ibm.com Signed-off-by: Donet Tom Acked-by: David Hildenbrand Cc: Mike Rapoport (Microsoft) Cc: Oscar Salvador Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/node.h | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/node.h b/include/linux/node.h index 75b036a100d2..88bceebcbfa5 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -128,14 +128,7 @@ extern void unregister_node(struct node *node); #ifdef CONFIG_NUMA extern void node_dev_init(void); /* Core of the node registration - only memory hotplug should use this */ -extern int __register_one_node(int nid); - -/* Registers an online node */ -static inline int register_one_node(int nid) -{ - return __register_one_node(nid); -} - +extern int register_one_node(int nid); extern void unregister_one_node(int nid); extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); @@ -148,10 +141,6 @@ extern int register_memory_node_under_compute_node(unsigned int mem_nid, static inline void node_dev_init(void) { } -static inline int __register_one_node(int nid) -{ - return 0; -} static inline int register_one_node(int nid) { return 0; -- cgit v1.2.3 From 7208cc6497c2615ed5a334b52c92ae98bda91198 Mon Sep 17 00:00:00 2001 From: Tal Zussman Date: Thu, 19 Jun 2025 21:24:23 -0400 Subject: userfaultfd: correctly prevent registering VM_DROPPABLE regions Patch series "mm: userfaultfd: assorted fixes and cleanups", v3. Two fixes and two cleanups for userfaultfd. Note that the third patch yields a small change in the ABI, but we seem to have concluded that it is acceptable in this case. This patch (of 4): vma_can_userfault() masks off non-userfaultfd VM flags from vm_flags. The vm_flags & VM_DROPPABLE test will then always be false, incorrectly allowing VM_DROPPABLE regions to be registered with userfaultfd. Additionally, vm_flags is not guaranteed to correspond to the actual VMA's flags. Fix this test by checking the VMA's flags directly. Link: https://lkml.kernel.org/r/20250619-uffd-fixes-v3-0-a7274d3bd5e4@columbia.edu Link: https://lore.kernel.org/linux-mm/5a875a3a-2243-4eab-856f-bc53ccfec3ea@redhat.com/ Link: https://lkml.kernel.org/r/20250619-uffd-fixes-v3-1-a7274d3bd5e4@columbia.edu Fixes: 9651fcedf7b9 ("mm: add MAP_DROPPABLE for designating always lazily freeable mappings") Signed-off-by: Tal Zussman Acked-by: David Hildenbrand Acked-by: Peter Xu Acked-by: Jason A. Donenfeld Cc: Al Viro Cc: Andrea Arcangeli Cc: Christian Brauner Cc: Jan Kara Signed-off-by: Andrew Morton --- include/linux/userfaultfd_k.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 75342022d144..f3b3d2c9dd5e 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -218,7 +218,7 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma, { vm_flags &= __VM_UFFD_FLAGS; - if (vm_flags & VM_DROPPABLE) + if (vma->vm_flags & VM_DROPPABLE) return false; if ((vm_flags & VM_UFFD_MINOR) && -- cgit v1.2.3 From 5e00e31867d16e235bb693b900c85e86dc2c3464 Mon Sep 17 00:00:00 2001 From: Tal Zussman Date: Thu, 19 Jun 2025 21:24:26 -0400 Subject: userfaultfd: remove UFFD_CLOEXEC, UFFD_NONBLOCK, and UFFD_FLAGS_SET UFFD_CLOEXEC, UFFD_NONBLOCK, and UFFD_FLAGS_SET have been unused since they were added in commit 932b18e0aec6 ("userfaultfd: linux/userfaultfd_k.h"). Remove them and the associated BUILD_BUG_ON() checks. Link: https://lkml.kernel.org/r/20250619-uffd-fixes-v3-4-a7274d3bd5e4@columbia.edu Signed-off-by: Tal Zussman Acked-by: David Hildenbrand Acked-by: Peter Xu Cc: Al Viro Cc: Andrea Arcangeli Cc: Christian Brauner Cc: Jan Kara Cc: Jason A. Donenfeld Signed-off-by: Andrew Morton --- include/linux/userfaultfd_k.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index f3b3d2c9dd5e..ccad58602846 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -30,11 +30,7 @@ * from userfaultfd, in order to leave a free define-space for * shared O_* flags. */ -#define UFFD_CLOEXEC O_CLOEXEC -#define UFFD_NONBLOCK O_NONBLOCK - #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) -#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS) /* * Start with fault_pending_wqh and fault_wqh so they're more likely -- cgit v1.2.3 From ff7ec8dc1b646296f8d94c39339e8d3833d16c05 Mon Sep 17 00:00:00 2001 From: wangzijie Date: Sat, 7 Jun 2025 10:13:53 +0800 Subject: proc: use the same treatment to check proc_lseek as ones for proc_read_iter et.al Check pde->proc_ops->proc_lseek directly may cause UAF in rmmod scenario. It's a gap in proc_reg_open() after commit 654b33ada4ab("proc: fix UAF in proc_get_inode()"). Followed by AI Viro's suggestion, fix it in same manner. Link: https://lkml.kernel.org/r/20250607021353.1127963-1-wangzijie1@honor.com Fixes: 3f61631d47f1 ("take care to handle NULL ->proc_lseek()") Signed-off-by: wangzijie Reviewed-by: Alexey Dobriyan Cc: Alexei Starovoitov Cc: Al Viro Cc: "Edgecombe, Rick P" Cc: Kirill A. Shuemov Signed-off-by: Andrew Morton --- include/linux/proc_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index ea62201c74c4..703d0c76cc9a 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -27,6 +27,7 @@ enum { PROC_ENTRY_proc_read_iter = 1U << 1, PROC_ENTRY_proc_compat_ioctl = 1U << 2, + PROC_ENTRY_proc_lseek = 1U << 3, }; struct proc_ops { -- cgit v1.2.3 From f5e8b140cd1324cf2c9c17487b8f444098624797 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 9 Jun 2025 10:27:25 +0100 Subject: mm/readahead: make space in struct file_ra_state We need to be able to store the preferred folio order associated with a readahead request in the struct file_ra_state so that we can more accurately increase the order across subsequent readahead requests. But struct file_ra_state is per-struct file, so we don't really want to increase it's size. mmap_miss is currently 32 bits but it is only counted up to 10 * MMAP_LOTSAMISS, which is currently defined as 1000. So 16 bits should be plenty. Redefine it to unsigned short, making room for order as unsigned short in follow up commit. Link: https://lkml.kernel.org/r/20250609092729.274960-4-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Acked-by: David Hildenbrand Reviewed-by: Jan Kara Cc: Chaitanya S Prakash Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 62634af97da6..ef819b232d66 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1054,7 +1054,7 @@ struct file_ra_state { unsigned int size; unsigned int async_size; unsigned int ra_pages; - unsigned int mmap_miss; + unsigned short mmap_miss; loff_t prev_pos; }; -- cgit v1.2.3 From c4602f9fa77fc6bb956ca51a23e7a39439e75cb6 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 9 Jun 2025 10:27:26 +0100 Subject: mm/readahead: store folio order in struct file_ra_state Previously the folio order of the previous readahead request was inferred from the folio who's readahead marker was hit. But due to the way we have to round to non-natural boundaries sometimes, this first folio in the readahead block is often smaller than the preferred order for that request. This means that for cases where the initial sync readahead is poorly aligned, the folio order will ramp up much more slowly. So instead, let's store the order in struct file_ra_state so we are not affected by any required alignment. We previously made enough room in the struct for a 16 order field. This should be plenty big enough since we are limited to MAX_PAGECACHE_ORDER anyway, which is certainly never larger than ~20. Since we now pass order in struct file_ra_state, page_cache_ra_order() no longer needs it's new_order parameter, so let's remove that. Worked example: Here we are touching pages 17-256 sequentially just as we did in the previous commit, but now that we are remembering the preferred order explicitly, we no longer have the slow ramp up problem. Note specifically that we no longer have 2 rounds (2x ~128K) of order-2 folios: TYPE STARTOFFS ENDOFFS SIZE STARTPG ENDPG NRPG ORDER RA ----- ---------- ---------- ---------- ------- ------- ----- ----- -- HOLE 0x00000000 0x00001000 4096 0 1 1 FOLIO 0x00001000 0x00002000 4096 1 2 1 0 FOLIO 0x00002000 0x00003000 4096 2 3 1 0 FOLIO 0x00003000 0x00004000 4096 3 4 1 0 FOLIO 0x00004000 0x00005000 4096 4 5 1 0 FOLIO 0x00005000 0x00006000 4096 5 6 1 0 FOLIO 0x00006000 0x00007000 4096 6 7 1 0 FOLIO 0x00007000 0x00008000 4096 7 8 1 0 FOLIO 0x00008000 0x00009000 4096 8 9 1 0 FOLIO 0x00009000 0x0000a000 4096 9 10 1 0 FOLIO 0x0000a000 0x0000b000 4096 10 11 1 0 FOLIO 0x0000b000 0x0000c000 4096 11 12 1 0 FOLIO 0x0000c000 0x0000d000 4096 12 13 1 0 FOLIO 0x0000d000 0x0000e000 4096 13 14 1 0 FOLIO 0x0000e000 0x0000f000 4096 14 15 1 0 FOLIO 0x0000f000 0x00010000 4096 15 16 1 0 FOLIO 0x00010000 0x00011000 4096 16 17 1 0 FOLIO 0x00011000 0x00012000 4096 17 18 1 0 FOLIO 0x00012000 0x00013000 4096 18 19 1 0 FOLIO 0x00013000 0x00014000 4096 19 20 1 0 FOLIO 0x00014000 0x00015000 4096 20 21 1 0 FOLIO 0x00015000 0x00016000 4096 21 22 1 0 FOLIO 0x00016000 0x00017000 4096 22 23 1 0 FOLIO 0x00017000 0x00018000 4096 23 24 1 0 FOLIO 0x00018000 0x00019000 4096 24 25 1 0 FOLIO 0x00019000 0x0001a000 4096 25 26 1 0 FOLIO 0x0001a000 0x0001b000 4096 26 27 1 0 FOLIO 0x0001b000 0x0001c000 4096 27 28 1 0 FOLIO 0x0001c000 0x0001d000 4096 28 29 1 0 FOLIO 0x0001d000 0x0001e000 4096 29 30 1 0 FOLIO 0x0001e000 0x0001f000 4096 30 31 1 0 FOLIO 0x0001f000 0x00020000 4096 31 32 1 0 FOLIO 0x00020000 0x00021000 4096 32 33 1 0 FOLIO 0x00021000 0x00022000 4096 33 34 1 0 FOLIO 0x00022000 0x00024000 8192 34 36 2 1 FOLIO 0x00024000 0x00028000 16384 36 40 4 2 FOLIO 0x00028000 0x0002c000 16384 40 44 4 2 FOLIO 0x0002c000 0x00030000 16384 44 48 4 2 FOLIO 0x00030000 0x00034000 16384 48 52 4 2 FOLIO 0x00034000 0x00038000 16384 52 56 4 2 FOLIO 0x00038000 0x0003c000 16384 56 60 4 2 FOLIO 0x0003c000 0x00040000 16384 60 64 4 2 FOLIO 0x00040000 0x00050000 65536 64 80 16 4 FOLIO 0x00050000 0x00060000 65536 80 96 16 4 FOLIO 0x00060000 0x00080000 131072 96 128 32 5 FOLIO 0x00080000 0x000a0000 131072 128 160 32 5 FOLIO 0x000a0000 0x000c0000 131072 160 192 32 5 FOLIO 0x000c0000 0x000e0000 131072 192 224 32 5 FOLIO 0x000e0000 0x00100000 131072 224 256 32 5 FOLIO 0x00100000 0x00120000 131072 256 288 32 5 FOLIO 0x00120000 0x00140000 131072 288 320 32 5 Y HOLE 0x00140000 0x00800000 7077888 320 2048 1728 Link: https://lkml.kernel.org/r/20250609092729.274960-5-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Reviewed-by: Jan Kara Cc: Chaitanya S Prakash Cc: David Hildenbrand Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index ef819b232d66..e14e9d11ca0f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1043,6 +1043,7 @@ struct fown_struct { * and so were/are genuinely "ahead". Start next readahead when * the first of these pages is accessed. * @ra_pages: Maximum size of a readahead request, copied from the bdi. + * @order: Preferred folio order used for most recent readahead. * @mmap_miss: How many mmap accesses missed in the page cache. * @prev_pos: The last byte in the most recent read request. * @@ -1054,6 +1055,7 @@ struct file_ra_state { unsigned int size; unsigned int async_size; unsigned int ra_pages; + unsigned short order; unsigned short mmap_miss; loff_t prev_pos; }; -- cgit v1.2.3 From 38b0ece6d76374b989928021b5d310be11b99b5c Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 9 Jun 2025 10:27:27 +0100 Subject: mm/filemap: allow arch to request folio size for exec memory Change the readahead config so that if it is being requested for an executable mapping, do a synchronous read into a set of folios with an arch-specified order and in a naturally aligned manner. We no longer center the read on the faulting page but simply align it down to the previous natural boundary. Additionally, we don't bother with an asynchronous part. On arm64 if memory is physically contiguous and naturally aligned to the "contpte" size, we can use contpte mappings, which improves utilization of the TLB. When paired with the "multi-size THP" feature, this works well to reduce dTLB pressure. However iTLB pressure is still high due to executable mappings having a low likelihood of being in the required folio size and mapping alignment, even when the filesystem supports readahead into large folios (e.g. XFS). The reason for the low likelihood is that the current readahead algorithm starts with an order-0 folio and increases the folio order by 2 every time the readahead mark is hit. But most executable memory tends to be accessed randomly and so the readahead mark is rarely hit and most executable folios remain order-0. So let's special-case the read(ahead) logic for executable mappings. The trade-off is performance improvement (due to more efficient storage of the translations in iTLB) vs potential for making reclaim more difficult (due to the folios being larger so if a part of the folio is hot the whole thing is considered hot). But executable memory is a small portion of the overall system memory so I doubt this will even register from a reclaim perspective. I've chosen 64K folio size for arm64 which benefits both the 4K and 16K base page size configs. Crucially the same amount of data is still read (usually 128K) so I'm not expecting any read amplification issues. I don't anticipate any write amplification because text is always RO. Note that the text region of an ELF file could be populated into the page cache for other reasons than taking a fault in a mmapped area. The most common case is due to the loader read()ing the header which can be shared with the beginning of text. So some text will still remain in small folios, but this simple, best effort change provides good performance improvements as is. Confine this special-case approach to the bounds of the VMA. This prevents wasting memory for any padding that might exist in the file between sections. Previously the padding would have been contained in order-0 folios and would be easy to reclaim. But now it would be part of a larger folio so more difficult to reclaim. Solve this by simply not reading it into memory in the first place. Benchmarking ============ The below shows pgbench and redis benchmarks on Graviton3 arm64 system. First, confirmation that this patch causes more text to be contained in 64K folios: +----------------------+---------------+---------------+---------------+ | File-backed folios by| system boot | pgbench | redis | | size as percentage of+-------+-------+-------+-------+-------+-------+ | all mapped text mem |before | after |before | after |before | after | +======================+=======+=======+=======+=======+=======+=======+ | base-page-4kB | 78% | 30% | 78% | 11% | 73% | 14% | | thp-aligned-8kB | 1% | 0% | 0% | 0% | 1% | 0% | | thp-aligned-16kB | 17% | 4% | 17% | 3% | 20% | 4% | | thp-aligned-32kB | 1% | 1% | 1% | 2% | 1% | 1% | | thp-aligned-64kB | 3% | 63% | 3% | 81% | 4% | 77% | | thp-aligned-128kB | 0% | 1% | 1% | 1% | 1% | 2% | | thp-unaligned-64kB | 0% | 0% | 0% | 1% | 0% | 1% | | thp-unaligned-128kB | 0% | 1% | 0% | 0% | 0% | 0% | | thp-partial | 0% | 0% | 0% | 1% | 0% | 1% | +----------------------+-------+-------+-------+-------+-------+-------+ | cont-aligned-64kB | 4% | 65% | 4% | 83% | 6% | 79% | +----------------------+-------+-------+-------+-------+-------+-------+ The above shows that for both workloads (each isolated with cgroups) as well as the general system state after boot, the amount of text backed by 4K and 16K folios reduces and the amount backed by 64K folios increases significantly. And the amount of text that is contpte-mapped significantly increases (see last row). And this is reflected in performance improvement. "(I)" indicates a statistically significant improvement. Note TPS and Reqs/sec are rates so bigger is better, ms is time so smaller is better: +-------------+-------------------------------------------+------------+ | Benchmark | Result Class | Improvemnt | +=============+===========================================+============+ | pts/pgbench | Scale: 1 Clients: 1 RO (TPS) | (I) 3.47% | | | Scale: 1 Clients: 1 RO - Latency (ms) | -2.88% | | | Scale: 1 Clients: 250 RO (TPS) | (I) 5.02% | | | Scale: 1 Clients: 250 RO - Latency (ms) | (I) -4.79% | | | Scale: 1 Clients: 1000 RO (TPS) | (I) 6.16% | | | Scale: 1 Clients: 1000 RO - Latency (ms) | (I) -5.82% | | | Scale: 100 Clients: 1 RO (TPS) | 2.51% | | | Scale: 100 Clients: 1 RO - Latency (ms) | -3.51% | | | Scale: 100 Clients: 250 RO (TPS) | (I) 4.75% | | | Scale: 100 Clients: 250 RO - Latency (ms) | (I) -4.44% | | | Scale: 100 Clients: 1000 RO (TPS) | (I) 6.34% | | | Scale: 100 Clients: 1000 RO - Latency (ms)| (I) -5.95% | +-------------+-------------------------------------------+------------+ | pts/redis | Test: GET Connections: 50 (Reqs/sec) | (I) 3.20% | | | Test: GET Connections: 1000 (Reqs/sec) | (I) 2.55% | | | Test: LPOP Connections: 50 (Reqs/sec) | (I) 4.59% | | | Test: LPOP Connections: 1000 (Reqs/sec) | (I) 4.81% | | | Test: LPUSH Connections: 50 (Reqs/sec) | (I) 5.31% | | | Test: LPUSH Connections: 1000 (Reqs/sec) | (I) 4.36% | | | Test: SADD Connections: 50 (Reqs/sec) | (I) 2.64% | | | Test: SADD Connections: 1000 (Reqs/sec) | (I) 4.15% | | | Test: SET Connections: 50 (Reqs/sec) | (I) 3.11% | | | Test: SET Connections: 1000 (Reqs/sec) | (I) 3.36% | +-------------+-------------------------------------------+------------+ [ryan.roberts@arm.com: fix use-after-free] Link: https://lkml.kernel.org/r/ea7f9da7-9a9f-4b85-9d0a-35b320f5ed25@arm.com [ryan.roberts@arm.com: use the vma_pages() helper instead of open-coding] Link: https://lkml.kernel.org/r/0e0f674b-3b7e-494f-ae7a-fc9dbb98dad4@arm.com Link: https://lkml.kernel.org/r/20250609092729.274960-6-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Reviewed-by: Jan Kara Acked-by: Will Deacon Cc: Chaitanya S Prakash Cc: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 0b6e1f781d86..e4a3895c043b 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -456,6 +456,17 @@ static inline bool arch_has_hw_pte_young(void) } #endif +#ifndef exec_folio_order +/* + * Returns preferred minimum folio order for executable file-backed memory. Must + * be in range [0, PMD_ORDER). Default to order-0. + */ +static inline unsigned int exec_folio_order(void) +{ + return 0; +} +#endif + #ifndef arch_check_zapped_pte static inline void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte) -- cgit v1.2.3 From 7e43195c609f0499e46c6bfa9472e39c76af445b Mon Sep 17 00:00:00 2001 From: Casey Chen Date: Tue, 10 Jun 2025 10:22:58 -0600 Subject: alloc_tag: remove empty module tag section The empty MOD_CODETAG_SECTIONS() macro added an incomplete .data section in module linker script, which caused symbol lookup tools like gdb to misinterpret symbol addresses e.g., __ib_process_cq incorrectly mapping to unrelated functions like below. (gdb) disas __ib_process_cq Dump of assembler code for function trace_event_fields_cq_schedule: Removing the empty section restores proper symbol resolution and layout, ensuring .data placement behaves as expected. Link: https://lkml.kernel.org/r/20250610162258.324645-1-cachen@purestorage.com Fixes: 0db6f8d7820a ("alloc_tag: load module tags into separate contiguous memory") 22d407b164ff ("lib: add allocation tagging support for memory allocation profiling") Signed-off-by: Casey Chen Reviewed-by: Yuanyuan Zhong Acked-by: Suren Baghdasaryan Cc: Arnd Bergmann Cc: Kent Overstreet Cc: Luis Chamberalin Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- include/asm-generic/codetag.lds.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/asm-generic/codetag.lds.h b/include/asm-generic/codetag.lds.h index 372c320c5043..a45fe3d141a1 100644 --- a/include/asm-generic/codetag.lds.h +++ b/include/asm-generic/codetag.lds.h @@ -11,12 +11,6 @@ #define CODETAG_SECTIONS() \ SECTION_WITH_BOUNDARIES(alloc_tags) -/* - * Module codetags which aren't used after module unload, therefore have the - * same lifespan as the module and can be safely unloaded with the module. - */ -#define MOD_CODETAG_SECTIONS() - #define MOD_SEPARATE_CODETAG_SECTION(_name) \ .codetag.##_name : { \ SECTION_WITH_BOUNDARIES(_name) \ -- cgit v1.2.3 From 96d81e4766f9e88b66a0502b5a7f34a4c20ac754 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 5 Jun 2025 14:51:04 +0100 Subject: mm/pagewalk: split walk_page_range_novma() into kernel/user parts walk_page_range_novma() is rather confusing - it supports two modes, one used often, the other used only for debugging. The first mode is the common case of traversal of kernel page tables, which is what nearly all callers use this for. Secondly it provides an unusual debugging interface that allows for the traversal of page tables in a userland range of memory even for that memory which is not described by a VMA. It is far from certain that such page tables should even exist, but perhaps this is precisely why it is useful as a debugging mechanism. As a result, this is utilised by ptdump only. Historically, things were reversed - ptdump was the only user, and other parts of the kernel evolved to use the kernel page table walking here. Since we have some complicated and confusing locking rules for the novma case, it makes sense to separate the two usages into their own functions. Doing this also provide self-documentation as to the intent of the caller - are they doing something rather unusual or are they simply doing a standard kernel page table walk? We therefore establish two separate functions - walk_page_range_debug() for this single usage, and walk_kernel_page_table_range() for general kernel page table walking. The walk_page_range_debug() function is currently used to traverse both userland and kernel mappings, so we maintain this and in the case of kernel mappings being traversed, we have walk_page_range_debug() invoke walk_kernel_page_table_range() internally. We additionally make walk_page_range_debug() internal to mm. Link: https://lkml.kernel.org/r/20250605135104.90720-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: Mike Rapoport (Microsoft) Acked-by: Qi Zheng Reviewed-by: Oscar Salvador Reviewed-by: Suren Baghdasaryan Reviewed-by: Vlastimil Babka Acked-by: David Hildenbrand Cc: Albert Ou Cc: Alexandre Ghiti Cc: Barry Song Cc: Huacai Chen Cc: Jann Horn Cc: Jonas Bonn Cc: Liam Howlett Cc: Michal Hocko Cc: Muchun Song Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Stafford Horne Cc: Stefan Kristiansson Cc: WANG Xuerui Signed-off-by: Andrew Morton --- include/linux/pagewalk.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index 9700a29f8afb..8ac2f6d6d2a3 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -129,10 +129,9 @@ struct mm_walk { int walk_page_range(struct mm_struct *mm, unsigned long start, unsigned long end, const struct mm_walk_ops *ops, void *private); -int walk_page_range_novma(struct mm_struct *mm, unsigned long start, - unsigned long end, const struct mm_walk_ops *ops, - pgd_t *pgd, - void *private); +int walk_kernel_page_table_range(unsigned long start, + unsigned long end, const struct mm_walk_ops *ops, + pgd_t *pgd, void *private); int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start, unsigned long end, const struct mm_walk_ops *ops, void *private); -- cgit v1.2.3 From b0da7709c28c35e0a51d4b1b350c9028358dfb14 Mon Sep 17 00:00:00 2001 From: David Wang <00107082@163.com> Date: Mon, 9 Jun 2025 14:42:00 +0800 Subject: alloc_tag: add sequence number for module and iterator Codetag iterator use pair to guarantee the validness. But both id and address can be reused, there is theoretical possibility when module inserted right after another module removed, kmalloc returns an address same as the address kfree by previous module and IDR key reuses the key recently removed. Add a sequence number to codetag_module and code_iterator, the sequence number is strickly incremented whenever a module is loaded. An iterator is valid if and only if its sequence number match codetag_module's. Link: https://lkml.kernel.org/r/20250609064200.112639-1-00107082@163.com Signed-off-by: David Wang <00107082@163.com> Acked-by: Suren Baghdasaryan Cc: Kent Overstreet Cc: Tim Chen Signed-off-by: Andrew Morton --- include/linux/codetag.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/codetag.h b/include/linux/codetag.h index 5f2b9a1f722c..457ed8fd3214 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -54,6 +54,7 @@ struct codetag_iterator { struct codetag_module *cmod; unsigned long mod_id; struct codetag *ct; + unsigned long mod_seq; }; #ifdef MODULE -- cgit v1.2.3 From cce35103135c7ffc7bebc32ebfc74fe1f2c3cb5d Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 10 Jun 2025 14:27:51 +0800 Subject: mm/memory-tier: fix abstract distance calculation overflow In mt_perf_to_adistance(), the calculation of abstract distance (adist) involves multiplying several int values including MEMTIER_ADISTANCE_DRAM. *adist = MEMTIER_ADISTANCE_DRAM * (perf->read_latency + perf->write_latency) / (default_dram_perf.read_latency + default_dram_perf.write_latency) * (default_dram_perf.read_bandwidth + default_dram_perf.write_bandwidth) / (perf->read_bandwidth + perf->write_bandwidth); Since these values can be large, the multiplication may exceed the maximum value of an int (INT_MAX) and overflow (Our platform did), leading to an incorrect adist. User-visible impact: The memory tiering subsystem will misinterpret slow memory (like CXL) as faster than DRAM, causing inappropriate demotion of pages from CXL (slow memory) to DRAM (fast memory). For example, we will see the following demotion chains from the dmesg, where Node0,1 are DRAM, and Node2,3 are CXL node: Demotion targets for Node 0: null Demotion targets for Node 1: null Demotion targets for Node 2: preferred: 0-1, fallback: 0-1 Demotion targets for Node 3: preferred: 0-1, fallback: 0-1 Change MEMTIER_ADISTANCE_DRAM to be a long constant by writing it with the 'L' suffix. This prevents the overflow because the multiplication will then be done in the long type which has a larger range. Link: https://lkml.kernel.org/r/20250611023439.2845785-1-lizhijian@fujitsu.com Link: https://lkml.kernel.org/r/20250610062751.2365436-1-lizhijian@fujitsu.com Fixes: 3718c02dbd4c ("acpi, hmat: calculate abstract distance with HMAT") Signed-off-by: Li Zhijian Reviewed-by: Huang Ying Acked-by: Balbir Singh Reviewed-by: Donet Tom Reviewed-by: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- include/linux/memory-tiers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index 0dc0cf2863e2..7a805796fcfd 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -18,7 +18,7 @@ * adistance value (slightly faster) than default DRAM adistance to be part of * the same memory tier. */ -#define MEMTIER_ADISTANCE_DRAM ((4 * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1)) +#define MEMTIER_ADISTANCE_DRAM ((4L * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1)) struct memory_tier; struct memory_dev_type { -- cgit v1.2.3 From 4f8ba33bbdfc475fdc1ac5de6a93f5de93203ed5 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Wed, 11 Jun 2025 22:47:45 +1200 Subject: mm: madvise: use per_vma lock for MADV_FREE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MADV_FREE is another option, besides MADV_DONTNEED, for dynamic memory freeing in user-space native or Java heap memory management. For example, jemalloc can be configured to use MADV_FREE, and recent versions of the Android Java heap have also increasingly adopted MADV_FREE. Supporting per-VMA locking for MADV_FREE thus appears increasingly necessary. We have replaced walk_page_range() with walk_page_range_vma(). Along with the proposed madvise_lock_mode by Lorenzo, the necessary infrastructure is now in place to begin exploring per-VMA locking support for MADV_FREE and potentially other madvise using walk_page_range_vma(). This patch adds support for the PGWALK_VMA_RDLOCK walk_lock mode in walk_page_range_vma(), and leverages madvise_lock_mode from madv_behavior to select the appropriate walk_lock—either mmap_lock or per-VMA lock—based on the context. Because we now dynamically update the walk_ops->walk_lock field, we must ensure this is thread-safe. The madvise_free_walk_ops is now defined as a stack variable instead of a global constant. Link: https://lkml.kernel.org/r/20250611104745.57405-1-21cnbao@gmail.com Signed-off-by: Barry Song Acked-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Acked-by: SeongJae Park Cc: "Liam R. Howlett" Cc: Vlastimil Babka Cc: Jann Horn Cc: Suren Baghdasaryan Cc: Lokesh Gidra Cc: Mike Rapoport Cc: Michal Hocko Cc: Tangquan Zheng Cc: Qi Zheng Signed-off-by: Andrew Morton --- include/linux/pagewalk.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index 8ac2f6d6d2a3..682472c15495 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -14,6 +14,8 @@ enum page_walk_lock { PGWALK_WRLOCK = 1, /* vma is expected to be already write-locked during the walk */ PGWALK_WRLOCK_VERIFY = 2, + /* vma is expected to be already read-locked during the walk */ + PGWALK_VMA_RDLOCK_VERIFY = 3, }; /** -- cgit v1.2.3 From 234dda7a49ff94154b527784687f549b9f1417c1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 Jun 2025 15:34:41 +0100 Subject: mm: remove zero_user() All users have now been converted to either memzero_page() or folio_zero_range(). Link: https://lkml.kernel.org/r/20250612143443.2848197-6-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Alex Markuze Cc: Christoph Hellwig Cc: Ilya Dryomov Cc: Ira Weiny Cc: Jens Axboe Cc: Xiubo Li Cc: Dan Carpenter Cc: Viacheslav Dubeyko Signed-off-by: Andrew Morton --- include/linux/highmem.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index e48d7f27b0b9..a30526cc53a7 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -292,12 +292,6 @@ static inline void zero_user_segment(struct page *page, zero_user_segments(page, start, end, 0, 0); } -static inline void zero_user(struct page *page, - unsigned start, unsigned size) -{ - zero_user_segments(page, start, start + size, 0, 0); -} - #ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE static inline void copy_user_highpage(struct page *to, struct page *from, -- cgit v1.2.3 From 02825c0925fbd53c085e88a1b7603eee8c9c6751 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 13 Jun 2025 11:27:02 +0200 Subject: mm/huge_memory: don't mark refcounted folios special in vmf_insert_folio_pud() Marking PUDs that map a "normal" refcounted folios as special is against our rules documented for vm_normal_page(). normal (refcounted) folios shall never have the page table mapping marked as special. Fortunately, there are not that many pud_special() check that can be mislead and are right now rather harmless: e.g., none so far bases decisions whether to grab a folio reference on that decision. Well, and GUP-fast will fallback to GUP-slow. All in all, so far no big implications as it seems. Getting this right will get more important as we introduce folio_normal_page_pud() and start using it in more place where we currently special-case based on other VMA flags. Fix it just like we fixed vmf_insert_folio_pmd(). Add folio_mk_pud() to mimic what we do with folio_mk_pmd(). Link: https://lkml.kernel.org/r/20250613092702.1943533-4-david@redhat.com Fixes: dbe54153296d ("mm/huge_memory: add vmf_insert_folio_pud()") Signed-off-by: David Hildenbrand Reviewed-by: Dan Williams Reviewed-by: Lorenzo Stoakes Reviewed-by: Jason Gunthorpe Tested-by: Dan Williams Reviewed-by: Oscar Salvador Cc: Alistair Popple Cc: Baolin Wang Cc: Dev Jain Cc: Liam Howlett Cc: Mariano Pache Cc: Michal Hocko Cc: Mike Rapoport Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0ef2ba0c667a..b7e2abd8ce0d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1816,7 +1816,24 @@ static inline pmd_t folio_mk_pmd(struct folio *folio, pgprot_t pgprot) { return pmd_mkhuge(pfn_pmd(folio_pfn(folio), pgprot)); } -#endif + +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +/** + * folio_mk_pud - Create a PUD for this folio + * @folio: The folio to create a PUD for + * @pgprot: The page protection bits to use + * + * Create a page table entry for the first page of this folio. + * This is suitable for passing to set_pud_at(). + * + * Return: A page table entry suitable for mapping this folio. + */ +static inline pud_t folio_mk_pud(struct folio *folio, pgprot_t pgprot) +{ + return pud_mkhuge(pfn_pud(folio_pfn(folio), pgprot)); +} +#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* CONFIG_MMU */ static inline bool folio_has_pincount(const struct folio *folio) -- cgit v1.2.3 From 9e82db9c0cdafa068969373b4bad140f72988e32 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 13 Jun 2025 20:48:23 +0100 Subject: highmem: remove a use of folio->page Call folio_address() instead of page_address(). Link: https://lkml.kernel.org/r/20250613194825.3175276-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/highmem-internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index 9a7683d79a4b..36053c3d6d64 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -195,7 +195,7 @@ static inline void *kmap_local_page_try_from_panic(struct page *page) static inline void *kmap_local_folio(struct folio *folio, size_t offset) { - return page_address(&folio->page) + offset; + return folio_address(folio) + offset; } static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) -- cgit v1.2.3 From 78ddaa358ec4cdd60bd0e243ced1c83a52c30241 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Wed, 18 Jun 2025 20:42:52 +0100 Subject: mm: change vm_get_page_prot() to accept vm_flags_t argument Patch series "use vm_flags_t consistently". The VMA flags field vma->vm_flags is of type vm_flags_t. Right now this is exactly equivalent to unsigned long, but it should not be assumed to be. Much code that references vma->vm_flags already correctly uses vm_flags_t, but a fairly large chunk of code simply uses unsigned long and assumes that the two are equivalent. This series corrects that and has us use vm_flags_t consistently. This series is motivated by the desire to, in a future series, adjust vm_flags_t to be a u64 regardless of whether the kernel is 32-bit or 64-bit in order to deal with the VMA flag exhaustion issue and avoid all the various problems that arise from it (being unable to use certain features in 32-bit, being unable to add new flags except for 64-bit, etc.) This is therefore a critical first step towards that goal. At any rate, using the correct type is of value regardless. We additionally take the opportunity to refer to VMA flags as vm_flags where possible to make clear what we're referring to. Overall, this series does not introduce any functional change. This patch (of 3): We abstract the type of the VMA flags to vm_flags_t, however in may places it is simply assumed this is unsigned long, which is simply incorrect. At the moment this is simply an incongruity, however in future we plan to change this type and therefore this change is a critical requirement for doing so. Overall, this patch does not introduce any functional change. [lorenzo.stoakes@oracle.com: add missing vm_get_page_prot() instance, remove include] Link: https://lkml.kernel.org/r/552f88e1-2df8-4e95-92b8-812f7c8db829@lucifer.local Link: https://lkml.kernel.org/r/cover.1750274467.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/a12769720a2743f235643b158c4f4f0a9911daf0.1750274467.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: Mike Rapoport (Microsoft) Acked-by: Christian Brauner Reviewed-by: Vlastimil Babka Reviewed-by: Oscar Salvador Reviewed-by: Pedro Falcato Acked-by: Catalin Marinas [arm64] Acked-by: Zi Yan Acked-by: David Hildenbrand Reviewed-by: Anshuman Khandual Cc: Liam R. Howlett Cc: Lorenzo Stoakes Cc: Jann Horn Cc: Kees Cook Cc: Jan Kara Cc: Jarkko Sakkinen Signed-off-by: Andrew Morton --- include/linux/mm.h | 4 ++-- include/linux/pgtable.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index b7e2abd8ce0d..78bb177ba55f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3489,10 +3489,10 @@ static inline bool range_in_vma(struct vm_area_struct *vma, } #ifdef CONFIG_MMU -pgprot_t vm_get_page_prot(unsigned long vm_flags); +pgprot_t vm_get_page_prot(vm_flags_t vm_flags); void vma_set_page_prot(struct vm_area_struct *vma); #else -static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) +static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags) { return __pgprot(0); } diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e4a3895c043b..d05e35a0facf 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -2016,7 +2016,7 @@ typedef unsigned int pgtbl_mod_mask; * x: (yes) yes */ #define DECLARE_VM_GET_PAGE_PROT \ -pgprot_t vm_get_page_prot(unsigned long vm_flags) \ +pgprot_t vm_get_page_prot(vm_flags_t vm_flags) \ { \ return protection_map[vm_flags & \ (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)]; \ -- cgit v1.2.3 From bfbe71109fa40e8cc05a0f99e6734b7d76ee00b0 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Wed, 18 Jun 2025 20:42:53 +0100 Subject: mm: update core kernel code to use vm_flags_t consistently The core kernel code is currently very inconsistent in its use of vm_flags_t vs. unsigned long. This prevents us from changing the type of vm_flags_t in the future and is simply not correct, so correct this. While this results in rather a lot of churn, it is a critical pre-requisite for a future planned change to VMA flag type. Additionally, update VMA userland tests to account for the changes. To make review easier and to break things into smaller parts, driver and architecture-specific changes is left for a subsequent commit. The code has been adjusted to cascade the changes across all calling code as far as is needed. We will adjust architecture-specific and driver code in a subsequent patch. Overall, this patch does not introduce any functional change. Link: https://lkml.kernel.org/r/d1588e7bb96d1ea3fe7b9df2c699d5b4592d901d.1750274467.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: Kees Cook Acked-by: Mike Rapoport (Microsoft) Acked-by: Jan Kara Acked-by: Christian Brauner Reviewed-by: Vlastimil Babka Acked-by: Oscar Salvador Reviewed-by: Pedro Falcato Acked-by: Zi Yan Acked-by: David Hildenbrand Reviewed-by: Anshuman Khandual Cc: Jann Horn Cc: Liam R. Howlett Cc: Catalin Marinas Cc: Jarkko Sakkinen Signed-off-by: Andrew Morton --- include/linux/coredump.h | 2 +- include/linux/huge_mm.h | 12 ++++++------ include/linux/khugepaged.h | 4 ++-- include/linux/ksm.h | 4 ++-- include/linux/memfd.h | 4 ++-- include/linux/mm.h | 6 +++--- include/linux/mm_types.h | 2 +- include/linux/mman.h | 4 ++-- include/linux/rmap.h | 4 ++-- include/linux/userfaultfd_k.h | 4 ++-- include/trace/events/fs_dax.h | 6 +++--- 11 files changed, 26 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 76e41805b92d..c504b0faecc2 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -10,7 +10,7 @@ #ifdef CONFIG_COREDUMP struct core_vma_metadata { unsigned long start, end; - unsigned long flags; + vm_flags_t flags; unsigned long dump_size; unsigned long pgoff; struct file *file; diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 2f190c90192d..7753daac49f7 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -261,7 +261,7 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, } unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, - unsigned long vm_flags, + vm_flags_t vm_flags, unsigned long tva_flags, unsigned long orders); @@ -282,7 +282,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, */ static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, - unsigned long vm_flags, + vm_flags_t vm_flags, unsigned long tva_flags, unsigned long orders) { @@ -317,7 +317,7 @@ struct thpsize { (1< Date: Wed, 18 Jun 2025 09:33:31 -0700 Subject: mm/damon: fix minor typos in damon header Fix typos in include/linux/damon.h. Link: https://lkml.kernel.org/r/20250618163331.54910-1-sj@kernel.org Signed-off-by: Nathan Gao Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index a4011726cb3b..bb58e36f019e 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -450,7 +450,7 @@ struct damos_access_pattern { /** * struct damos - Represents a Data Access Monitoring-based Operation Scheme. * @pattern: Access pattern of target regions. - * @action: &damo_action to be applied to the target regions. + * @action: &damos_action to be applied to the target regions. * @apply_interval_us: The time between applying the @action. * @quota: Control the aggressiveness of this scheme. * @wmarks: Watermarks for automated (in)activation of this scheme. @@ -656,7 +656,7 @@ struct damon_call_control { * struct damon_intervals_goal - Monitoring intervals auto-tuning goal. * * @access_bp: Access events observation ratio to achieve in bp. - * @aggrs: Number of aggregations to acheive @access_bp within. + * @aggrs: Number of aggregations to achieve @access_bp within. * @min_sample_us: Minimum resulting sampling interval in microseconds. * @max_sample_us: Maximum resulting sampling interval in microseconds. * -- cgit v1.2.3 From d29d64afa2b20d9bd210c01bfff78545675b5135 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Wed, 18 Jun 2025 14:50:35 +0200 Subject: codetag: avoid unused alloc_tags sections/symbols With CONFIG_MEM_ALLOC_PROFILING=n, vmlinux and all modules unnecessarily contain the symbols __start_alloc_tags and __stop_alloc_tags, which define an empty range. In the case of modules, the presence of these symbols also forces the linker to create an empty .codetag.alloc_tags section. Update codetag.lds.h to make the data conditional on CONFIG_MEM_ALLOC_PROFILING. Link: https://lkml.kernel.org/r/20250618125037.53182-1-petr.pavlu@suse.com Signed-off-by: Petr Pavlu Reviewed-by: Kent Overstreet Reviewed-by: Suren Baghdasaryan Cc: Arnd Bergmann Cc: Casey Chen Cc: Petr Pavlu Signed-off-by: Andrew Morton --- include/asm-generic/codetag.lds.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/codetag.lds.h b/include/asm-generic/codetag.lds.h index a45fe3d141a1..a14f4bdafdda 100644 --- a/include/asm-generic/codetag.lds.h +++ b/include/asm-generic/codetag.lds.h @@ -2,6 +2,12 @@ #ifndef __ASM_GENERIC_CODETAG_LDS_H #define __ASM_GENERIC_CODETAG_LDS_H +#ifdef CONFIG_MEM_ALLOC_PROFILING +#define IF_MEM_ALLOC_PROFILING(...) __VA_ARGS__ +#else +#define IF_MEM_ALLOC_PROFILING(...) +#endif + #define SECTION_WITH_BOUNDARIES(_name) \ . = ALIGN(8); \ __start_##_name = .; \ @@ -9,7 +15,7 @@ __stop_##_name = .; #define CODETAG_SECTIONS() \ - SECTION_WITH_BOUNDARIES(alloc_tags) + IF_MEM_ALLOC_PROFILING(SECTION_WITH_BOUNDARIES(alloc_tags)) #define MOD_SEPARATE_CODETAG_SECTION(_name) \ .codetag.##_name : { \ @@ -22,6 +28,6 @@ * unload them individually once unused. */ #define MOD_SEPARATE_CODETAG_SECTIONS() \ - MOD_SEPARATE_CODETAG_SECTION(alloc_tags) + IF_MEM_ALLOC_PROFILING(MOD_SEPARATE_CODETAG_SECTION(alloc_tags)) #endif /* __ASM_GENERIC_CODETAG_LDS_H */ -- cgit v1.2.3 From 986f5f2b4be3b7eab9ecd85c472d03a2191d6fc0 Mon Sep 17 00:00:00 2001 From: Vivek Kasireddy Date: Tue, 17 Jun 2025 22:30:53 -0700 Subject: mm/hugetlb: make hugetlb_reserve_pages() return nr of entries updated Patch series "mm/memfd: Reserve hugetlb folios before allocation", v4. There are cases when we try to pin a folio but discover that it has not been faulted-in. So, we try to allocate it in memfd_alloc_folio() but the allocation request may not succeed if there are no active reservations in the system at that instant. Therefore, making a reservation (by calling hugetlb_reserve_pages()) associated with the allocation will ensure that our request would not fail due to lack of reservations. This will also ensure that proper region/subpool accounting is done with our allocation. This patch (of 3): Currently, hugetlb_reserve_pages() returns a bool to indicate whether the reservation map update for the range [from, to] was successful or not. This is not sufficient for the case where the caller needs to determine how many entries were updated for the range. Therefore, have hugetlb_reserve_pages() return the number of entries updated in the reservation map associated with the range [from, to]. Also, update the callers of hugetlb_reserve_pages() to handle the new return value. Link: https://lkml.kernel.org/r/20250618053415.1036185-1-vivek.kasireddy@intel.com Link: https://lkml.kernel.org/r/20250618053415.1036185-2-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy Cc: Steve Sistare Cc: Muchun Song Cc: David Hildenbrand Cc: Gerd Hoffmann Cc: Oscar Salvador Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 42f374e828a2..d8310b0f36dd 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -149,7 +149,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte, uffd_flags_t flags, struct folio **foliop); #endif /* CONFIG_USERFAULTFD */ -bool hugetlb_reserve_pages(struct inode *inode, long from, long to, +long hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, -- cgit v1.2.3 From 717cf9357325055ab6b41c4e0581f4d67601eb58 Mon Sep 17 00:00:00 2001 From: Vivek Kasireddy Date: Tue, 17 Jun 2025 22:30:54 -0700 Subject: mm/memfd: reserve hugetlb folios before allocation When we try to allocate a folio via alloc_hugetlb_folio_reserve(), we need to ensure that there is an active reservation associated with the allocation. Otherwise, our allocation request would fail if there are no active reservations made at that moment against any other allocations. This is because alloc_hugetlb_folio_reserve() checks h->resv_huge_pages before proceeding with the allocation. Therefore, to address this issue, we just need to make a reservation (by calling hugetlb_reserve_pages()) before we try to allocate the folio. This will also ensure that proper region/subpool accounting is done associated with our allocation. Link: https://lkml.kernel.org/r/20250618053415.1036185-3-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy Cc: Steve Sistare Cc: Muchun Song Cc: David Hildenbrand Cc: Gerd Hoffmann Cc: Oscar Salvador Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d8310b0f36dd..c6c87eae4a8d 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -740,6 +740,11 @@ extern unsigned int default_hstate_idx; #define default_hstate (hstates[default_hstate_idx]) +static inline struct hugepage_subpool *subpool_inode(struct inode *inode) +{ + return HUGETLBFS_SB(inode->i_sb)->spool; +} + static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) { return folio->_hugetlb_subpool; -- cgit v1.2.3 From 59b5ed409d03bc8b7bb153d78afcd7cea9d7bbfa Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Wed, 18 Jun 2025 09:58:09 +0800 Subject: mm/percpu: conditionally define _shared_alloc_tag via CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU Recently discovered this entry while checking kallsyms on ARM64: ffff800083e509c0 D _shared_alloc_tag If ARCH_NEEDS_WEAK_PER_CPU is not defined(it is only defined for s390 and alpha architectures), there's no need to statically define the percpu variable _shared_alloc_tag. Therefore, we need to implement isolation for this purpose. When building the core kernel code for s390 or alpha architectures, ARCH_NEEDS_WEAK_PER_CPU remains undefined (as it is gated by #if defined(MODULE)). However, when building modules for these architectures, the macro is explicitly defined. Therefore, we remove all instances of ARCH_NEEDS_WEAK_PER_CPU from the code and introduced CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU to replace the relevant logic. We can now conditionally define the perpcu variable _shared_alloc_tag based on CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU. This allows architectures (such as s390/alpha) that require weak definitions for percpu variables in modules to include the definition, while others can omit it via compile-time exclusion. Link: https://lkml.kernel.org/r/20250618015809.1235761-1-hao.ge@linux.dev Signed-off-by: Hao Ge Suggested-by: Suren Baghdasaryan Acked-by: Alexander Gordeev [s390] Acked-by: Mike Rapoport (Microsoft) Cc: Chistoph Lameter Cc: Christian Borntraeger Cc: David Hildenbrand Cc: Dennis Zhou Cc: Heiko Carstens Cc: Kent Overstreet Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matt Turner Cc: Richard Henderson Cc: Sven Schnelle Cc: Tejun Heo Cc: Vasily Gorbik Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/alloc_tag.h | 6 +++--- include/linux/percpu-defs.h | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index 8f7931eb7d16..9ef2633e2c08 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -88,7 +88,7 @@ static inline struct alloc_tag *ct_to_alloc_tag(struct codetag *ct) return container_of(ct, struct alloc_tag, ct); } -#ifdef ARCH_NEEDS_WEAK_PER_CPU +#if defined(CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU) && defined(MODULE) /* * When percpu variables are required to be defined as weak, static percpu * variables can't be used inside a function (see comments for DECLARE_PER_CPU_SECTION). @@ -102,7 +102,7 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); .ct = CODE_TAG_INIT, \ .counters = &_shared_alloc_tag }; -#else /* ARCH_NEEDS_WEAK_PER_CPU */ +#else /* CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU && MODULE */ #ifdef MODULE @@ -123,7 +123,7 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); #endif /* MODULE */ -#endif /* ARCH_NEEDS_WEAK_PER_CPU */ +#endif /* CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU && MODULE */ DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, mem_alloc_profiling_key); diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index c16cdeaa505e..12d90360f6db 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -63,14 +63,15 @@ * 1. The symbol must be globally unique, even the static ones. * 2. Static percpu variables cannot be defined inside a function. * - * Archs which need weak percpu definitions should define - * ARCH_NEEDS_WEAK_PER_CPU in asm/percpu.h when necessary. + * Archs which need weak percpu definitions should set + * CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU when necessary. * * To ensure that the generic code observes the above two * restrictions, if CONFIG_DEBUG_FORCE_WEAK_PER_CPU is set weak * definition is used for all cases. */ -#if defined(ARCH_NEEDS_WEAK_PER_CPU) || defined(CONFIG_DEBUG_FORCE_WEAK_PER_CPU) +#if (defined(CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU) && defined(MODULE)) || \ + defined(CONFIG_DEBUG_FORCE_WEAK_PER_CPU) /* * __pcpu_scope_* dummy variable is used to enforce scope. It * receives the static modifier when it's used in front of -- cgit v1.2.3 From fd2825b0760a10a9626986cca64f5664302ffdfc Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Thu, 19 Jun 2025 18:57:57 +1000 Subject: mm/gup: remove pXX_devmap usage from get_user_pages() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GUP uses pXX_devmap() calls to see if it needs to a get a reference on the associated pgmap data structure to ensure the pages won't go away. However it's a driver responsibility to ensure that if pages are mapped (ie. discoverable by GUP) that they are not offlined or removed from the memmap so there is no need to hold a reference on the pgmap data structure to ensure this. Furthermore mappings with PFN_DEV are no longer created, hence this effectively dead code anyway so can be removed. Link: https://lkml.kernel.org/r/708b2be76876659ec5261fe5d059b07268b98b36.1750323463.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Reviewed-by: Jason Gunthorpe Reviewed-by: Dan Williams Cc: Balbir Singh Cc: Björn Töpel Cc: Björn Töpel Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: David Hildenbrand Cc: Deepak Gupta Cc: Gerald Schaefer Cc: Inki Dae Cc: John Groves Cc: John Hubbard Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 7753daac49f7..a2df2308cb2c 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -473,9 +473,6 @@ static inline bool folio_test_pmd_mappable(struct folio *folio) return folio_order(folio) >= HPAGE_PMD_ORDER; } -struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd, int flags, struct dev_pagemap **pgmap); - vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf); extern struct folio *huge_zero_folio; -- cgit v1.2.3 From 8a6a984c2e0ea406459b445a3910a454bece3aa1 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Thu, 19 Jun 2025 18:57:59 +1000 Subject: mm: remove redundant pXd_devmap calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DAX was the only thing that created pmd_devmap and pud_devmap entries however it no longer does as DAX pages are now refcounted normally and pXd_trans_huge() returns true for those. Therefore checking both pXd_devmap and pXd_trans_huge() is redundant and the former can be removed without changing behaviour as it will always be false. Link: https://lkml.kernel.org/r/d58f089dc16b7feb7c6728164f37dea65d64a0d3.1750323463.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Cc: Balbir Singh Cc: Björn Töpel Cc: Björn Töpel Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: Dan Williams Cc: David Hildenbrand Cc: Deepak Gupta Cc: Gerald Schaefer Cc: Inki Dae Cc: Jason Gunthorpe Cc: John Groves Cc: John Hubbard Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 10 ++++------ include/linux/pgtable.h | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index a2df2308cb2c..26607f2c65fb 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -400,8 +400,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, #define split_huge_pmd(__vma, __pmd, __address) \ do { \ pmd_t *____pmd = (__pmd); \ - if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd) \ - || pmd_devmap(*____pmd)) \ + if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd)) \ __split_huge_pmd(__vma, __pmd, __address, \ false); \ } while (0) @@ -426,8 +425,7 @@ change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, #define split_huge_pud(__vma, __pud, __address) \ do { \ pud_t *____pud = (__pud); \ - if (pud_trans_huge(*____pud) \ - || pud_devmap(*____pud)) \ + if (pud_trans_huge(*____pud)) \ __split_huge_pud(__vma, __pud, __address); \ } while (0) @@ -450,7 +448,7 @@ static inline int is_swap_pmd(pmd_t pmd) static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { - if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) + if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd)) return __pmd_trans_huge_lock(pmd, vma); else return NULL; @@ -458,7 +456,7 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma) { - if (pud_trans_huge(*pud) || pud_devmap(*pud)) + if (pud_trans_huge(*pud)) return __pud_trans_huge_lock(pud, vma); else return NULL; diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index d05e35a0facf..ffcd966cf2d4 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1672,7 +1672,7 @@ static inline int pud_trans_unstable(pud_t *pud) defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) pud_t pudval = READ_ONCE(*pud); - if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval)) + if (pud_none(pudval) || pud_trans_huge(pudval)) return 1; if (unlikely(pud_bad(pudval))) { pud_clear_bad(pud); -- cgit v1.2.3 From d438d273417055241ebaaf1ba3be23459fc27cba Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Thu, 19 Jun 2025 18:58:03 +1000 Subject: mm: remove devmap related functions and page table bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that DAX and all other reference counts to ZONE_DEVICE pages are managed normally there is no need for the special devmap PTE/PMD/PUD page table bits. So drop all references to these, freeing up a software defined page table bit on architectures supporting it. Link: https://lkml.kernel.org/r/6389398c32cc9daa3dfcaa9f79c7972525d310ce.1750323463.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Acked-by: Will Deacon # arm64 Acked-by: David Hildenbrand Suggested-by: Chunyan Zhang Reviewed-by: Björn Töpel Reviewed-by: Jason Gunthorpe Cc: Balbir Singh Cc: Björn Töpel Cc: Christoph Hellwig Cc: Dan Williams Cc: Deepak Gupta Cc: Gerald Schaefer Cc: Inki Dae Cc: John Groves Cc: John Hubbard Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 7 ------- include/linux/pgtable.h | 19 ++----------------- 2 files changed, 2 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index fc365420dfa8..4d833f159988 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2704,13 +2704,6 @@ static inline pud_t pud_mkspecial(pud_t pud) } #endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */ -#ifndef CONFIG_ARCH_HAS_PTE_DEVMAP -static inline int pte_devmap(pte_t pte) -{ - return 0; -} -#endif - extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl); static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index ffcd966cf2d4..cf1515c163e2 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1643,21 +1643,6 @@ static inline int pud_write(pud_t pud) } #endif /* pud_write */ -#if !defined(CONFIG_ARCH_HAS_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE) -static inline int pmd_devmap(pmd_t pmd) -{ - return 0; -} -static inline int pud_devmap(pud_t pud) -{ - return 0; -} -static inline int pgd_devmap(pgd_t pgd) -{ - return 0; -} -#endif - #if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \ !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) static inline int pud_trans_huge(pud_t pud) @@ -1912,8 +1897,8 @@ typedef unsigned int pgtbl_mod_mask; * - It should contain a huge PFN, which points to a huge page larger than * PAGE_SIZE of the platform. The PFN format isn't important here. * - * - It should cover all kinds of huge mappings (e.g., pXd_trans_huge(), - * pXd_devmap(), or hugetlb mappings). + * - It should cover all kinds of huge mappings (i.e. pXd_trans_huge() + * or hugetlb mappings). */ #ifndef pgd_leaf #define pgd_leaf(x) false -- cgit v1.2.3 From 984921edea68bf24bcc87e1317bfc90451ff46c6 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Thu, 19 Jun 2025 18:58:04 +1000 Subject: mm: remove PFN_DEV, PFN_MAP, PFN_SPECIAL, PFN_SG_CHAIN and PFN_SG_LAST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PFN_MAP flag is no longer used for anything, so remove it. The PFN_SG_CHAIN and PFN_SG_LAST flags never appear to have been used so also remove them. The last user of PFN_SPECIAL was removed by 653d7825c149 ("dcssblk: mark DAX broken, remove FS_DAX_LIMITED support"). Users of PFN_DEV were removed earlier in this series by "mm: Remove remaining uses of PFN_DEV". Link: https://lkml.kernel.org/r/670b3950d70b4d97b905bb597dadfd3633de4314.1750323463.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Acked-by: David Hildenbrand Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Dan Williams Cc: Balbir Singh Cc: Björn Töpel Cc: Björn Töpel Cc: Chunyan Zhang Cc: Deepak Gupta Cc: Gerald Schaefer Cc: Inki Dae Cc: John Groves Cc: John Hubbard Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/pfn_t.h | 50 ++------------------------------------------------ 1 file changed, 2 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index 2d9148221e9a..2c002936a0f6 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -5,26 +5,11 @@ /* * PFN_FLAGS_MASK - mask of all the possible valid pfn_t flags - * PFN_SG_CHAIN - pfn is a pointer to the next scatterlist entry - * PFN_SG_LAST - pfn references a page and is the last scatterlist entry - * PFN_DEV - pfn is not covered by system memmap by default - * PFN_MAP - pfn has a dynamic page mapping established by a device driver - * PFN_SPECIAL - for CONFIG_FS_DAX_LIMITED builds to allow XIP, but not - * get_user_pages */ #define PFN_FLAGS_MASK (((u64) (~PAGE_MASK)) << (BITS_PER_LONG_LONG - PAGE_SHIFT)) -#define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1)) -#define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2)) -#define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3)) -#define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4)) -#define PFN_SPECIAL (1ULL << (BITS_PER_LONG_LONG - 5)) #define PFN_FLAGS_TRACE \ - { PFN_SPECIAL, "SPECIAL" }, \ - { PFN_SG_CHAIN, "SG_CHAIN" }, \ - { PFN_SG_LAST, "SG_LAST" }, \ - { PFN_DEV, "DEV" }, \ - { PFN_MAP, "MAP" } + { } static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags) { @@ -46,7 +31,7 @@ static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags) static inline bool pfn_t_has_page(pfn_t pfn) { - return (pfn.val & PFN_MAP) == PFN_MAP || (pfn.val & PFN_DEV) == 0; + return true; } static inline unsigned long pfn_t_to_pfn(pfn_t pfn) @@ -97,35 +82,4 @@ static inline pud_t pfn_t_pud(pfn_t pfn, pgprot_t pgprot) #endif #endif -#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP -static inline bool pfn_t_devmap(pfn_t pfn) -{ - const u64 flags = PFN_DEV|PFN_MAP; - - return (pfn.val & flags) == flags; -} -#else -static inline bool pfn_t_devmap(pfn_t pfn) -{ - return false; -} -pte_t pte_mkdevmap(pte_t pte); -pmd_t pmd_mkdevmap(pmd_t pmd); -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ - defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) -pud_t pud_mkdevmap(pud_t pud); -#endif -#endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */ - -#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL -static inline bool pfn_t_special(pfn_t pfn) -{ - return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL; -} -#else -static inline bool pfn_t_special(pfn_t pfn) -{ - return false; -} -#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ #endif /* _LINUX_PFN_T_H_ */ -- cgit v1.2.3 From 21aa65bf82a78c1e70447a45a85e533689b7f1a7 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Thu, 19 Jun 2025 18:58:05 +1000 Subject: mm: remove callers of pfn_t functionality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All PFN_* pfn_t flags have been removed. Therefore there is no longer a need for the pfn_t type and all uses can be replaced with normal pfns. Link: https://lkml.kernel.org/r/bbedfa576c9822f8032494efbe43544628698b1f.1750323463.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Acked-by: David Hildenbrand Cc: Balbir Singh Cc: Björn Töpel Cc: Björn Töpel Cc: Chunyan Zhang Cc: Dan Williams Cc: Deepak Gupta Cc: Gerald Schaefer Cc: Inki Dae Cc: John Groves Cc: John Hubbard Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/dax.h | 9 +++-- include/linux/device-mapper.h | 2 +- include/linux/huge_mm.h | 6 ++- include/linux/mm.h | 4 +- include/linux/pfn.h | 9 ----- include/linux/pfn_t.h | 85 ------------------------------------------- 6 files changed, 12 insertions(+), 103 deletions(-) delete mode 100644 include/linux/pfn_t.h (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index dcc9fcdf14e4..29eec755430b 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -26,7 +26,7 @@ struct dax_operations { * number of pages available for DAX at that pfn. */ long (*direct_access)(struct dax_device *, pgoff_t, long, - enum dax_access_mode, void **, pfn_t *); + enum dax_access_mode, void **, unsigned long *); /* zero_page_range: required operation. Zero page range */ int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); /* @@ -241,7 +241,7 @@ static inline void dax_break_layout_final(struct inode *inode) bool dax_alive(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, - enum dax_access_mode mode, void **kaddr, pfn_t *pfn); + enum dax_access_mode mode, void **kaddr, unsigned long *pfn); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, @@ -255,9 +255,10 @@ void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops); vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order, - pfn_t *pfnp, int *errp, const struct iomap_ops *ops); + unsigned long *pfnp, int *errp, + const struct iomap_ops *ops); vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, - unsigned int order, pfn_t pfn); + unsigned int order, unsigned long pfn); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); void dax_delete_mapping_range(struct address_space *mapping, loff_t start, loff_t end); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index cb95951547ab..84fdc3a6a19a 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -156,7 +156,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); */ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, long nr_pages, enum dax_access_mode node, void **kaddr, - pfn_t *pfn); + unsigned long *pfn); typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff, size_t nr_pages); diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 26607f2c65fb..8f1b15213f61 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -37,8 +37,10 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, unsigned long cp_flags); -vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write); -vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write); +vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, unsigned long pfn, + bool write); +vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, unsigned long pfn, + bool write); vm_fault_t vmf_insert_folio_pmd(struct vm_fault *vmf, struct folio *folio, bool write); vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio, diff --git a/include/linux/mm.h b/include/linux/mm.h index 4d833f159988..dccebf0abf06 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3522,9 +3522,9 @@ vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr, vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t pgprot); vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, - pfn_t pfn); + unsigned long pfn); vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma, - unsigned long addr, pfn_t pfn); + unsigned long addr, unsigned long pfn); int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma, diff --git a/include/linux/pfn.h b/include/linux/pfn.h index 14bc053c53d8..b90ca0b6c331 100644 --- a/include/linux/pfn.h +++ b/include/linux/pfn.h @@ -4,15 +4,6 @@ #ifndef __ASSEMBLY__ #include - -/* - * pfn_t: encapsulates a page-frame number that is optionally backed - * by memmap (struct page). Whether a pfn_t has a 'struct page' - * backing is indicated by flags in the high bits of the value. - */ -typedef struct { - u64 val; -} pfn_t; #endif #define PFN_ALIGN(x) (((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK) diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h deleted file mode 100644 index 2c002936a0f6..000000000000 --- a/include/linux/pfn_t.h +++ /dev/null @@ -1,85 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_PFN_T_H_ -#define _LINUX_PFN_T_H_ -#include - -/* - * PFN_FLAGS_MASK - mask of all the possible valid pfn_t flags - */ -#define PFN_FLAGS_MASK (((u64) (~PAGE_MASK)) << (BITS_PER_LONG_LONG - PAGE_SHIFT)) - -#define PFN_FLAGS_TRACE \ - { } - -static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags) -{ - pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), }; - - return pfn_t; -} - -/* a default pfn to pfn_t conversion assumes that @pfn is pfn_valid() */ -static inline pfn_t pfn_to_pfn_t(unsigned long pfn) -{ - return __pfn_to_pfn_t(pfn, 0); -} - -static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags) -{ - return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags); -} - -static inline bool pfn_t_has_page(pfn_t pfn) -{ - return true; -} - -static inline unsigned long pfn_t_to_pfn(pfn_t pfn) -{ - return pfn.val & ~PFN_FLAGS_MASK; -} - -static inline struct page *pfn_t_to_page(pfn_t pfn) -{ - if (pfn_t_has_page(pfn)) - return pfn_to_page(pfn_t_to_pfn(pfn)); - return NULL; -} - -static inline phys_addr_t pfn_t_to_phys(pfn_t pfn) -{ - return PFN_PHYS(pfn_t_to_pfn(pfn)); -} - -static inline pfn_t page_to_pfn_t(struct page *page) -{ - return pfn_to_pfn_t(page_to_pfn(page)); -} - -static inline int pfn_t_valid(pfn_t pfn) -{ - return pfn_valid(pfn_t_to_pfn(pfn)); -} - -#ifdef CONFIG_MMU -static inline pte_t pfn_t_pte(pfn_t pfn, pgprot_t pgprot) -{ - return pfn_pte(pfn_t_to_pfn(pfn), pgprot); -} -#endif - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline pmd_t pfn_t_pmd(pfn_t pfn, pgprot_t pgprot) -{ - return pfn_pmd(pfn_t_to_pfn(pfn), pgprot); -} - -#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD -static inline pud_t pfn_t_pud(pfn_t pfn, pgprot_t pgprot) -{ - return pfn_pud(pfn_t_to_pfn(pfn), pgprot); -} -#endif -#endif - -#endif /* _LINUX_PFN_T_H_ */ -- cgit v1.2.3 From 50b4233a22b1ee9ccd0e847597de66ce21329ddb Mon Sep 17 00:00:00 2001 From: Julian Vetter Date: Tue, 3 Jun 2025 15:21:21 +0200 Subject: include/linux/jhash.h: replace __get_unaligned_cpu32 in jhash function __get_unaligned_cpu32() is deprecated. So, replace it with the more generic get_unaligned() and just cast the input parameter. Link: https://lkml.kernel.org/r/20250603132121.3674066-1-julian@outer-limits.org Signed-off-by: Julian Vetter Cc: Arnd Bergmann Cc: Wei-Hsin Yeh Signed-off-by: Andrew Morton --- include/linux/jhash.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/jhash.h b/include/linux/jhash.h index fa26a2dd3b52..7c1c1821c694 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -24,7 +24,7 @@ * Jozsef */ #include -#include +#include /* Best hash sizes are of power of two */ #define jhash_size(n) ((u32)1<<(n)) @@ -77,9 +77,9 @@ static inline u32 jhash(const void *key, u32 length, u32 initval) /* All but the last block: affect some 32 bits of (a,b,c) */ while (length > 12) { - a += __get_unaligned_cpu32(k); - b += __get_unaligned_cpu32(k + 4); - c += __get_unaligned_cpu32(k + 8); + a += get_unaligned((u32 *)k); + b += get_unaligned((u32 *)(k + 4)); + c += get_unaligned((u32 *)(k + 8)); __jhash_mix(a, b, c); length -= 12; k += 12; -- cgit v1.2.3 From 2489e958129ff7cbf26a34ee33cdc9ccbd68fe3c Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 12 Jun 2025 14:11:57 +0800 Subject: relayfs: abolish prev_padding Patch series "relayfs: misc changes", v5. The series mostly focuses on the error counters which helps every user debug their own kernel module. This patch (of 5): prev_padding represents the unused space of certain subbuffer. If the content of a call of relay_write() exceeds the limit of the remainder of this subbuffer, it will skip storing in the rest space and record the start point as buf->prev_padding in relay_switch_subbuf(). Since the buf is a per-cpu big buffer, the point of prev_padding as a global value for the whole buffer instead of a single subbuffer (whose padding info is stored in buf->padding[]) seems meaningless from the real use cases, so we don't bother to record it any more. Link: https://lkml.kernel.org/r/20250612061201.34272-1-kerneljasonxing@gmail.com Link: https://lkml.kernel.org/r/20250612061201.34272-2-kerneljasonxing@gmail.com Signed-off-by: Jason Xing Reviewed-by: Yushan Zhou Reviewed-by: Masami Hiramatsu (Google) Cc: Jens Axboe Cc: Mathieu Desnoyers Cc: Steven Rostedt Signed-off-by: Andrew Morton --- include/linux/relay.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/relay.h b/include/linux/relay.h index b3224111d074..e10a0fdf4325 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -47,7 +47,6 @@ struct rchan_buf unsigned int page_count; /* number of current buffer pages */ unsigned int finalized; /* buffer has been finalized */ size_t *padding; /* padding counts per sub-buffer */ - size_t prev_padding; /* temporary variable */ size_t bytes_consumed; /* bytes consumed in cur read subbuf */ size_t early_bytes; /* bytes consumed before VFS inited */ unsigned int cpu; /* this buf's cpu */ @@ -84,7 +83,6 @@ struct rchan_callbacks * @buf: the channel buffer containing the new sub-buffer * @subbuf: the start of the new sub-buffer * @prev_subbuf: the start of the previous sub-buffer - * @prev_padding: unused space at the end of previous sub-buffer * * The client should return 1 to continue logging, 0 to stop * logging. @@ -100,8 +98,7 @@ struct rchan_callbacks */ int (*subbuf_start) (struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, - size_t prev_padding); + void *prev_subbuf); /* * create_buf_file - create file to represent a relay channel buffer -- cgit v1.2.3 From ca01a90ae7bf9bb22137e719366bdc0f387675c2 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 12 Jun 2025 14:11:58 +0800 Subject: relayfs: support a counter tracking if per-cpu buffers is full When using relay mechanism, we often encounter the case where new data are lost or old unconsumed data are overwritten because of slow reader. Add 'full' field in per-cpu buffer structure to detect if the above case is happening. Relay has two modes: 1) non-overwrite mode, 2) overwrite mode. So buffer being full here respectively means: 1) relayfs doesn't intend to accept new data and then simply drop them, or 2) relayfs is going to start over again and overwrite old unread data with new data. Note: this counter doesn't need any explicit lock to protect from being modified by different threads for the better performance consideration. Writers calling __relay_write/relay_write should consider how to use the lock and ensure it performs under the lock protection, thus it's not necessary to add a new small lock here. Link: https://lkml.kernel.org/r/20250612061201.34272-3-kerneljasonxing@gmail.com Signed-off-by: Jason Xing Reviewed-by: Yushan Zhou Reviewed-by: Jens Axboe Reviewed-by: Masami Hiramatsu (Google) Cc: Mathieu Desnoyers Cc: Steven Rostedt Signed-off-by: Andrew Morton --- include/linux/relay.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/relay.h b/include/linux/relay.h index e10a0fdf4325..cd77eb285a48 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -28,6 +28,14 @@ */ #define RELAYFS_CHANNEL_VERSION 7 +/* + * Relay buffer statistics + */ +struct rchan_buf_stats +{ + unsigned int full_count; /* counter for buffer full */ +}; + /* * Per-cpu relay channel buffer */ @@ -43,6 +51,7 @@ struct rchan_buf struct irq_work wakeup_work; /* reader wakeup */ struct dentry *dentry; /* channel file dentry */ struct kref kref; /* channel buffer refcount */ + struct rchan_buf_stats stats; /* buffer stats */ struct page **page_array; /* array of current buffer pages */ unsigned int page_count; /* number of current buffer pages */ unsigned int finalized; /* buffer has been finalized */ -- cgit v1.2.3 From a53202ce7fbafd24f854865b02eff891e246c550 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 12 Jun 2025 14:11:59 +0800 Subject: relayfs: introduce getting relayfs statistics function In this version, only support getting the counter for buffer full and implement the framework of how it works. Users can pass certain flag to fetch what field/statistics they expect to know. Each time it only returns one result. So do not pass multiple flags. Link: https://lkml.kernel.org/r/20250612061201.34272-4-kerneljasonxing@gmail.com Signed-off-by: Jason Xing Reviewed-by: Yushan Zhou Reviewed-by: Masami Hiramatsu (Google) Cc: Jens Axboe Cc: Mathieu Desnoyers Cc: Steven Rostedt Signed-off-by: Andrew Morton --- include/linux/relay.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/relay.h b/include/linux/relay.h index cd77eb285a48..5310967f9d74 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -31,6 +31,12 @@ /* * Relay buffer statistics */ +enum { + RELAY_STATS_BUF_FULL = (1 << 0), + + RELAY_STATS_LAST = RELAY_STATS_BUF_FULL, +}; + struct rchan_buf_stats { unsigned int full_count; /* counter for buffer full */ @@ -167,6 +173,7 @@ struct rchan *relay_open(const char *base_filename, void *private_data); extern void relay_close(struct rchan *chan); extern void relay_flush(struct rchan *chan); +size_t relay_stats(struct rchan *chan, int flags); extern void relay_subbufs_consumed(struct rchan *chan, unsigned int cpu, size_t consumed); -- cgit v1.2.3 From 19f3cb64a25b80db667a00182785577fae465b3e Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 12 Jun 2025 14:12:01 +0800 Subject: relayfs: support a counter tracking if data is too big to write It really doesn't matter if the user/admin knows what the last too big value is. Record how many times this case is triggered would be helpful. Solve the existing issue where relay_reset() doesn't restore the value. Store the counter in the per-cpu buffer structure instead of the global buffer structure. It also solves the racy condition which is likely to happen when a few of per-cpu buffers encounter the too big data case and then access the global field last_toobig without lock protection. Remove the printk in relay_close() since kernel module can directly call relay_stats() as they want. Link: https://lkml.kernel.org/r/20250612061201.34272-6-kerneljasonxing@gmail.com Signed-off-by: Jason Xing Reviewed-by: Yushan Zhou Reviewed-by: Masami Hiramatsu (Google) Cc: Jens Axboe Cc: Mathieu Desnoyers Cc: Steven Rostedt Signed-off-by: Andrew Morton --- include/linux/relay.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/relay.h b/include/linux/relay.h index 5310967f9d74..6772a7075840 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -33,13 +33,15 @@ */ enum { RELAY_STATS_BUF_FULL = (1 << 0), + RELAY_STATS_WRT_BIG = (1 << 1), - RELAY_STATS_LAST = RELAY_STATS_BUF_FULL, + RELAY_STATS_LAST = RELAY_STATS_WRT_BIG, }; struct rchan_buf_stats { unsigned int full_count; /* counter for buffer full */ + unsigned int big_count; /* counter for too big to write */ }; /* @@ -79,7 +81,6 @@ struct rchan const struct rchan_callbacks *cb; /* client callbacks */ struct kref kref; /* channel refcount */ void *private_data; /* for user-defined data */ - size_t last_toobig; /* tried to log event > subbuf size */ struct rchan_buf * __percpu *buf; /* per-cpu channel buffers */ int is_global; /* One global buffer ? */ struct list_head list; /* for channel list */ -- cgit v1.2.3 From 1857fcc847443b0238cb64584b43d8c3a9049a0a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 17 Mar 2025 17:02:28 +0800 Subject: lib/raid6: replace custom zero page with ZERO_PAGE Use the system-wide zero page instead of a custom zero page. [herbert@gondor.apana.org.au: update lib/raid6/recov_rvv.c, per Klara] Link: https://lkml.kernel.org/r/aFkUnXWtxcgOTVkw@gondor.apana.org.au Link: https://lkml.kernel.org/r/Z9flJNkWQICx0PXk@gondor.apana.org.au Signed-off-by: Herbert Xu Cc: Song Liu Cc: Yu Kuai Cc: Klara Modin Signed-off-by: Andrew Morton --- include/linux/raid/pq.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 72ff44cca864..2467b3be15c9 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -11,8 +11,13 @@ #ifdef __KERNEL__ #include +#include -extern const char raid6_empty_zero_page[PAGE_SIZE]; +/* This should be const but the raid6 code is too convoluted for that. */ +static inline void *raid6_get_zero_page(void) +{ + return page_address(ZERO_PAGE(0)); +} #else /* ! __KERNEL__ */ /* Used for testing in user space */ @@ -191,6 +196,11 @@ static inline uint32_t raid6_jiffies(void) return tv.tv_sec*1000 + tv.tv_usec/1000; } +static inline void *raid6_get_zero_page(void) +{ + return raid6_empty_zero_page; +} + #endif /* ! __KERNEL__ */ #endif /* LINUX_RAID_RAID6_H */ -- cgit v1.2.3 From 76fdb7eb4e1c91086ce9c3db6972c2ed48c96afb Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 8 Jul 2025 23:21:51 +1000 Subject: uapi: export PROCFS_ROOT_INO The root inode of /proc having a fixed inode number has been part of the core kernel ABI since its inception, and recently some userspace programs (mainly container runtimes) have started to explicitly depend on this behaviour. The main reason this is useful to userspace is that by checking that a suspect /proc handle has fstype PROC_SUPER_MAGIC and is PROCFS_ROOT_INO, they can then use openat2(RESOLVE_{NO_{XDEV,MAGICLINK},BENEATH}) to ensure that there isn't a bind-mount that replaces some procfs file with a different one. This kind of attack has lead to security issues in container runtimes in the past (such as CVE-2019-19921) and libraries like libpathrs[1] use this feature of procfs to provide safe procfs handling functions. There was also some trailing whitespace in the "struct proc_dir_entry" initialiser, so fix that up as well. [1]: https://github.com/openSUSE/libpathrs Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/20250708-uapi-procfs-root-ino-v1-1-6ae61e97c79b@cyphar.com Signed-off-by: Christian Brauner --- include/linux/proc_ns.h | 1 - include/uapi/linux/fs.h | 11 +++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 6258455e49a4..4b20375f3783 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -40,7 +40,6 @@ extern const struct proc_ns_operations timens_for_children_operations; * We always define these enumerators */ enum { - PROC_ROOT_INO = 1, PROC_IPC_INIT_INO = IPC_NS_INIT_INO, PROC_UTS_INIT_INO = UTS_NS_INIT_INO, PROC_USER_INIT_INO = USER_NS_INIT_INO, diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 0098b0ce8ccb..28238a3edbc1 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -60,6 +60,17 @@ #define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ #define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ +/* + * The root inode of procfs is guaranteed to always have the same inode number. + * For programs that make heavy use of procfs, verifying that the root is a + * real procfs root and using openat2(RESOLVE_{NO_{XDEV,MAGICLINKS},BENEATH}) + * will allow you to make sure you are never tricked into operating on the + * wrong procfs file. + */ +enum procfs_ino { + PROCFS_ROOT_INO = 1, +}; + struct file_clone_range { __s64 src_fd; __u64 src_offset; -- cgit v1.2.3 From 626bb0a45584d544d84eab909795ccb355062bcc Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Fri, 13 Jun 2025 13:45:12 +0200 Subject: mfd: tps6594: Add TI TPS652G1 support The TPS652G1 is a stripped down version of the TPS65224. From a software point of view, it lacks any voltage monitoring, the watchdog, the ESM and the ADC. Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20250613114518.1772109-2-mwalle@kernel.org Signed-off-by: Lee Jones --- include/linux/mfd/tps6594.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mfd/tps6594.h b/include/linux/mfd/tps6594.h index 16543fd4d83e..021db8875963 100644 --- a/include/linux/mfd/tps6594.h +++ b/include/linux/mfd/tps6594.h @@ -19,6 +19,7 @@ enum pmic_id { TPS6593, LP8764, TPS65224, + TPS652G1, }; /* Macro to get page index from register address */ -- cgit v1.2.3 From dd831ac8221e691e9e918585b1003c7071df0379 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Sat, 5 Jul 2025 14:21:43 -0700 Subject: net/sched: sch_qfq: Fix null-deref in agg_dequeue To prevent a potential crash in agg_dequeue (net/sched/sch_qfq.c) when cl->qdisc->ops->peek(cl->qdisc) returns NULL, we check the return value before using it, similar to the existing approach in sch_hfsc.c. To avoid code duplication, the following changes are made: 1. Changed qdisc_warn_nonwc(include/net/pkt_sched.h) into a static inline function. 2. Moved qdisc_peek_len from net/sched/sch_hfsc.c to include/net/pkt_sched.h so that sch_qfq can reuse it. 3. Applied qdisc_peek_len in agg_dequeue to avoid crashing. Signed-off-by: Xiang Mei Reviewed-by: Cong Wang Link: https://patch.msgid.link/20250705212143.3982664-1-xmei5@asu.edu Signed-off-by: Paolo Abeni --- include/net/pkt_sched.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index d7b7b6cd4aa1..8a75c73fc555 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -114,7 +114,6 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct netlink_ext_ack *extack); void qdisc_put_rtab(struct qdisc_rate_table *tab); void qdisc_put_stab(struct qdisc_size_table *tab); -void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc); bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq, spinlock_t *root_lock, bool validate); @@ -290,4 +289,28 @@ static inline bool tc_qdisc_stats_dump(struct Qdisc *sch, return true; } +static inline void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) +{ + if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { + pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", + txt, qdisc->ops->id, qdisc->handle >> 16); + qdisc->flags |= TCQ_F_WARN_NONWC; + } +} + +static inline unsigned int qdisc_peek_len(struct Qdisc *sch) +{ + struct sk_buff *skb; + unsigned int len; + + skb = sch->ops->peek(sch); + if (unlikely(skb == NULL)) { + qdisc_warn_nonwc("qdisc_peek_len", sch); + return 0; + } + len = qdisc_pkt_len(skb); + + return len; +} + #endif -- cgit v1.2.3 From 7a8212f3bcfab753602599da644fb5bdeaf8ff2d Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 11 Jun 2025 15:40:42 +0300 Subject: media: v4l: Make media_entity_to_video_device() NULL-safe Make media_entity_to_video_device(NULL) return NULL, instead of an invalid pointer value. Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/media/v4l2-dev.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h index 1b6222fab24e..a69801274800 100644 --- a/include/media/v4l2-dev.h +++ b/include/media/v4l2-dev.h @@ -313,10 +313,16 @@ struct video_device { * media_entity_to_video_device - Returns a &struct video_device from * the &struct media_entity embedded on it. * - * @__entity: pointer to &struct media_entity - */ -#define media_entity_to_video_device(__entity) \ - container_of(__entity, struct video_device, entity) + * @__entity: pointer to &struct media_entity, may be NULL + */ +#define media_entity_to_video_device(__entity) \ +({ \ + typeof(__entity) __me_vdev_ent = __entity; \ + \ + __me_vdev_ent ? \ + container_of(__me_vdev_ent, struct video_device, entity) : \ + NULL; \ +}) /** * to_video_device - Returns a &struct video_device from the -- cgit v1.2.3 From 1fff2ee377e1c2230054e65092def460dd40b587 Mon Sep 17 00:00:00 2001 From: Mehdi Djait Date: Mon, 23 Jun 2025 15:51:15 +0200 Subject: media: uapi: videodev2: Fix comment for 12-bit packed Bayer formats For 12-bit packed Bayer formats: every two consecutive samples are packed into three bytes. Fix the corresponding comment. Signed-off-by: Mehdi Djait Reviewed-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 6f7bd38dd5aa..1bb1979f6c18 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -726,7 +726,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_SGBRG12 v4l2_fourcc('G', 'B', '1', '2') /* 12 GBGB.. RGRG.. */ #define V4L2_PIX_FMT_SGRBG12 v4l2_fourcc('B', 'A', '1', '2') /* 12 GRGR.. BGBG.. */ #define V4L2_PIX_FMT_SRGGB12 v4l2_fourcc('R', 'G', '1', '2') /* 12 RGRG.. GBGB.. */ - /* 12bit raw bayer packed, 6 bytes for every 4 pixels */ + /* 12bit raw bayer packed, 3 bytes for every 2 pixels */ #define V4L2_PIX_FMT_SBGGR12P v4l2_fourcc('p', 'B', 'C', 'C') #define V4L2_PIX_FMT_SGBRG12P v4l2_fourcc('p', 'G', 'C', 'C') #define V4L2_PIX_FMT_SGRBG12P v4l2_fourcc('p', 'g', 'C', 'C') -- cgit v1.2.3 From c9596e882032827a5c7f99a5ca481cd46251a473 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Sat, 5 Jul 2025 10:37:41 +0200 Subject: media: v4l2-subdev: Remove g_pixelaspect operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no consumers or implementations left in tree for the subdevice operation g_pixelaspect, delete it. Signed-off-by: Niklas Söderlund Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20250705083741.77517-4-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/media/v4l2-subdev.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index 57f2bcb4eb16..5dcf4065708f 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -460,8 +460,6 @@ enum v4l2_subdev_pre_streamon_flags { * but use the v4l2_subdev_enable_streams() and * v4l2_subdev_disable_streams() helpers. * - * @g_pixelaspect: callback to return the pixelaspect ratio. - * * @s_rx_buffer: set a host allocated memory buffer for the subdev. The subdev * can adjust @size to a lower value and must not write more data to the * buffer starting at @data than the original value of @size. @@ -491,7 +489,6 @@ struct v4l2_subdev_video_ops { int (*g_tvnorms_output)(struct v4l2_subdev *sd, v4l2_std_id *std); int (*g_input_status)(struct v4l2_subdev *sd, u32 *status); int (*s_stream)(struct v4l2_subdev *sd, int enable); - int (*g_pixelaspect)(struct v4l2_subdev *sd, struct v4l2_fract *aspect); int (*s_rx_buffer)(struct v4l2_subdev *sd, void *buf, unsigned int *size); int (*pre_streamon)(struct v4l2_subdev *sd, u32 flags); -- cgit v1.2.3 From 45e359be1ce88fb22e61fa3aa23b2e450a6cae03 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Sat, 5 Jul 2025 00:01:38 +0800 Subject: net: xsk: introduce XDP_MAX_TX_SKB_BUDGET setsockopt This patch provides a setsockopt method to let applications leverage to adjust how many descs to be handled at most in one send syscall. It mitigates the situation where the default value (32) that is too small leads to higher frequency of triggering send syscall. Considering the prosperity/complexity the applications have, there is no absolutely ideal suggestion fitting all cases. So keep 32 as its default value like before. The patch does the following things: - Add XDP_MAX_TX_SKB_BUDGET socket option. - Set max_tx_budget to 32 by default in the initialization phase as a per-socket granular control. - Set the range of max_tx_budget as [32, xs->tx->nentries]. The idea behind this comes out of real workloads in production. We use a user-level stack with xsk support to accelerate sending packets and minimize triggering syscalls. When the packets are aggregated, it's not hard to hit the upper bound (namely, 32). The moment user-space stack fetches the -EAGAIN error number passed from sendto(), it will loop to try again until all the expected descs from tx ring are sent out to the driver. Enlarging the XDP_MAX_TX_SKB_BUDGET value contributes to less frequency of sendto() and higher throughput/PPS. Here is what I did in production, along with some numbers as follows: For one application I saw lately, I suggested using 128 as max_tx_budget because I saw two limitations without changing any default configuration: 1) XDP_MAX_TX_SKB_BUDGET, 2) socket sndbuf which is 212992 decided by net.core.wmem_default. As to XDP_MAX_TX_SKB_BUDGET, the scenario behind this was I counted how many descs are transmitted to the driver at one time of sendto() based on [1] patch and then I calculated the possibility of hitting the upper bound. Finally I chose 128 as a suitable value because 1) it covers most of the cases, 2) a higher number would not bring evident results. After twisting the parameters, a stable improvement of around 4% for both PPS and throughput and less resources consumption were found to be observed by strace -c -p xxx: 1) %time was decreased by 7.8% 2) error counter was decreased from 18367 to 572 [1]: https://lore.kernel.org/all/20250619093641.70700-1-kerneljasonxing@gmail.com/ Signed-off-by: Jason Xing Acked-by: Maciej Fijalkowski Link: https://patch.msgid.link/20250704160138.48677-1-kerneljasonxing@gmail.com Signed-off-by: Paolo Abeni --- include/net/xdp_sock.h | 1 + include/uapi/linux/if_xdp.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index e8bd6ddb7b12..ce587a225661 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -84,6 +84,7 @@ struct xdp_sock { struct list_head map_list; /* Protects map_list */ spinlock_t map_list_lock; + u32 max_tx_budget; /* Protects multiple processes in the control path */ struct mutex mutex; struct xsk_queue *fq_tmp; /* Only as tmp storage before bind */ diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 44f2bb93e7e6..23a062781468 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -79,6 +79,7 @@ struct xdp_mmap_offsets { #define XDP_UMEM_COMPLETION_RING 6 #define XDP_STATISTICS 7 #define XDP_OPTIONS 8 +#define XDP_MAX_TX_SKB_BUDGET 9 struct xdp_umem_reg { __u64 addr; /* Start of packet data area */ -- cgit v1.2.3 From bf8bbaefaa6ae0a07971ea57b3208df60e8ad0a4 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 10 Jul 2025 14:54:06 +0200 Subject: drm/sched: Avoid memory leaks with cancel_job() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since its inception, the GPU scheduler can leak memory if the driver calls drm_sched_fini() while there are still jobs in flight. The simplest way to solve this in a backwards compatible manner is by adding a new callback, drm_sched_backend_ops.cancel_job(), which instructs the driver to signal the hardware fence associated with the job. Afterwards, the scheduler can safely use the established free_job() callback for freeing the job. Implement the new backend_ops callback cancel_job(). Suggested-by: Tvrtko Ursulin Link: https://lore.kernel.org/dri-devel/20250418113211.69956-1-tvrtko.ursulin@igalia.com/ Reviewed-by: Maíra Canal Acked-by: Tvrtko Ursulin Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20250710125412.128476-4-phasta@kernel.org --- include/drm/gpu_scheduler.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index e62a7214e052..190844370f48 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -512,6 +512,24 @@ struct drm_sched_backend_ops { * and it's time to clean it up. */ void (*free_job)(struct drm_sched_job *sched_job); + + /** + * @cancel_job: Used by the scheduler to guarantee remaining jobs' fences + * get signaled in drm_sched_fini(). + * + * Used by the scheduler to cancel all jobs that have not been executed + * with &struct drm_sched_backend_ops.run_job by the time + * drm_sched_fini() gets invoked. + * + * Drivers need to signal the passed job's hardware fence with an + * appropriate error code (e.g., -ECANCELED) in this callback. They + * must not free the job. + * + * The scheduler will only call this callback once it stopped calling + * all other callbacks forever, with the exception of &struct + * drm_sched_backend_ops.free_job. + */ + void (*cancel_job)(struct drm_sched_job *sched_job); }; /** -- cgit v1.2.3 From fca02263f27eee093379844ac0fb280bf70e6aed Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:58:54 -0700 Subject: iommufd: Correct virt_id kdoc at struct iommu_vdevice_alloc The userspace-api iommufd.rst has described it correctly but the uAPI doc was remained uncorrected. Thus, fix it. Link: https://patch.msgid.link/r/2cdcecaf2babee16fda7545ccad4e5bed7a5032d.1752126748.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index f29b6c44655e..4bc05e4621c1 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -995,7 +995,7 @@ struct iommu_viommu_alloc { * @dev_id: The physical device to allocate a virtual instance on the vIOMMU * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTORY * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID - * of AMD IOMMU, and vRID of a nested Intel VT-d to a Context Table + * of AMD IOMMU, and vRID of Intel VT-d * * Allocate a virtual device instance (for a physical device) against a vIOMMU. * This instance holds the device's information (related to its vIOMMU) in a VM. -- cgit v1.2.3 From 4b57c057f9e6668ae442b19902dab8a73fe7b209 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:58:56 -0700 Subject: iommu: Use enum iommu_hw_info_type for type in hw_info op Replace u32 to make it clear. No functional changes. Also simplify the kdoc since the type itself is clear enough. Link: https://patch.msgid.link/r/651c50dee8ab900f691202ef0204cd5a43fdd6a2.1752126748.git.nicolinc@nvidia.com Reviewed-by: Pranjal Shrivastava Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 04548b18df28..b87c2841e6bc 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -563,8 +563,7 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, * @capable: check capability * @hw_info: report iommu hardware information. The data buffer returned by this * op is allocated in the iommu driver and freed by the caller after - * use. The information type is one of enum iommu_hw_info_type defined - * in include/uapi/linux/iommufd.h. + * use. * @domain_alloc: Do not use in new drivers * @domain_alloc_identity: allocate an IDENTITY domain. Drivers should prefer to * use identity_domain instead. This should only be used @@ -623,7 +622,8 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, */ struct iommu_ops { bool (*capable)(struct device *dev, enum iommu_cap); - void *(*hw_info)(struct device *dev, u32 *length, u32 *type); + void *(*hw_info)(struct device *dev, u32 *length, + enum iommu_hw_info_type *type); /* Domain allocation and freeing by the iommu driver */ #if IS_ENABLED(CONFIG_FSL_PAMU) -- cgit v1.2.3 From 3fcf56a2393b399f289a473181ce6b19f716b59d Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:58:57 -0700 Subject: iommu: Add iommu_copy_struct_to_user helper Similar to the iommu_copy_struct_from_user helper receiving data from the user space, add an iommu_copy_struct_to_user helper to report output data back to the user space data pointer. Link: https://patch.msgid.link/r/fa292c2a730aadd77085ec3a8272360c96eabb9c.1752126748.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Reviewed-by: Pranjal Shrivastava Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b87c2841e6bc..fd7319706684 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -558,6 +558,46 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, return 0; } +/** + * __iommu_copy_struct_to_user - Report iommu driver specific user space data + * @dst_data: Pointer to a struct iommu_user_data for user space data location + * @src_data: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @data_type: The data type of the @src_data. Must match with @dst_data.type + * @data_len: Length of current user data structure, i.e. sizeof(struct _src) + * @min_len: Initial length of user data structure for backward compatibility. + * This should be offsetofend using the last member in the user data + * struct that was initially added to include/uapi/linux/iommufd.h + */ +static inline int +__iommu_copy_struct_to_user(const struct iommu_user_data *dst_data, + void *src_data, unsigned int data_type, + size_t data_len, size_t min_len) +{ + if (WARN_ON(!dst_data || !src_data)) + return -EINVAL; + if (dst_data->type != data_type) + return -EINVAL; + if (dst_data->len < min_len || data_len < dst_data->len) + return -EINVAL; + return copy_struct_to_user(dst_data->uptr, dst_data->len, src_data, + data_len, NULL); +} + +/** + * iommu_copy_struct_to_user - Report iommu driver specific user space data + * @user_data: Pointer to a struct iommu_user_data for user space data location + * @ksrc: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @data_type: The data type of the @ksrc. Must match with @user_data->type + * @min_last: The last member of the data structure @ksrc points in the initial + * version. + * Return 0 for success, otherwise -error. + */ +#define iommu_copy_struct_to_user(user_data, ksrc, data_type, min_last) \ + __iommu_copy_struct_to_user(user_data, ksrc, data_type, sizeof(*ksrc), \ + offsetofend(typeof(*ksrc), min_last)) + /** * struct iommu_ops - iommu ops and capabilities * @capable: check capability -- cgit v1.2.3 From c3436d42f812faffac94f8fb3fb246ab43ffdffe Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:58:58 -0700 Subject: iommu: Pass in a driver-level user data structure to viommu_init op The new type of vIOMMU for tegra241-cmdqv allows user space VM to use one of its virtual command queue HW resources exclusively. This requires user space to mmap the corresponding MMIO page from kernel space for direct HW control. To forward the mmap info (offset and length), iommufd should add a driver specific data structure to the IOMMUFD_CMD_VIOMMU_ALLOC ioctl, for driver to output the info during the vIOMMU initialization back to user space. Similar to the existing ioctls and their IOMMU handlers, add a user_data to viommu_init op to bridge between iommufd and drivers. Link: https://patch.msgid.link/r/90bd5637dab7f5507c7a64d2c4826e70431e45a4.1752126748.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Reviewed-by: Pranjal Shrivastava Reviewed-by: Kevin Tian Reviewed-by: Vasant Hegde Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index fd7319706684..e06a0fbe4bc7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -700,7 +700,8 @@ struct iommu_ops { size_t (*get_viommu_size)(struct device *dev, enum iommu_viommu_type viommu_type); int (*viommu_init)(struct iommufd_viommu *viommu, - struct iommu_domain *parent_domain); + struct iommu_domain *parent_domain, + const struct iommu_user_data *user_data); const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; -- cgit v1.2.3 From 1976cdf61ce9b6f97b5212676a3b9f74c68f6073 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:58:59 -0700 Subject: iommufd/viommu: Allow driver-specific user data for a vIOMMU object The new type of vIOMMU for tegra241-cmdqv driver needs a driver-specific user data. So, add data_len/uptr to the iommu_viommu_alloc uAPI and pass it in via the viommu_init iommu op. Link: https://patch.msgid.link/r/2315b0e164b355746387e960745ac9154caec124.1752126748.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Acked-by: Pranjal Shrivastava Acked-by: Alok Tiwari Reviewed-by: Vasant Hegde Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 4bc05e4621c1..04eee77335cf 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -965,6 +965,9 @@ enum iommu_viommu_type { * @dev_id: The device's physical IOMMU will be used to back the virtual IOMMU * @hwpt_id: ID of a nesting parent HWPT to associate to * @out_viommu_id: Output virtual IOMMU ID for the allocated object + * @data_len: Length of the type specific data + * @__reserved: Must be 0 + * @data_uptr: User pointer to a driver-specific virtual IOMMU data * * Allocate a virtual IOMMU object, representing the underlying physical IOMMU's * virtualization support that is a security-isolated slice of the real IOMMU HW @@ -985,6 +988,9 @@ struct iommu_viommu_alloc { __u32 dev_id; __u32 hwpt_id; __u32 out_viommu_id; + __u32 data_len; + __u32 __reserved; + __aligned_u64 data_uptr; }; #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC) -- cgit v1.2.3 From 40b6a946d21ee7b2b6d394bb2f1cdd3973aa9da5 Mon Sep 17 00:00:00 2001 From: Samuel Zhang Date: Thu, 10 Jul 2025 14:23:09 +0800 Subject: drm/ttm: add new api ttm_device_prepare_hibernation() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This new api is used for hibernation to move GTT BOs to shmem after VRAM eviction. shmem will be flushed to swap disk later to reduce the system memory usage for hibernation. Signed-off-by: Samuel Zhang Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250710062313.3226149-2-guoqing.zhang@amd.com Signed-off-by: Mario Limonciello --- include/drm/ttm/ttm_device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index 39b8636b1845..592b5f802859 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -272,6 +272,7 @@ struct ttm_device { int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, gfp_t gfp_flags); +int ttm_device_prepare_hibernation(struct ttm_device *bdev); static inline struct ttm_resource_manager * ttm_manager_type(struct ttm_device *bdev, int mem_type) -- cgit v1.2.3 From c2aaddbd2deded9d3301f1bafed242a0f71baba8 Mon Sep 17 00:00:00 2001 From: Samuel Zhang Date: Thu, 10 Jul 2025 14:23:12 +0800 Subject: PM: hibernate: add new api pm_hibernate_is_recovering() dev_pm_ops.thaw() is called in following cases: * normal case: after hibernation image has been created. * error case 1: creation of a hibernation image has failed. * error case 2: restoration from a hibernation image has failed. For normal case, it is called mainly for resume storage devices for saving the hibernation image. Other devices that are not involved in the image saving do not need to resume the device. But since there's no api to know which case thaw() is called, device drivers can't conditionally resume device in thaw(). The new pm_hibernate_is_recovering() is such a api to query if thaw() is called in normal case. Signed-off-by: Samuel Zhang Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20250710062313.3226149-5-guoqing.zhang@amd.com Signed-off-by: Mario Limonciello --- include/linux/suspend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index b1c76c8f2c82..293137210fdf 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -426,6 +426,8 @@ int is_hibernate_resume_dev(dev_t dev); static inline int is_hibernate_resume_dev(dev_t dev) { return 0; } #endif +bool pm_hibernate_is_recovering(void); + /* Hibernation and suspend events */ #define PM_HIBERNATION_PREPARE 0x0001 /* Going to hibernate */ #define PM_POST_HIBERNATION 0x0002 /* Hibernation finished */ -- cgit v1.2.3 From 47462586f91358499897fddb20f6bb9cec5f4213 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 9 Jul 2025 23:07:52 -0700 Subject: fscrypt: Remove gfp_t argument from fscrypt_encrypt_block_inplace() This argument is no longer used, so remove it. Reviewed-by: Alex Markuze Link: https://lore.kernel.org/r/20250710060754.637098-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 56fad33043d5..8d0e3ad89b94 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -314,7 +314,7 @@ struct page *fscrypt_encrypt_pagecache_blocks(struct folio *folio, size_t len, size_t offs, gfp_t gfp_flags); int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, - u64 lblk_num, gfp_t gfp_flags); + u64 lblk_num); int fscrypt_decrypt_pagecache_blocks(struct folio *folio, size_t len, size_t offs); @@ -487,8 +487,7 @@ static inline struct page *fscrypt_encrypt_pagecache_blocks(struct folio *folio, static inline int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, - unsigned int offs, u64 lblk_num, - gfp_t gfp_flags) + unsigned int offs, u64 lblk_num) { return -EOPNOTSUPP; } -- cgit v1.2.3 From e41315787dda23daf146afb03b844d2c5880b72b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Fri, 4 Jul 2025 16:07:23 -0300 Subject: drm: Add missing struct drm_wedge_task_info kernel doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following kernel doc warning: include/drm/drm_device.h:40: warning: Function parameter or struct member 'pid' not described in 'drm_wedge_task_info' include/drm/drm_device.h:40: warning: Function parameter or struct member 'comm' not described in 'drm_wedge_task_info' Fixes: 183bccafa176 ("drm: Create a task info option for wedge events") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/lkml/20250618151307.4a1a5e17@canb.auug.org.au/ Reviewed-by: Raag Jadav Acked-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://lore.kernel.org/r/20250704190724.1159416-2-andrealmeid@igalia.com Signed-off-by: André Almeida --- include/drm/drm_device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index 08b3b2467c4c..a33aedd5e9ec 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -35,7 +35,9 @@ struct pci_controller; * struct drm_wedge_task_info - information about the guilty task of a wedge dev */ struct drm_wedge_task_info { + /** @pid: pid of the task */ pid_t pid; + /** @comm: command name of the task */ char comm[TASK_COMM_LEN]; }; -- cgit v1.2.3 From a8b289f0f2dcbadd8c207ad8f33cf7ba2b4eb088 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Jul 2025 10:00:12 +0200 Subject: irqchip/irq-msi-lib: Fix build with PCI disabled The armada-370-xp irqchip fails in some randconfig builds because of a missing declaration: In file included from drivers/irqchip/irq-armada-370-xp.c:23: include/linux/irqchip/irq-msi-lib.h:25:39: error: 'struct msi_domain_info' declared inside parameter list will not be visible outside of this definition or declaration [-Werror] Add a forward declaration for the msi_domain_info structure. [ tglx: Fixed up the subsystem prefix. Is it really that hard to get right? ] Fixes: e51b27438a10 ("irqchip: Make irq-msi-lib.h globally available") Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Link: https://lore.kernel.org/all/20250710080021.2303640-1-arnd@kernel.org --- include/linux/irqchip/irq-msi-lib.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/irqchip/irq-msi-lib.h b/include/linux/irqchip/irq-msi-lib.h index dd8d1d138544..224ac28e88d7 100644 --- a/include/linux/irqchip/irq-msi-lib.h +++ b/include/linux/irqchip/irq-msi-lib.h @@ -17,6 +17,7 @@ #define MATCH_PLATFORM_MSI BIT(DOMAIN_BUS_PLATFORM_MSI) +struct msi_domain_info; int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); -- cgit v1.2.3 From 96698d1898bc79c783990ac7d5458b7c8f8e0b69 Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Tue, 8 Jul 2025 11:33:42 +0800 Subject: net: replace ND_PRINTK with dynamic debug ND_PRINTK with val > 1 only works when the ND_DEBUG was set in compilation phase. Replace it with dynamic debug. Convert ND_PRINTK with val <= 1 to net_{err,warn}_ratelimited, and convert the rest to net_dbg_ratelimited. Suggested-by: Ido Schimmel Signed-off-by: Wang Liang Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20250708033342.1627636-1-wangliang74@huawei.com Signed-off-by: Jakub Kicinski --- include/net/ndisc.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 3c88d5bc5eed..d38783a2ce57 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -60,15 +60,6 @@ enum { #include -/* Set to 3 to get tracing... */ -#define ND_DEBUG 1 - -#define ND_PRINTK(val, level, fmt, ...) \ -do { \ - if (val <= ND_DEBUG) \ - net_##level##_ratelimited(fmt, ##__VA_ARGS__); \ -} while (0) - struct ctl_table; struct inet6_dev; struct net_device; -- cgit v1.2.3 From 18cdb3d982da8976b28d57691eb256ec5688fad2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jul 2025 12:45:17 +0000 Subject: netfilter: flowtable: account for Ethernet header in nf_flow_pppoe_proto() syzbot found a potential access to uninit-value in nf_flow_pppoe_proto() Blamed commit forgot the Ethernet header. BUG: KMSAN: uninit-value in nf_flow_offload_inet_hook+0x7e4/0x940 net/netfilter/nf_flow_table_inet.c:27 nf_flow_offload_inet_hook+0x7e4/0x940 net/netfilter/nf_flow_table_inet.c:27 nf_hook_entry_hookfn include/linux/netfilter.h:157 [inline] nf_hook_slow+0xe1/0x3d0 net/netfilter/core.c:623 nf_hook_ingress include/linux/netfilter_netdev.h:34 [inline] nf_ingress net/core/dev.c:5742 [inline] __netif_receive_skb_core+0x4aff/0x70c0 net/core/dev.c:5837 __netif_receive_skb_one_core net/core/dev.c:5975 [inline] __netif_receive_skb+0xcc/0xac0 net/core/dev.c:6090 netif_receive_skb_internal net/core/dev.c:6176 [inline] netif_receive_skb+0x57/0x630 net/core/dev.c:6235 tun_rx_batched+0x1df/0x980 drivers/net/tun.c:1485 tun_get_user+0x4ee0/0x6b40 drivers/net/tun.c:1938 tun_chr_write_iter+0x3e9/0x5c0 drivers/net/tun.c:1984 new_sync_write fs/read_write.c:593 [inline] vfs_write+0xb4b/0x1580 fs/read_write.c:686 ksys_write fs/read_write.c:738 [inline] __do_sys_write fs/read_write.c:749 [inline] Reported-by: syzbot+bf6ed459397e307c3ad2@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/686bc073.a00a0220.c7b3.0086.GAE@google.com/T/#u Fixes: 87b3593bed18 ("netfilter: flowtable: validate pppoe header") Signed-off-by: Eric Dumazet Reviewed-by: Pablo Neira Ayuso Link: https://patch.msgid.link/20250707124517.614489-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/netfilter/nf_flow_table.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d711642e78b5..c003cd194fa2 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -370,7 +370,7 @@ static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb) static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto) { - if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) + if (!pskb_may_pull(skb, ETH_HLEN + PPPOE_SES_HLEN)) return false; *inner_proto = __nf_flow_pppoe_proto(skb); -- cgit v1.2.3 From d7974697de4d6fa1a1ed9ca43616a8500046f25a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 8 Jul 2025 15:06:38 -0700 Subject: ethtool: mark ETHER_FLOW as usable for Rx hash Looks like some drivers (ena, enetc, fbnic.. there's probably more) consider ETHER_FLOW to be legitimate target for flow hashing. I'm not sure how intentional that is from the uAPI perspective vs just an effect of ethtool IOCTL doing minimal input validation. But Netlink will do strict validation, so we need to decide whether we allow this use case or not. I don't see a strong reason against it, and rejecting it would potentially regress a number of drivers. So update the comments and flow_type_hashable(). Link: https://patch.msgid.link/20250708220640.2738464-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 707c1844010c..9e9afdd1238a 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2314,7 +2314,7 @@ enum { IPV6_USER_FLOW = 0x0e, /* spec only (usr_ip6_spec; nfc only) */ IPV4_FLOW = 0x10, /* hash only */ IPV6_FLOW = 0x11, /* hash only */ - ETHER_FLOW = 0x12, /* spec only (ether_spec) */ + ETHER_FLOW = 0x12, /* hash or spec (ether_spec) */ /* Used for GTP-U IPv4 and IPv6. * The format of GTP packets only includes @@ -2371,7 +2371,7 @@ enum { /* Flag to enable RSS spreading of traffic matching rule (nfc only) */ #define FLOW_RSS 0x20000000 -/* L3-L4 network traffic flow hash options */ +/* L2-L4 network traffic flow hash options */ #define RXH_L2DA (1 << 1) #define RXH_VLAN (1 << 2) #define RXH_L3_PROTO (1 << 3) -- cgit v1.2.3 From 178331743ca860561f60d04a7797a2fce13f0784 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 8 Jul 2025 15:06:39 -0700 Subject: ethtool: rss: report which fields are configured for hashing Implement ETHTOOL_GRXFH over Netlink. The number of flow types is reasonable (around 20) so report all of them at once for simplicity. Do not maintain the flow ID mapping with ioctl at the uAPI level. This gives us a chance to clean up the confusion that come from RxNFC vs RxFH (flow direction vs hashing) in the ioctl. Try to align with the names used in ethtool CLI, they seem to have stood the test of time just fine. One annoyance is that we still call L4 ports the weird names, but I guess they also apply to IPSec (where they cover the SPI) so it is what it is. $ ynl --family ethtool --dump rss-get { "header": { "dev-index": 1, "dev-name": "enp1s0" }, "hfunc": 1, "hkey": b"...", "indir": [0, 1, ...], "flow-hash": { "ether": {"l2da"}, "ah-esp4": {"ip-src", "ip-dst"}, "ah-esp6": {"ip-src", "ip-dst"}, "ah4": {"ip-src", "ip-dst"}, "ah6": {"ip-src", "ip-dst"}, "esp4": {"ip-src", "ip-dst"}, "esp6": {"ip-src", "ip-dst"}, "ip4": {"ip-src", "ip-dst"}, "ip6": {"ip-src", "ip-dst"}, "sctp4": {"ip-src", "ip-dst"}, "sctp6": {"ip-src", "ip-dst"}, "udp4": {"ip-src", "ip-dst"}, "udp6": {"ip-src", "ip-dst"} "tcp4": {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"}, "tcp6": {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"}, }, } Link: https://patch.msgid.link/20250708220640.2738464-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink_generated.h | 34 ++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 8f30ffa1cd14..96027e26ffba 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -678,6 +678,39 @@ enum { ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1) }; +enum { + ETHTOOL_A_FLOW_ETHER = 1, + ETHTOOL_A_FLOW_IP4, + ETHTOOL_A_FLOW_IP6, + ETHTOOL_A_FLOW_TCP4, + ETHTOOL_A_FLOW_TCP6, + ETHTOOL_A_FLOW_UDP4, + ETHTOOL_A_FLOW_UDP6, + ETHTOOL_A_FLOW_SCTP4, + ETHTOOL_A_FLOW_SCTP6, + ETHTOOL_A_FLOW_AH4, + ETHTOOL_A_FLOW_AH6, + ETHTOOL_A_FLOW_ESP4, + ETHTOOL_A_FLOW_ESP6, + ETHTOOL_A_FLOW_AH_ESP4, + ETHTOOL_A_FLOW_AH_ESP6, + ETHTOOL_A_FLOW_GTPU4, + ETHTOOL_A_FLOW_GTPU6, + ETHTOOL_A_FLOW_GTPC4, + ETHTOOL_A_FLOW_GTPC6, + ETHTOOL_A_FLOW_GTPC_TEID4, + ETHTOOL_A_FLOW_GTPC_TEID6, + ETHTOOL_A_FLOW_GTPU_EH4, + ETHTOOL_A_FLOW_GTPU_EH6, + ETHTOOL_A_FLOW_GTPU_UL4, + ETHTOOL_A_FLOW_GTPU_UL6, + ETHTOOL_A_FLOW_GTPU_DL4, + ETHTOOL_A_FLOW_GTPU_DL6, + + __ETHTOOL_A_FLOW_CNT, + ETHTOOL_A_FLOW_MAX = (__ETHTOOL_A_FLOW_CNT - 1) +}; + enum { ETHTOOL_A_RSS_UNSPEC, ETHTOOL_A_RSS_HEADER, @@ -687,6 +720,7 @@ enum { ETHTOOL_A_RSS_HKEY, ETHTOOL_A_RSS_INPUT_XFRM, ETHTOOL_A_RSS_START_CONTEXT, + ETHTOOL_A_RSS_FLOW_HASH, __ETHTOOL_A_RSS_CNT, ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1) -- cgit v1.2.3 From 4dc5f7b2c0ccf233d16c5f3090208d70954f1e2a Mon Sep 17 00:00:00 2001 From: Jacky Chou Date: Wed, 9 Jul 2025 15:08:07 +0800 Subject: dt-bindings: clock: ast2600: Add reset definitions for MAC1 and MAC2 Add ASPEED_RESET_MAC1 and ASPEED_RESET_MAC2 reset definitions to the ast2600-clock binding header. These are required for proper reset control of the MAC1 and MAC2 ethernet controllers on the AST2600 SoC. Signed-off-by: Jacky Chou Acked-by: Conor Dooley Acked-by: Stephen Boyd Link: https://patch.msgid.link/20250709070809.2560688-3-jacky_chou@aspeedtech.com Signed-off-by: Jakub Kicinski --- include/dt-bindings/clock/ast2600-clock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/ast2600-clock.h b/include/dt-bindings/clock/ast2600-clock.h index 7ae96c7bd72f..f60fff261130 100644 --- a/include/dt-bindings/clock/ast2600-clock.h +++ b/include/dt-bindings/clock/ast2600-clock.h @@ -122,6 +122,8 @@ #define ASPEED_RESET_PCIE_DEV_OEN 20 #define ASPEED_RESET_PCIE_RC_O 19 #define ASPEED_RESET_PCIE_RC_OEN 18 +#define ASPEED_RESET_MAC2 12 +#define ASPEED_RESET_MAC1 11 #define ASPEED_RESET_PCI_DP 5 #define ASPEED_RESET_HACE 4 #define ASPEED_RESET_AHB 1 -- cgit v1.2.3 From ed677858d4fe8d165952c1794898d6fc0b65ddfe Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 9 Jul 2025 18:04:04 +0200 Subject: ALSA: hda: Move widget capability macros into hdaudio.h The get_wcaps() and co are used not only by HD-audio core but also other driver code, hence it'd be better to put into the common header instead of local.h. OTOH, there are macros of the same name like get_wcaps() that are still used in sound/pci/hda/* locally, and those conflict with each other. So we need to rename get_wcaps() (to be moved from hda-core) with the proper snd_hdac prefix for avoiding name conflicts, and define in the common hdaudio.h. Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250709160434.1859-2-tiwai@suse.de --- include/sound/hdaudio.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h index 25668eee65cf..d38234f8fe44 100644 --- a/include/sound/hdaudio.h +++ b/include/sound/hdaudio.h @@ -680,6 +680,30 @@ static inline void snd_hdac_dsp_cleanup(struct hdac_stream *azx_dev, } #endif /* CONFIG_SND_HDA_DSP_LOADER */ +/* + * Easy macros for widget capabilities + */ +#define snd_hdac_get_wcaps(codec, nid) \ + snd_hdac_read_parm(codec, nid, AC_PAR_AUDIO_WIDGET_CAP) + +/* get the widget type from widget capability bits */ +static inline int snd_hdac_get_wcaps_type(unsigned int wcaps) +{ + if (!wcaps) + return -1; /* invalid type */ + return (wcaps & AC_WCAP_TYPE) >> AC_WCAP_TYPE_SHIFT; +} + +/* get the number of supported channels */ +static inline unsigned int snd_hdac_get_wcaps_channels(u32 wcaps) +{ + unsigned int chans; + + chans = (wcaps & AC_WCAP_CHAN_CNT_EXT) >> 13; + chans = (chans + 1) * 2; + + return chans; +} /* * generic array helpers -- cgit v1.2.3 From 6bf917e9aacc360e76b74249209a4290f2e8404b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 9 Jul 2025 18:04:12 +0200 Subject: ALSA: hda: Introduce hda_codec_driver ops Until now, we use "patch_ops" embedded in hda_codec object for defining the callbacks that are used in various places to manage HD-audio codec. But from the device driver POV, this should have been rather the driver ops, instead of the callbacks in the codec object. This patch defines the driver ops for HD-audio codec driver as the replacement. We reuse the same struct hda_codec_ops, and this is put as hda_codec_driver.ops. When the driver->ops callbacks are defined, they are called primarily instead of codec->patch_ops callbacks. With converting to the driver ops, there is no need to pass the ugly patch_ops handling in hda_device_id tables. That is, driver_data field of hda_device_id becomes really optional and it can be used for passing the codec-specific data (e.g. specifying a model). The codec entries after the conversion should be with HDA_CODEC_ID() and co, instead of the former HDA_CODEC_ENTRY(). Once after converting all codec drivers to use driver ops, we can get rid of codec patch_ops. Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250709160434.1859-10-tiwai@suse.de --- include/sound/hda_codec.h | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h index c1fe6290d04d..a725ac48c20c 100644 --- a/include/sound/hda_codec.h +++ b/include/sound/hda_codec.h @@ -27,6 +27,7 @@ struct hda_beep; struct hda_codec; struct hda_pcm; struct hda_pcm_stream; +struct hda_codec_ops; /* * codec bus @@ -79,6 +80,17 @@ typedef int (*hda_codec_patch_t)(struct hda_codec *); #define HDA_CODEC_ID_GENERIC_HDMI 0x00000101 #define HDA_CODEC_ID_GENERIC 0x00000201 +#define HDA_CODEC_ID_REV_MODEL(_vid, _rev, _name, _model) \ + { .vendor_id = (_vid), .rev_id = (_rev), .name = (_name), \ + .api_version = HDA_DEV_LEGACY, .driver_data = (_model) } +#define HDA_CODEC_ID_MODEL(_vid, _name, _model) \ + HDA_CODEC_ID_REV_MODEL(_vid, 0, _name, _model) +#define HDA_CODEC_ID_REV(_vid, _rev, _name) \ + HDA_CODEC_ID_REV_MODEL(_vid, _rev, _name, 0) +#define HDA_CODEC_ID(_vid, _name) \ + HDA_CODEC_ID_REV(_vid, 0, _name) + +/* old macros for patch_ops -- to be deprecated */ #define HDA_CODEC_REV_ENTRY(_vid, _rev, _name, _patch) \ { .vendor_id = (_vid), .rev_id = (_rev), .name = (_name), \ .api_version = HDA_DEV_LEGACY, \ @@ -89,8 +101,12 @@ typedef int (*hda_codec_patch_t)(struct hda_codec *); struct hda_codec_driver { struct hdac_driver core; const struct hda_device_id *id; + const struct hda_codec_ops *ops; }; +#define hda_codec_to_driver(codec) \ + container_of((codec)->core.dev.driver, struct hda_codec_driver, core.driver) + int __hda_codec_driver_register(struct hda_codec_driver *drv, const char *name, struct module *owner); #define hda_codec_driver_register(drv) \ @@ -102,6 +118,8 @@ void hda_codec_driver_unregister(struct hda_codec_driver *drv); /* ops set by the preset patch */ struct hda_codec_ops { + int (*probe)(struct hda_codec *codec, const struct hda_device_id *id); + void (*remove)(struct hda_codec *codec); int (*build_controls)(struct hda_codec *codec); int (*build_pcms)(struct hda_codec *codec); int (*init)(struct hda_codec *codec); @@ -184,7 +202,7 @@ struct hda_codec { /* set by patch */ struct hda_codec_ops patch_ops; - /* PCM to create, set by patch_ops.build_pcms callback */ + /* PCM to create, set by hda_codec_ops.build_pcms callback */ struct list_head pcm_list_head; refcount_t pcm_ref; wait_queue_head_t remove_sleep; @@ -478,7 +496,11 @@ extern const struct dev_pm_ops hda_codec_driver_pm; static inline int hda_call_check_power_status(struct hda_codec *codec, hda_nid_t nid) { - if (codec->patch_ops.check_power_status) + struct hda_codec_driver *driver = hda_codec_to_driver(codec); + + if (driver->ops && driver->ops->check_power_status) + return driver->ops->check_power_status(codec, nid); + else if (codec->patch_ops.check_power_status) return codec->patch_ops.check_power_status(codec, nid); return 0; } -- cgit v1.2.3 From cabaf5908e586156d1d59f3fb8c51d2b169bc636 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 9 Jul 2025 18:04:27 +0200 Subject: ALSA: hda: Drop old codec binding method Now that all patch_ops usage have been converted to the new hda_codec_ops probe, we can drop patch_ops from the hda_codec, together with the calls of patch_ops callbacks. The hda_codec_ops.free callback is removed as all have been replaced with the new remove callback. Also, correct comments mentioning "patch"; it's replaced with "codec driver". Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250709160434.1859-25-tiwai@suse.de --- include/sound/hda_codec.h | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h index a725ac48c20c..ddc9c392f93f 100644 --- a/include/sound/hda_codec.h +++ b/include/sound/hda_codec.h @@ -70,12 +70,8 @@ struct hda_bus { /* * codec preset - * - * Known codecs have the patch to build and set up the controls/PCMs - * better than the generic parser. */ -typedef int (*hda_codec_patch_t)(struct hda_codec *); - + #define HDA_CODEC_ID_SKIP_PROBE 0x00000001 #define HDA_CODEC_ID_GENERIC_HDMI 0x00000101 #define HDA_CODEC_ID_GENERIC 0x00000201 @@ -90,14 +86,6 @@ typedef int (*hda_codec_patch_t)(struct hda_codec *); #define HDA_CODEC_ID(_vid, _name) \ HDA_CODEC_ID_REV(_vid, 0, _name) -/* old macros for patch_ops -- to be deprecated */ -#define HDA_CODEC_REV_ENTRY(_vid, _rev, _name, _patch) \ - { .vendor_id = (_vid), .rev_id = (_rev), .name = (_name), \ - .api_version = HDA_DEV_LEGACY, \ - .driver_data = (unsigned long)(_patch) } -#define HDA_CODEC_ENTRY(_vid, _name, _patch) \ - HDA_CODEC_REV_ENTRY(_vid, 0, _name, _patch) - struct hda_codec_driver { struct hdac_driver core; const struct hda_device_id *id; @@ -116,14 +104,13 @@ void hda_codec_driver_unregister(struct hda_codec_driver *drv); module_driver(drv, hda_codec_driver_register, \ hda_codec_driver_unregister) -/* ops set by the preset patch */ +/* ops for hda codec driver */ struct hda_codec_ops { int (*probe)(struct hda_codec *codec, const struct hda_device_id *id); void (*remove)(struct hda_codec *codec); int (*build_controls)(struct hda_codec *codec); int (*build_pcms)(struct hda_codec *codec); int (*init)(struct hda_codec *codec); - void (*free)(struct hda_codec *codec); void (*unsol_event)(struct hda_codec *codec, unsigned int res); void (*set_power_state)(struct hda_codec *codec, hda_nid_t fg, unsigned int power_state); @@ -199,9 +186,6 @@ struct hda_codec { const struct hda_device_id *preset; const char *modelname; /* model name for preset */ - /* set by patch */ - struct hda_codec_ops patch_ops; - /* PCM to create, set by hda_codec_ops.build_pcms callback */ struct list_head pcm_list_head; refcount_t pcm_ref; @@ -500,8 +484,6 @@ int hda_call_check_power_status(struct hda_codec *codec, hda_nid_t nid) if (driver->ops && driver->ops->check_power_status) return driver->ops->check_power_status(codec, nid); - else if (codec->patch_ops.check_power_status) - return codec->patch_ops.check_power_status(codec, nid); return 0; } -- cgit v1.2.3 From 5c21c5f22d0701ac6c1cafc0e8de4bf42e5c53e5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 11 Jul 2025 15:47:48 +0200 Subject: cleanup: add a scoped version of CLASS() This will make it possible to use: scoped_class() { } constructs to limit variables to certain scopes and still perform auto-cleanup. Signed-off-by: Christian Brauner --- include/linux/cleanup.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 7093e1d08af0..bee606bebaca 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -277,6 +277,14 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ class_##_name##_t var __cleanup(class_##_name##_destructor) = \ class_##_name##_constructor +#define scoped_class(_name, var, args) \ + for (CLASS(_name, var)(args); \ + __guard_ptr(_name)(&var) || !__is_cond_ptr(_name); \ + ({ goto _label; })) \ + if (0) { \ +_label: \ + break; \ + } else /* * DEFINE_GUARD(name, type, lock, unlock): -- cgit v1.2.3 From 56180dd20c19e5b0fa34822997a9ac66b517e7b3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Jul 2025 13:00:07 +0200 Subject: futex: Use RCU-based per-CPU reference counting instead of rcuref_t The use of rcuref_t for reference counting introduces a performance bottleneck when accessed concurrently by multiple threads during futex operations. Replace rcuref_t with special crafted per-CPU reference counters. The lifetime logic remains the same. The newly allocate private hash starts in FR_PERCPU state. In this state, each futex operation that requires the private hash uses a per-CPU counter (an unsigned int) for incrementing or decrementing the reference count. When the private hash is about to be replaced, the per-CPU counters are migrated to a atomic_t counter mm_struct::futex_atomic. The migration process: - Waiting for one RCU grace period to ensure all users observe the current private hash. This can be skipped if a grace period elapsed since the private hash was assigned. - futex_private_hash::state is set to FR_ATOMIC, forcing all users to use mm_struct::futex_atomic for reference counting. - After a RCU grace period, all users are guaranteed to be using the atomic counter. The per-CPU counters can now be summed up and added to the atomic_t counter. If the resulting count is zero, the hash can be safely replaced. Otherwise, active users still hold a valid reference. - Once the atomic reference count drops to zero, the next futex operation will switch to the new private hash. call_rcu_hurry() is used to speed up transition which otherwise might be delay with RCU_LAZY. There is nothing wrong with using call_rcu(). The side effects would be that on auto scaling the new hash is used later and the SET_SLOTS prctl() will block longer. [bigeasy: commit description + mm get/ put_async] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250710110011.384614-3-bigeasy@linutronix.de --- include/linux/futex.h | 16 +++++----------- include/linux/mm_types.h | 5 +++++ include/linux/sched/mm.h | 2 +- 3 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index b37193653e6b..9e9750f04980 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -85,18 +85,12 @@ int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4) #ifdef CONFIG_FUTEX_PRIVATE_HASH int futex_hash_allocate_default(void); void futex_hash_free(struct mm_struct *mm); - -static inline void futex_mm_init(struct mm_struct *mm) -{ - RCU_INIT_POINTER(mm->futex_phash, NULL); - mm->futex_phash_new = NULL; - mutex_init(&mm->futex_hash_lock); -} +int futex_mm_init(struct mm_struct *mm); #else /* !CONFIG_FUTEX_PRIVATE_HASH */ static inline int futex_hash_allocate_default(void) { return 0; } -static inline void futex_hash_free(struct mm_struct *mm) { } -static inline void futex_mm_init(struct mm_struct *mm) { } +static inline int futex_hash_free(struct mm_struct *mm) { return 0; } +static inline int futex_mm_init(struct mm_struct *mm) { return 0; } #endif /* CONFIG_FUTEX_PRIVATE_HASH */ #else /* !CONFIG_FUTEX */ @@ -118,8 +112,8 @@ static inline int futex_hash_allocate_default(void) { return 0; } -static inline void futex_hash_free(struct mm_struct *mm) { } -static inline void futex_mm_init(struct mm_struct *mm) { } +static inline int futex_hash_free(struct mm_struct *mm) { return 0; } +static inline int futex_mm_init(struct mm_struct *mm) { return 0; } #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d6b91e8a66d6..0f0662157066 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1070,6 +1070,11 @@ struct mm_struct { struct mutex futex_hash_lock; struct futex_private_hash __rcu *futex_phash; struct futex_private_hash *futex_phash_new; + /* futex-ref */ + unsigned long futex_batches; + struct rcu_head futex_rcu; + atomic_long_t futex_atomic; + unsigned int __percpu *futex_ref; #endif unsigned long hiwater_rss; /* High-watermark of RSS usage */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index b13474825130..2201da0afecc 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -140,7 +140,7 @@ static inline bool mmget_not_zero(struct mm_struct *mm) /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); -#ifdef CONFIG_MMU +#if defined(CONFIG_MMU) || defined(CONFIG_FUTEX_PRIVATE_HASH) /* same as above but performs the slow path from the async context. Can * be called from the atomic context as well */ -- cgit v1.2.3 From 760e6f7befbab9a84c54457a8ee45313b7b91ee5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 10 Jul 2025 13:00:09 +0200 Subject: futex: Remove support for IMMUTABLE The FH_FLAG_IMMUTABLE flag was meant to avoid the reference counting on the private hash and so to avoid the performance regression on big machines. With the switch to per-CPU counter this is no longer needed. That flag was never useable on any released kernel. Remove any support for IMMUTABLE while preserve the flags argument and enforce it to be zero. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250710110011.384614-5-bigeasy@linutronix.de --- include/uapi/linux/prctl.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 43dec6eed559..3b93fb906e3c 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -367,8 +367,6 @@ struct prctl_mm_map { /* FUTEX hash management */ #define PR_FUTEX_HASH 78 # define PR_FUTEX_HASH_SET_SLOTS 1 -# define FH_FLAG_IMMUTABLE (1ULL << 0) # define PR_FUTEX_HASH_GET_SLOTS 2 -# define PR_FUTEX_HASH_GET_IMMUTABLE 3 #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From ed42eee797ff3dc889ade63c1dd7c4f430699e23 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:04 -0700 Subject: iommufd/viommu: Add driver-defined vDEVICE support NVIDIA VCMDQ driver will have a driver-defined vDEVICE structure and do some HW configurations with that. To allow IOMMU drivers to define their own vDEVICE structures, move the struct iommufd_vdevice to the public header and provide a pair of viommu ops, similar to get_viommu_size and viommu_init. Doing this, however, creates a new window between the vDEVICE allocation and its driver-level initialization, during which an abort could happen but it can't invoke a driver destroy function from the struct viommu_ops since the driver structure isn't initialized yet. vIOMMU object doesn't have this problem, since its destroy op is set via the viommu_ops by the driver viommu_init function. Thus, vDEVICE should do something similar: add a destroy function pointer inside the struct iommufd_vdevice instead of the struct iommufd_viommu_ops. Note that there is unlikely a use case for a type dependent vDEVICE, so a static vdevice_size is probably enough for the near term instead of a get_vdevice_size function op. Link: https://patch.msgid.link/r/1e751c01da7863c669314d8e27fdb89eabcf5605.1752126748.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Pranjal Shrivastava Reviewed-by: Lu Baolu Reviewed-by: Vasant Hegde Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 2d1bf2f97ee3..bdd10a85eeef 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -104,6 +104,21 @@ struct iommufd_viommu { enum iommu_viommu_type type; }; +struct iommufd_vdevice { + struct iommufd_object obj; + struct iommufd_viommu *viommu; + struct device *dev; + + /* + * Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID of + * AMD IOMMU, and vRID of Intel VT-d + */ + u64 virt_id; + + /* Clean up all driver-specific parts of an iommufd_vdevice */ + void (*destroy)(struct iommufd_vdevice *vdev); +}; + /** * struct iommufd_viommu_ops - vIOMMU specific operations * @destroy: Clean up all driver-specific parts of an iommufd_viommu. The memory @@ -120,6 +135,14 @@ struct iommufd_viommu { * array->entry_num to report the number of handled requests. * The data structure of the array entry must be defined in * include/uapi/linux/iommufd.h + * @vdevice_size: Size of the driver-defined vDEVICE structure per this vIOMMU + * @vdevice_init: Initialize the driver-level structure of a vDEVICE object, or + * related HW procedure. @vdev is already initialized by iommufd + * core: vdev->dev and vdev->viommu pointers; vdev->id carries a + * per-vIOMMU virtual ID (refer to struct iommu_vdevice_alloc in + * include/uapi/linux/iommufd.h) + * If driver has a deinit function to revert what vdevice_init op + * does, it should set it to the @vdev->destroy function pointer */ struct iommufd_viommu_ops { void (*destroy)(struct iommufd_viommu *viommu); @@ -128,6 +151,8 @@ struct iommufd_viommu_ops { const struct iommu_user_data *user_data); int (*cache_invalidate)(struct iommufd_viommu *viommu, struct iommu_user_data_array *array); + const size_t vdevice_size; + int (*vdevice_init)(struct iommufd_vdevice *vdev); }; #if IS_ENABLED(CONFIG_IOMMUFD) @@ -224,4 +249,10 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \ BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_viommu, \ ((drv_struct *)NULL)->member))) + +#define VDEVICE_STRUCT_SIZE(drv_struct, member) \ + (sizeof(drv_struct) + \ + BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \ + BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_vdevice, \ + ((drv_struct *)NULL)->member))) #endif -- cgit v1.2.3 From e2e9360022585c21dc30d2b19f5866c252f40806 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:05 -0700 Subject: iommufd/viommu: Introduce IOMMUFD_OBJ_HW_QUEUE and its related struct Add IOMMUFD_OBJ_HW_QUEUE with an iommufd_hw_queue structure, representing a HW-accelerated queue type of IOMMU's physical queue that can be passed through to a user space VM for direct hardware control, such as: - NVIDIA's Virtual Command Queue - AMD vIOMMU's Command Buffer, Event Log Buffers, and PPR Log Buffers Add new viommu ops for iommufd to communicate with IOMMU drivers to fetch supported HW queue structure size and to forward user space ioctls to the IOMMU drivers for initialization/destroy. As the existing HWs, NVIDIA's VCMDQs access the guest memory via physical addresses, while AMD's Buffers access the guest memory via guest physical addresses (i.e. iova of the nesting parent HWPT). Separate two mutually exclusive hw_queue_init and hw_queue_init_phys ops to indicate whether a vIOMMU HW accesses the guest queue in the guest physical space (via iova) or the host physical space (via pa). In a latter case, the iommufd core will validate the physical pages of a given guest queue, to ensure the underlying physical pages are contiguous and pinned. Since this is introduced with NVIDIA's VCMDQs, add hw_queue_init_phys for now, and leave some notes for hw_queue_init in the near future (for AMD). Either NVIDIA's or AMD's HW is a multi-queue model: NVIDIA's will be only one type in enum iommu_hw_queue_type, while AMD's will be three different types (two of which will have multi queues). Compared to letting the core manage multiple queues with three types per vIOMMU object, it'd be easier for the driver to manage that by having three different driver-structure arrays per vIOMMU object. Thus, pass in the index to the init op. Link: https://patch.msgid.link/r/6939b73699e278e60ce167e911b3d9be68882bad.1752126748.git.nicolinc@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Pranjal Shrivastava Reviewed-by: Vasant Hegde Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 42 ++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/iommufd.h | 9 +++++++++ 2 files changed, 51 insertions(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index bdd10a85eeef..f13f3ca6adb5 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -37,6 +37,7 @@ enum iommufd_object_type { IOMMUFD_OBJ_VIOMMU, IOMMUFD_OBJ_VDEVICE, IOMMUFD_OBJ_VEVENTQ, + IOMMUFD_OBJ_HW_QUEUE, #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif @@ -119,6 +120,19 @@ struct iommufd_vdevice { void (*destroy)(struct iommufd_vdevice *vdev); }; +struct iommufd_hw_queue { + struct iommufd_object obj; + struct iommufd_viommu *viommu; + + u64 base_addr; /* in guest physical address space */ + size_t length; + + enum iommu_hw_queue_type type; + + /* Clean up all driver-specific parts of an iommufd_hw_queue */ + void (*destroy)(struct iommufd_hw_queue *hw_queue); +}; + /** * struct iommufd_viommu_ops - vIOMMU specific operations * @destroy: Clean up all driver-specific parts of an iommufd_viommu. The memory @@ -143,6 +157,22 @@ struct iommufd_vdevice { * include/uapi/linux/iommufd.h) * If driver has a deinit function to revert what vdevice_init op * does, it should set it to the @vdev->destroy function pointer + * @get_hw_queue_size: Get the size of a driver-defined HW queue structure for a + * given @viommu corresponding to @queue_type. Driver should + * return 0 if HW queue aren't supported accordingly. It is + * required for driver to use the HW_QUEUE_STRUCT_SIZE macro + * to sanitize the driver-level HW queue structure related + * to the core one + * @hw_queue_init_phys: Initialize the driver-level structure of a HW queue that + * is initialized with its core-level structure that holds + * all the info about a guest queue memory. + * Driver providing this op indicates that HW accesses the + * guest queue memory via physical addresses. + * @index carries the logical HW QUEUE ID per vIOMMU in a + * guest VM, for a multi-queue model. @base_addr_pa carries + * the physical location of the guest queue + * If driver has a deinit function to revert what this op + * does, it should set it to the @hw_queue->destroy pointer */ struct iommufd_viommu_ops { void (*destroy)(struct iommufd_viommu *viommu); @@ -153,6 +183,11 @@ struct iommufd_viommu_ops { struct iommu_user_data_array *array); const size_t vdevice_size; int (*vdevice_init)(struct iommufd_vdevice *vdev); + size_t (*get_hw_queue_size)(struct iommufd_viommu *viommu, + enum iommu_hw_queue_type queue_type); + /* AMD's HW will add hw_queue_init simply using @hw_queue->base_addr */ + int (*hw_queue_init_phys)(struct iommufd_hw_queue *hw_queue, u32 index, + phys_addr_t base_addr_pa); }; #if IS_ENABLED(CONFIG_IOMMUFD) @@ -255,4 +290,11 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \ BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_vdevice, \ ((drv_struct *)NULL)->member))) + +#define HW_QUEUE_STRUCT_SIZE(drv_struct, member) \ + (sizeof(drv_struct) + \ + BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \ + BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_hw_queue, \ + ((drv_struct *)NULL)->member))) + #endif diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 04eee77335cf..640a8b5147c2 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -1147,4 +1147,13 @@ struct iommu_veventq_alloc { __u32 __reserved; }; #define IOMMU_VEVENTQ_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VEVENTQ_ALLOC) + +/** + * enum iommu_hw_queue_type - HW Queue Type + * @IOMMU_HW_QUEUE_TYPE_DEFAULT: Reserved for future use + */ +enum iommu_hw_queue_type { + IOMMU_HW_QUEUE_TYPE_DEFAULT = 0, +}; + #endif -- cgit v1.2.3 From 2238ddc2b0560734c2dabb1c1fb4b342b5193625 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:06 -0700 Subject: iommufd/viommu: Add IOMMUFD_CMD_HW_QUEUE_ALLOC ioctl Introduce a new IOMMUFD_CMD_HW_QUEUE_ALLOC ioctl for user space to allocate a HW QUEUE object for a vIOMMU specific HW-accelerated queue, e.g.: - NVIDIA's Virtual Command Queue - AMD vIOMMU's Command Buffer, Event Log Buffers, and PPR Log Buffers Since this is introduced with NVIDIA's VCMDQs that access the guest memory in the physical address space, add an iommufd_hw_queue_alloc_phys() helper that will create an access object to the queue memory in the IOAS, to avoid the mappings of the guest memory from being unmapped, during the life cycle of the HW queue object. AMD's HW will need an hw_queue_init op that is mutually exclusive with the hw_queue_init_phys op, and their case will bypass the access part, i.e. no iommufd_hw_queue_alloc_phys() call. Link: https://patch.msgid.link/r/dab4ace747deb46c1fe70a5c663307f46990ae56.1752126748.git.nicolinc@nvidia.com Reviewed-by: Pranjal Shrivastava Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 1 + include/uapi/linux/iommufd.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index f13f3ca6adb5..ce4011a2fc27 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -123,6 +123,7 @@ struct iommufd_vdevice { struct iommufd_hw_queue { struct iommufd_object obj; struct iommufd_viommu *viommu; + struct iommufd_access *access; u64 base_addr; /* in guest physical address space */ size_t length; diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 640a8b5147c2..b928c1ed2395 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -56,6 +56,7 @@ enum { IOMMUFD_CMD_VDEVICE_ALLOC = 0x91, IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92, IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93, + IOMMUFD_CMD_HW_QUEUE_ALLOC = 0x94, }; /** @@ -1156,4 +1157,37 @@ enum iommu_hw_queue_type { IOMMU_HW_QUEUE_TYPE_DEFAULT = 0, }; +/** + * struct iommu_hw_queue_alloc - ioctl(IOMMU_HW_QUEUE_ALLOC) + * @size: sizeof(struct iommu_hw_queue_alloc) + * @flags: Must be 0 + * @viommu_id: Virtual IOMMU ID to associate the HW queue with + * @type: One of enum iommu_hw_queue_type + * @index: The logical index to the HW queue per virtual IOMMU for a multi-queue + * model + * @out_hw_queue_id: The ID of the new HW queue + * @nesting_parent_iova: Base address of the queue memory in the guest physical + * address space + * @length: Length of the queue memory + * + * Allocate a HW queue object for a vIOMMU-specific HW-accelerated queue, which + * allows HW to access a guest queue memory described using @nesting_parent_iova + * and @length. + * + * A vIOMMU can allocate multiple queues, but it must use a different @index per + * type to separate each allocation, e.g:: + * + * Type1 HW queue0, Type1 HW queue1, Type2 HW queue0, ... + */ +struct iommu_hw_queue_alloc { + __u32 size; + __u32 flags; + __u32 viommu_id; + __u32 type; + __u32 index; + __u32 out_hw_queue_id; + __aligned_u64 nesting_parent_iova; + __aligned_u64 length; +}; +#define IOMMU_HW_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HW_QUEUE_ALLOC) #endif -- cgit v1.2.3 From 0b37d892d0425811618a737037b0212884cc25ae Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:07 -0700 Subject: iommufd/driver: Add iommufd_hw_queue_depend/undepend() helpers NVIDIA Virtual Command Queue is one of the iommufd users exposing vIOMMU features to user space VMs. Its hardware has a strict rule when mapping and unmapping multiple global CMDQVs to/from a VM-owned VINTF, requiring mappings in ascending order and unmappings in descending order. The tegra241-cmdqv driver can apply the rule for a mapping in the LVCMDQ allocation handler. However, it can't do the same for an unmapping since user space could start random destroy calls breaking the rule, while the destroy op in the driver level can't reject a destroy call as it returns void. Add iommufd_hw_queue_depend/undepend for-driver helpers, allowing LVCMDQ allocator to refcount_inc() a sibling LVCMDQ object and LVCMDQ destroyer to refcount_dec(), so that iommufd core will help block a random destroy call that breaks the rule. This is a bit of compromise, because a driver might end up with abusing the API that deadlocks the objects. So restrict the API to a dependency between two driver-allocated objects of the same type, as iommufd would unlikely build any core-level dependency in this case. And encourage to use the macro version that currently supports the HW QUEUE objects only. Link: https://patch.msgid.link/r/2735c32e759c82f2e6c87cb32134eaf09b7589b5.1752126748.git.nicolinc@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Pranjal Shrivastava Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index ce4011a2fc27..fa23439fa483 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -251,6 +251,10 @@ static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) #endif /* CONFIG_IOMMUFD */ #if IS_ENABLED(CONFIG_IOMMUFD_DRIVER_CORE) +int _iommufd_object_depend(struct iommufd_object *obj_dependent, + struct iommufd_object *obj_depended); +void _iommufd_object_undepend(struct iommufd_object *obj_dependent, + struct iommufd_object *obj_depended); struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id); int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, @@ -259,6 +263,18 @@ int iommufd_viommu_report_event(struct iommufd_viommu *viommu, enum iommu_veventq_type type, void *event_data, size_t data_len); #else /* !CONFIG_IOMMUFD_DRIVER_CORE */ +static inline int _iommufd_object_depend(struct iommufd_object *obj_dependent, + struct iommufd_object *obj_depended) +{ + return -EOPNOTSUPP; +} + +static inline void +_iommufd_object_undepend(struct iommufd_object *obj_dependent, + struct iommufd_object *obj_depended) +{ +} + static inline struct device * iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) { @@ -298,4 +314,32 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_hw_queue, \ ((drv_struct *)NULL)->member))) +/* + * Helpers for IOMMU driver to build/destroy a dependency between two sibling + * structures created by one of the allocators above + */ +#define iommufd_hw_queue_depend(dependent, depended, member) \ + ({ \ + int ret = -EINVAL; \ + \ + static_assert(__same_type(struct iommufd_hw_queue, \ + dependent->member)); \ + static_assert(__same_type(typeof(*dependent), *depended)); \ + if (!WARN_ON_ONCE(dependent->member.viommu != \ + depended->member.viommu)) \ + ret = _iommufd_object_depend(&dependent->member.obj, \ + &depended->member.obj); \ + ret; \ + }) + +#define iommufd_hw_queue_undepend(dependent, depended, member) \ + ({ \ + static_assert(__same_type(struct iommufd_hw_queue, \ + dependent->member)); \ + static_assert(__same_type(typeof(*dependent), *depended)); \ + WARN_ON_ONCE(dependent->member.viommu != \ + depended->member.viommu); \ + _iommufd_object_undepend(&dependent->member.obj, \ + &depended->member.obj); \ + }) #endif -- cgit v1.2.3 From 0b226380d4cce2e6ee50800d861934d474a30121 Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Thu, 10 Jul 2025 00:21:46 +0200 Subject: dt-bindings: memory: tegra: Add Tegra264 support Add bindings for the Memory Controller (MC) and External Memory Controller (EMC) found on the Tegra264 SoC. Tegra264 SoC has a different number of interrupt lines for MC sub-units: UCF_SOC, hub, hub common, syncpoint and MC channel. The total number of interrupt lines is eight. Update maxItems for MC interrupts accordingly. This also adds a header containing the memory client ID definitions that are used by the interconnects property in DT and the tegra_mc_client table in the MC driver. These IDs are defined by the hardware, so the numbering doesn't start at 0 and contains holes. Also added are the stream IDs for various hardware blocks found on Tegra264. These are allocated as blocks of 256 IDs and each block can be subdivided for additional fine-grained isolation if needed. Signed-off-by: Sumit Gupta [treding@nvidia.com: add SMMU stream IDs, squash patches] Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250709222147.3758356-2-thierry.reding@gmail.com Signed-off-by: Thierry Reding --- include/dt-bindings/memory/nvidia,tegra264.h | 136 +++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 include/dt-bindings/memory/nvidia,tegra264.h (limited to 'include') diff --git a/include/dt-bindings/memory/nvidia,tegra264.h b/include/dt-bindings/memory/nvidia,tegra264.h new file mode 100644 index 000000000000..521405c01f84 --- /dev/null +++ b/include/dt-bindings/memory/nvidia,tegra264.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. */ + +#ifndef DT_BINDINGS_MEMORY_NVIDIA_TEGRA264_H +#define DT_BINDINGS_MEMORY_NVIDIA_TEGRA264_H + +#define TEGRA264_SID(x) ((x) << 8) + +/* + * SMMU stream IDs + */ + +#define TEGRA264_SID_AON TEGRA264_SID(0x01) +#define TEGRA264_SID_APE TEGRA264_SID(0x02) +#define TEGRA264_SID_ETR TEGRA264_SID(0x03) +#define TEGRA264_SID_BPMP TEGRA264_SID(0x04) +#define TEGRA264_SID_DCE TEGRA264_SID(0x05) +#define TEGRA264_SID_EQOS TEGRA264_SID(0x06) +#define TEGRA264_SID_GPCDMA TEGRA264_SID(0x08) +#define TEGRA264_SID_DISP TEGRA264_SID(0x09) +#define TEGRA264_SID_HDA TEGRA264_SID(0x0a) +#define TEGRA264_SID_HOST1X TEGRA264_SID(0x0b) +#define TEGRA264_SID_ISP0 TEGRA264_SID(0x0c) +#define TEGRA264_SID_ISP1 TEGRA264_SID(0x0d) +#define TEGRA264_SID_PMA0 TEGRA264_SID(0x0e) +#define TEGRA264_SID_FSI0 TEGRA264_SID(0x0f) +#define TEGRA264_SID_FSI1 TEGRA264_SID(0x10) +#define TEGRA264_SID_PVA TEGRA264_SID(0x11) +#define TEGRA264_SID_SDMMC0 TEGRA264_SID(0x12) +#define TEGRA264_SID_MGBE0 TEGRA264_SID(0x13) +#define TEGRA264_SID_MGBE1 TEGRA264_SID(0x14) +#define TEGRA264_SID_MGBE2 TEGRA264_SID(0x15) +#define TEGRA264_SID_MGBE3 TEGRA264_SID(0x16) +#define TEGRA264_SID_MSSSEQ TEGRA264_SID(0x17) +#define TEGRA264_SID_SE TEGRA264_SID(0x18) +#define TEGRA264_SID_SEU1 TEGRA264_SID(0x19) +#define TEGRA264_SID_SEU2 TEGRA264_SID(0x1a) +#define TEGRA264_SID_SEU3 TEGRA264_SID(0x1b) +#define TEGRA264_SID_PSC TEGRA264_SID(0x1c) +#define TEGRA264_SID_OESP TEGRA264_SID(0x23) +#define TEGRA264_SID_SB TEGRA264_SID(0x24) +#define TEGRA264_SID_XSPI0 TEGRA264_SID(0x25) +#define TEGRA264_SID_TSEC TEGRA264_SID(0x29) +#define TEGRA264_SID_UFS TEGRA264_SID(0x2a) +#define TEGRA264_SID_RCE TEGRA264_SID(0x2b) +#define TEGRA264_SID_RCE1 TEGRA264_SID(0x2c) +#define TEGRA264_SID_VI TEGRA264_SID(0x2e) +#define TEGRA264_SID_VI1 TEGRA264_SID(0x2f) +#define TEGRA264_SID_VIC TEGRA264_SID(0x30) +#define TEGRA264_SID_XUSB_DEV TEGRA264_SID(0x32) +#define TEGRA264_SID_XUSB_DEV1 TEGRA264_SID(0x33) +#define TEGRA264_SID_XUSB_DEV2 TEGRA264_SID(0x34) +#define TEGRA264_SID_XUSB_DEV3 TEGRA264_SID(0x35) +#define TEGRA264_SID_XUSB_DEV4 TEGRA264_SID(0x36) +#define TEGRA264_SID_XUSB_DEV5 TEGRA264_SID(0x37) + +/* + * memory client IDs + */ + +/* HOST1X read client */ +#define TEGRA264_MEMORY_CLIENT_HOST1XR 0x16 +/* VIC read client */ +#define TEGRA264_MEMORY_CLIENT_VICR 0x6c +/* VIC Write client */ +#define TEGRA264_MEMORY_CLIENT_VICW 0x6d +/* VI R5 Write client */ +#define TEGRA264_MEMORY_CLIENT_VIW 0x72 +#define TEGRA264_MEMORY_CLIENT_NVDECSRD2MC 0x78 +#define TEGRA264_MEMORY_CLIENT_NVDECSWR2MC 0x79 +/* Audio processor(APE) Read client */ +#define TEGRA264_MEMORY_CLIENT_APER 0x7a +/* Audio processor(APE) Write client */ +#define TEGRA264_MEMORY_CLIENT_APEW 0x7b +/* Audio DMA Read client */ +#define TEGRA264_MEMORY_CLIENT_APEDMAR 0x9f +/* Audio DMA Write client */ +#define TEGRA264_MEMORY_CLIENT_APEDMAW 0xa0 +#define TEGRA264_MEMORY_CLIENT_GPUR02MC 0xb6 +#define TEGRA264_MEMORY_CLIENT_GPUW02MC 0xb7 +/* VI Falcon Read client */ +#define TEGRA264_MEMORY_CLIENT_VIFALCONR 0xbc +/* VI Falcon Write client */ +#define TEGRA264_MEMORY_CLIENT_VIFALCONW 0xbd +/* Read Client of RCE */ +#define TEGRA264_MEMORY_CLIENT_RCER 0xd2 +/* Write client of RCE */ +#define TEGRA264_MEMORY_CLIENT_RCEW 0xd3 +/* PCIE0/MSI Write clients */ +#define TEGRA264_MEMORY_CLIENT_PCIE0W 0xd9 +/* PCIE1/RPX4 Read clients */ +#define TEGRA264_MEMORY_CLIENT_PCIE1R 0xda +/* PCIE1/RPX4 Write clients */ +#define TEGRA264_MEMORY_CLIENT_PCIE1W 0xdb +/* PCIE2/DMX4 Read clients */ +#define TEGRA264_MEMORY_CLIENT_PCIE2AR 0xdc +/* PCIE2/DMX4 Write clients */ +#define TEGRA264_MEMORY_CLIENT_PCIE2AW 0xdd +/* PCIE3/RPX4 Read clients */ +#define TEGRA264_MEMORY_CLIENT_PCIE3R 0xde +/* PCIE3/RPX4 Write clients */ +#define TEGRA264_MEMORY_CLIENT_PCIE3W 0xdf +/* PCIE4/DMX8 Read clients */ +#define TEGRA264_MEMORY_CLIENT_PCIE4R 0xe0 +/* PCIE4/DMX8 Write clients */ +#define TEGRA264_MEMORY_CLIENT_PCIE4W 0xe1 +/* PCIE5/DMX4 Read clients */ +#define TEGRA264_MEMORY_CLIENT_PCIE5R 0xe2 +/* PCIE5/DMX4 Write clients */ +#define TEGRA264_MEMORY_CLIENT_PCIE5W 0xe3 +/* UFS Read client */ +#define TEGRA264_MEMORY_CLIENT_UFSR 0x15c +/* UFS write client */ +#define TEGRA264_MEMORY_CLIENT_UFSW 0x15d +/* HDA Read client */ +#define TEGRA264_MEMORY_CLIENT_HDAR 0x17c +/* HDA Write client */ +#define TEGRA264_MEMORY_CLIENT_HDAW 0x17d +/* Disp ISO Read Client */ +#define TEGRA264_MEMORY_CLIENT_DISPR 0x182 +/* MGBE0 Read mccif */ +#define TEGRA264_MEMORY_CLIENT_MGBE0R 0x1a2 +/* MGBE0 Write mccif */ +#define TEGRA264_MEMORY_CLIENT_MGBE0W 0x1a3 +/* MGBE1 Read mccif */ +#define TEGRA264_MEMORY_CLIENT_MGBE1R 0x1a4 +/* MGBE1 Write mccif */ +#define TEGRA264_MEMORY_CLIENT_MGBE1W 0x1a5 +/* VI1 R5 Write client */ +#define TEGRA264_MEMORY_CLIENT_VI1W 0x1a6 +/* SDMMC0 Read mccif */ +#define TEGRA264_MEMORY_CLIENT_SDMMC0R 0x1c2 +/* SDMMC0 Write mccif */ +#define TEGRA264_MEMORY_CLIENT_SDMMC0W 0x1c3 + +#endif /* DT_BINDINGS_MEMORY_NVIDIA_TEGRA264_H */ -- cgit v1.2.3 From 319cc06db42ac0cd6256a26b4ea21b52a6fe6308 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 8 Jul 2025 10:28:11 +0200 Subject: dt-bindings: Add Tegra264 clock and reset definitions The BPMP firmware on Tegra264 defines a set of IDs for clock and reset resources. These are not enumerations but provided by hardware, and 0 is a reserved value, hence the numbering starts at 1. Acked-by: Rob Herring (Arm) Signed-off-by: Thierry Reding --- include/dt-bindings/clock/nvidia,tegra264.h | 466 ++++++++++++++++++++++++++++ include/dt-bindings/reset/nvidia,tegra264.h | 92 ++++++ 2 files changed, 558 insertions(+) create mode 100644 include/dt-bindings/clock/nvidia,tegra264.h create mode 100644 include/dt-bindings/reset/nvidia,tegra264.h (limited to 'include') diff --git a/include/dt-bindings/clock/nvidia,tegra264.h b/include/dt-bindings/clock/nvidia,tegra264.h new file mode 100644 index 000000000000..0fc2ad5e6cef --- /dev/null +++ b/include/dt-bindings/clock/nvidia,tegra264.h @@ -0,0 +1,466 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (c) 2022-2025, NVIDIA CORPORATION. All rights reserved. */ + +#ifndef DT_BINDINGS_CLOCK_NVIDIA_TEGRA264_H +#define DT_BINDINGS_CLOCK_NVIDIA_TEGRA264_H + +#define TEGRA264_CLK_OSC 1 +#define TEGRA264_CLK_CLK_S 2 +#define TEGRA264_CLK_JTAG_REG 3 +#define TEGRA264_CLK_SPLL 4 +#define TEGRA264_CLK_SPLL_OUT0 5 +#define TEGRA264_CLK_SPLL_OUT1 6 +#define TEGRA264_CLK_SPLL_OUT2 7 +#define TEGRA264_CLK_SPLL_OUT3 8 +#define TEGRA264_CLK_SPLL_OUT4 9 +#define TEGRA264_CLK_SPLL_OUT5 10 +#define TEGRA264_CLK_SPLL_OUT6 11 +#define TEGRA264_CLK_SPLL_OUT7 12 +#define TEGRA264_CLK_AON_I2C 13 +#define TEGRA264_CLK_HOST1X 14 +#define TEGRA264_CLK_ISP 15 +#define TEGRA264_CLK_ISP1 16 +#define TEGRA264_CLK_ISP_ROOT 17 +#define TEGRA264_CLK_NAFLL_PVA0_CORE 18 +#define TEGRA264_CLK_NAFLL_PVA0_VPS 19 +#define TEGRA264_CLK_NVCSI 20 +#define TEGRA264_CLK_NVCSILP 21 +#define TEGRA264_CLK_PLLP_OUT0 22 +#define TEGRA264_CLK_PVA0_CPU_AXI 23 +#define TEGRA264_CLK_PVA0_VPS 24 +#define TEGRA264_CLK_PWM10 25 +#define TEGRA264_CLK_PWM2 26 +#define TEGRA264_CLK_PWM3 27 +#define TEGRA264_CLK_PWM4 28 +#define TEGRA264_CLK_PWM5 29 +#define TEGRA264_CLK_PWM9 30 +#define TEGRA264_CLK_QSPI0 31 +#define TEGRA264_CLK_QSPI0_2X_PM 32 +#define TEGRA264_CLK_RCE1_CPU 33 +#define TEGRA264_CLK_RCE1_NIC 34 +#define TEGRA264_CLK_RCE_CPU 35 +#define TEGRA264_CLK_RCE_NIC 36 +#define TEGRA264_CLK_SE 37 +#define TEGRA264_CLK_SEU1 38 +#define TEGRA264_CLK_SEU2 39 +#define TEGRA264_CLK_SEU3 40 +#define TEGRA264_CLK_SE_ROOT 41 +#define TEGRA264_CLK_SPI1 42 +#define TEGRA264_CLK_SPI2 43 +#define TEGRA264_CLK_SPI3 44 +#define TEGRA264_CLK_SPI4 45 +#define TEGRA264_CLK_SPI5 46 +#define TEGRA264_CLK_TOP_I2C 47 +#define TEGRA264_CLK_TSEC 48 +#define TEGRA264_CLK_TSEC_PKA 49 +#define TEGRA264_CLK_UART0 50 +#define TEGRA264_CLK_UART10 51 +#define TEGRA264_CLK_UART11 52 +#define TEGRA264_CLK_UART4 53 +#define TEGRA264_CLK_UART5 54 +#define TEGRA264_CLK_UART8 55 +#define TEGRA264_CLK_UART9 56 +#define TEGRA264_CLK_VI 57 +#define TEGRA264_CLK_VI1 58 +#define TEGRA264_CLK_VIC 59 +#define TEGRA264_CLK_VI_ROOT 60 +#define TEGRA264_CLK_DISPPLL 61 +#define TEGRA264_CLK_SPPLL0 62 +#define TEGRA264_CLK_SPPLL0_CLKOUT1A 63 +#define TEGRA264_CLK_SPPLL0_CLKOUT2A 64 +#define TEGRA264_CLK_SPPLL1 65 +#define TEGRA264_CLK_VPLL0 66 +#define TEGRA264_CLK_VPLL1 67 +#define TEGRA264_CLK_VPLL2 68 +#define TEGRA264_CLK_VPLL3 69 +#define TEGRA264_CLK_VPLL4 70 +#define TEGRA264_CLK_VPLL5 71 +#define TEGRA264_CLK_VPLL6 72 +#define TEGRA264_CLK_VPLL7 73 +#define TEGRA264_CLK_RG0_DIV 74 +#define TEGRA264_CLK_RG1_DIV 75 +#define TEGRA264_CLK_RG2_DIV 76 +#define TEGRA264_CLK_RG3_DIV 77 +#define TEGRA264_CLK_RG4_DIV 78 +#define TEGRA264_CLK_RG5_DIV 79 +#define TEGRA264_CLK_RG6_DIV 80 +#define TEGRA264_CLK_RG7_DIV 81 +#define TEGRA264_CLK_RG0 82 +#define TEGRA264_CLK_RG1 83 +#define TEGRA264_CLK_RG2 84 +#define TEGRA264_CLK_RG3 85 +#define TEGRA264_CLK_RG4 86 +#define TEGRA264_CLK_RG5 87 +#define TEGRA264_CLK_RG6 88 +#define TEGRA264_CLK_RG7 89 +#define TEGRA264_CLK_DISP 90 +#define TEGRA264_CLK_DSC 91 +#define TEGRA264_CLK_DSC_ROOT 92 +#define TEGRA264_CLK_HUB 93 +#define TEGRA264_CLK_VPLLX_SOR0_MUXED 94 +#define TEGRA264_CLK_VPLLX_SOR1_MUXED 95 +#define TEGRA264_CLK_VPLLX_SOR2_MUXED 96 +#define TEGRA264_CLK_VPLLX_SOR3_MUXED 97 +#define TEGRA264_CLK_LINKA_SYM 98 +#define TEGRA264_CLK_LINKB_SYM 99 +#define TEGRA264_CLK_LINKC_SYM 100 +#define TEGRA264_CLK_LINKD_SYM 101 +#define TEGRA264_CLK_PRE_SOR0 102 +#define TEGRA264_CLK_PRE_SOR1 103 +#define TEGRA264_CLK_PRE_SOR2 104 +#define TEGRA264_CLK_PRE_SOR3 105 +#define TEGRA264_CLK_SOR0_PLL_REF 106 +#define TEGRA264_CLK_SOR1_PLL_REF 107 +#define TEGRA264_CLK_SOR2_PLL_REF 108 +#define TEGRA264_CLK_SOR3_PLL_REF 109 +#define TEGRA264_CLK_SOR0_PAD 110 +#define TEGRA264_CLK_SOR1_PAD 111 +#define TEGRA264_CLK_SOR2_PAD 112 +#define TEGRA264_CLK_SOR3_PAD 113 +#define TEGRA264_CLK_SOR0_REF 114 +#define TEGRA264_CLK_SOR1_REF 115 +#define TEGRA264_CLK_SOR2_REF 116 +#define TEGRA264_CLK_SOR3_REF 117 +#define TEGRA264_CLK_SOR0_DIV 118 +#define TEGRA264_CLK_SOR1_DIV 119 +#define TEGRA264_CLK_SOR2_DIV 120 +#define TEGRA264_CLK_SOR3_DIV 121 +#define TEGRA264_CLK_SOR0 122 +#define TEGRA264_CLK_SOR1 123 +#define TEGRA264_CLK_SOR2 124 +#define TEGRA264_CLK_SOR3 125 +#define TEGRA264_CLK_SF0_SOR 126 +#define TEGRA264_CLK_SF1_SOR 127 +#define TEGRA264_CLK_SF2_SOR 128 +#define TEGRA264_CLK_SF3_SOR 129 +#define TEGRA264_CLK_SF4_SOR 130 +#define TEGRA264_CLK_SF5_SOR 131 +#define TEGRA264_CLK_SF6_SOR 132 +#define TEGRA264_CLK_SF7_SOR 133 +#define TEGRA264_CLK_SF0 134 +#define TEGRA264_CLK_SF1 135 +#define TEGRA264_CLK_SF2 136 +#define TEGRA264_CLK_SF3 137 +#define TEGRA264_CLK_SF4 138 +#define TEGRA264_CLK_SF5 139 +#define TEGRA264_CLK_SF6 140 +#define TEGRA264_CLK_SF7 141 +#define TEGRA264_CLK_MAUD 142 +#define TEGRA264_CLK_AZA_2XBIT 143 +#define TEGRA264_CLK_DCE_CPU 144 +#define TEGRA264_CLK_DCE_NIC 145 +#define TEGRA264_CLK_PLLC4 146 +#define TEGRA264_CLK_PLLC4_OUT0 147 +#define TEGRA264_CLK_PLLC4_OUT1 148 +#define TEGRA264_CLK_PLLC4_MUXED 149 +#define TEGRA264_CLK_SDMMC1 150 +#define TEGRA264_CLK_SDMMC_LEGACY_TM 151 +#define TEGRA264_CLK_PLLC0 152 +#define TEGRA264_CLK_NAFLL_BPMP 153 +#define TEGRA264_CLK_PLLP_OUT_PDIV 154 +#define TEGRA264_CLK_DISP_ROOT 155 +#define TEGRA264_CLK_ADSP 156 +#define TEGRA264_CLK_PLLA 157 +#define TEGRA264_CLK_PLLA1 158 +#define TEGRA264_CLK_PLLA1_OUT1 159 +#define TEGRA264_CLK_PLLAON 160 +#define TEGRA264_CLK_PLLAON_APE 161 +#define TEGRA264_CLK_PLLA_OUT0 162 +#define TEGRA264_CLK_AHUB 163 +#define TEGRA264_CLK_APE 164 +#define TEGRA264_CLK_I2S1_SCLK_IN 165 +#define TEGRA264_CLK_I2S2_SCLK_IN 166 +#define TEGRA264_CLK_I2S3_SCLK_IN 167 +#define TEGRA264_CLK_I2S4_SCLK_IN 168 +#define TEGRA264_CLK_I2S5_SCLK_IN 169 +#define TEGRA264_CLK_I2S6_SCLK_IN 170 +#define TEGRA264_CLK_I2S7_SCLK_IN 171 +#define TEGRA264_CLK_I2S8_SCLK_IN 172 +#define TEGRA264_CLK_I2S9_SCLK_IN 173 +#define TEGRA264_CLK_I2S1_AUDIO_SYNC 174 +#define TEGRA264_CLK_I2S2_AUDIO_SYNC 175 +#define TEGRA264_CLK_I2S3_AUDIO_SYNC 176 +#define TEGRA264_CLK_I2S4_AUDIO_SYNC 177 +#define TEGRA264_CLK_I2S5_AUDIO_SYNC 178 +#define TEGRA264_CLK_I2S6_AUDIO_SYNC 179 +#define TEGRA264_CLK_I2S7_AUDIO_SYNC 180 +#define TEGRA264_CLK_I2S8_AUDIO_SYNC 181 +#define TEGRA264_CLK_DMIC1_AUDIO_SYNC 182 +#define TEGRA264_CLK_DSPK1_AUDIO_SYNC 183 +#define TEGRA264_CLK_I2S1 184 +#define TEGRA264_CLK_I2S2 185 +#define TEGRA264_CLK_I2S3 186 +#define TEGRA264_CLK_I2S4 187 +#define TEGRA264_CLK_I2S5 188 +#define TEGRA264_CLK_I2S6 189 +#define TEGRA264_CLK_I2S7 190 +#define TEGRA264_CLK_I2S8 191 +#define TEGRA264_CLK_I2S9 192 +#define TEGRA264_CLK_DMIC1 193 +#define TEGRA264_CLK_DMIC5 194 +#define TEGRA264_CLK_DSPK1 195 +#define TEGRA264_CLK_AON_CPU 196 +#define TEGRA264_CLK_AON_NIC 197 +#define TEGRA264_CLK_BPMP 198 +#define TEGRA264_CLK_AXI_CBB 199 +#define TEGRA264_CLK_FUSE 200 +#define TEGRA264_CLK_TSENSE 201 +#define TEGRA264_CLK_CSITE 202 +#define TEGRA264_CLK_HCSITE 203 +#define TEGRA264_CLK_DBGAPB 204 +#define TEGRA264_CLK_LA 205 +#define TEGRA264_CLK_PLLREFGP 206 +#define TEGRA264_CLK_PLLE0 207 +#define TEGRA264_CLK_UPHY0_PLL0_XDIG 208 +#define TEGRA264_CLK_EQOS_APP 209 +#define TEGRA264_CLK_EQOS_MAC 210 +#define TEGRA264_CLK_EQOS_MACSEC 211 +#define TEGRA264_CLK_EQOS_TX_PCS 212 +#define TEGRA264_CLK_MGBES_PTP_REF 213 +#define TEGRA264_CLK_MGBE0_UPHY1_PLL_XDIG 214 +#define TEGRA264_CLK_MGBE0_TX_PCS 215 +#define TEGRA264_CLK_MGBE0_MAC 216 +#define TEGRA264_CLK_MGBE0_MACSEC 217 +#define TEGRA264_CLK_MGBE0_APP 218 +#define TEGRA264_CLK_MGBE1_UPHY1_PLL_XDIG 219 +#define TEGRA264_CLK_MGBE1_TX_PCS 220 +#define TEGRA264_CLK_MGBE1_MAC 221 +#define TEGRA264_CLK_MGBE1_MACSEC 222 +#define TEGRA264_CLK_MGBE1_APP 223 +#define TEGRA264_CLK_MGBE2_UPHY1_PLL_XDIG 224 +#define TEGRA264_CLK_MGBE2_TX_PCS 225 +#define TEGRA264_CLK_MGBE2_MAC 226 +#define TEGRA264_CLK_MGBE2_MACSEC 227 +#define TEGRA264_CLK_MGBE2_APP 228 +#define TEGRA264_CLK_MGBE3_UPHY1_PLL_XDIG 229 +#define TEGRA264_CLK_MGBE3_TX_PCS 230 +#define TEGRA264_CLK_MGBE3_MAC 231 +#define TEGRA264_CLK_MGBE3_MACSEC 232 +#define TEGRA264_CLK_MGBE3_APP 233 +#define TEGRA264_CLK_PLLREFUFS 234 +#define TEGRA264_CLK_PLLREFUFS_CLKOUT624 235 +#define TEGRA264_CLK_PLLREFUFS_REFCLKOUT 236 +#define TEGRA264_CLK_PLLREFUFS_UFSDEV_REFCLKOUT 237 +#define TEGRA264_CLK_UFSHC_CG_SYS 238 +#define TEGRA264_CLK_MPHY_L0_RX_LS_BIT_DIV 239 +#define TEGRA264_CLK_MPHY_L0_RX_LS_BIT 240 +#define TEGRA264_CLK_MPHY_L0_RX_LS_SYMB_DIV 241 +#define TEGRA264_CLK_MPHY_L0_RX_HS_SYMB_DIV 242 +#define TEGRA264_CLK_MPHY_L0_RX_SYMB 243 +#define TEGRA264_CLK_MPHY_L0_UPHY_TX_FIFO 244 +#define TEGRA264_CLK_MPHY_L0_TX_LS_3XBIT_DIV 245 +#define TEGRA264_CLK_MPHY_L0_TX_LS_SYMB_DIV 246 +#define TEGRA264_CLK_UPHY0_PLL4_XDIG 247 +#define TEGRA264_CLK_MPHY_L0_TX_HS_SYMB_DIV 248 +#define TEGRA264_CLK_MPHY_L0_TX_SYMB 249 +#define TEGRA264_CLK_MPHY_L0_TX_LS_3XBIT 250 +#define TEGRA264_CLK_MPHY_L0_RX_ANA 251 +#define TEGRA264_CLK_MPHY_L1_RX_ANA 252 +#define TEGRA264_CLK_MPHY_TX_1MHZ_REF 253 +#define TEGRA264_CLK_MPHY_CORE_PLL_FIXED 254 +#define TEGRA264_CLK_MPHY_IOBIST 255 +#define TEGRA264_CLK_UFSHC_CG_SYS_DIV 256 +#define TEGRA264_CLK_XUSB1_CORE 257 +#define TEGRA264_CLK_XUSB1_FALCON 258 +#define TEGRA264_CLK_XUSB1_FS 259 +#define TEGRA264_CLK_XUSB1_SS 260 +#define TEGRA264_CLK_UPHY0_USB_P0_RX_CORE 261 +#define TEGRA264_CLK_UPHY0_USB_P1_RX_CORE 262 +#define TEGRA264_CLK_UPHY0_USB_P2_RX_CORE 263 +#define TEGRA264_CLK_UPHY0_USB_P3_RX_CORE 264 +#define TEGRA264_CLK_XUSB1_CLK480M_NVWRAP_CORE 265 +#define TEGRA264_CLK_XUSB1_CORE_HOST 266 +#define TEGRA264_CLK_XUSB1_CORE_DEV 267 +#define TEGRA264_CLK_XUSB1_CORE_SUPERSPEED 268 +#define TEGRA264_CLK_XUSB1_FALCON_HOST 269 +#define TEGRA264_CLK_XUSB1_FALCON_SUPERSPEED 270 +#define TEGRA264_CLK_XUSB1_FS_HOST 271 +#define TEGRA264_CLK_XUSB1_FS_DEV 272 +#define TEGRA264_CLK_XUSB1_HS_HSICP 273 +#define TEGRA264_CLK_XUSB1_SS_DEV 274 +#define TEGRA264_CLK_XUSB1_SS_SUPERSPEED 275 +#define TEGRA264_CLK_AON_TOUCH 276 +#define TEGRA264_CLK_AUD_MCLK 277 +#define TEGRA264_CLK_EXTPERIPH1 278 +#define TEGRA264_CLK_EXTPERIPH2 279 +#define TEGRA264_CLK_EXTPERIPH3 280 +#define TEGRA264_CLK_EXTPERIPH4 281 +#define TEGRA264_CLK_JTAG_REG_UNGATED 282 +#define TEGRA264_CLK_IST_BUS 283 +#define TEGRA264_CLK_IST_BUS_RIST_MCC 284 +#define TEGRA264_CLK_MATHS_SEC_RIST 285 +#define TEGRA264_CLK_NAFLL_IST 286 +#define TEGRA264_CLK_RIST_ROOT 287 +#define TEGRA264_CLK_IST_CONTROLLER_RIST 288 +#define TEGRA264_CLK_MSS_ENCRYPT 289 +#define TEGRA264_CLK_EMC 290 +#define TEGRA264_CLK_SPPLL0_CLKOUT100 291 +#define TEGRA264_CLK_SPPLL0_CLKOUT270 292 +#define TEGRA264_CLK_SPPLL1_CLKOUT100 293 +#define TEGRA264_CLK_SPPLL1_CLKOUT270 294 +#define TEGRA264_CLK_DP_LINKA_REF 295 +#define TEGRA264_CLK_DP_LINKB_REF 296 +#define TEGRA264_CLK_DP_LINKC_REF 297 +#define TEGRA264_CLK_DP_LINKD_REF 298 +#define TEGRA264_CLK_PLLNVCSI 299 +#define TEGRA264_CLK_PLLBPMPCAM 300 +#define TEGRA264_CLK_UTMI_PLL1 301 +#define TEGRA264_CLK_UTMI_PLL1_CLKOUT48 302 +#define TEGRA264_CLK_UTMI_PLL1_CLKOUT60 303 +#define TEGRA264_CLK_UTMI_PLL1_CLKOUT480 304 +#define TEGRA264_CLK_NAFLL_ISP 305 +#define TEGRA264_CLK_NAFLL_RCE 306 +#define TEGRA264_CLK_NAFLL_RCE1 307 +#define TEGRA264_CLK_NAFLL_SE 308 +#define TEGRA264_CLK_NAFLL_VI 309 +#define TEGRA264_CLK_NAFLL_VIC 310 +#define TEGRA264_CLK_NAFLL_DCE 311 +#define TEGRA264_CLK_NAFLL_TSEC 312 +#define TEGRA264_CLK_NAFLL_CPAIR0 313 +#define TEGRA264_CLK_NAFLL_CPAIR1 314 +#define TEGRA264_CLK_NAFLL_CPAIR2 315 +#define TEGRA264_CLK_NAFLL_CPAIR3 316 +#define TEGRA264_CLK_NAFLL_CPAIR4 317 +#define TEGRA264_CLK_NAFLL_CPAIR5 318 +#define TEGRA264_CLK_NAFLL_CPAIR6 319 +#define TEGRA264_CLK_NAFLL_GPU_SYS 320 +#define TEGRA264_CLK_NAFLL_GPU_NVD 321 +#define TEGRA264_CLK_NAFLL_GPU_UPROC 322 +#define TEGRA264_CLK_NAFLL_GPU_GPC0 323 +#define TEGRA264_CLK_NAFLL_GPU_GPC1 324 +#define TEGRA264_CLK_NAFLL_GPU_GPC2 325 +#define TEGRA264_CLK_SOR_LINKA_INPUT 326 +#define TEGRA264_CLK_SOR_LINKB_INPUT 327 +#define TEGRA264_CLK_SOR_LINKC_INPUT 328 +#define TEGRA264_CLK_SOR_LINKD_INPUT 329 +#define TEGRA264_CLK_SOR_LINKA_AFIFO 330 +#define TEGRA264_CLK_SOR_LINKB_AFIFO 331 +#define TEGRA264_CLK_SOR_LINKC_AFIFO 332 +#define TEGRA264_CLK_SOR_LINKD_AFIFO 333 +#define TEGRA264_CLK_I2S1_PAD_M 334 +#define TEGRA264_CLK_I2S2_PAD_M 335 +#define TEGRA264_CLK_I2S3_PAD_M 336 +#define TEGRA264_CLK_I2S4_PAD_M 337 +#define TEGRA264_CLK_I2S5_PAD_M 338 +#define TEGRA264_CLK_I2S6_PAD_M 339 +#define TEGRA264_CLK_I2S7_PAD_M 340 +#define TEGRA264_CLK_I2S8_PAD_M 341 +#define TEGRA264_CLK_I2S9_PAD_M 342 +#define TEGRA264_CLK_BPMP_NIC 343 +#define TEGRA264_CLK_CLK1M 344 +#define TEGRA264_CLK_RDET 345 +#define TEGRA264_CLK_ADC_SOC_REF 346 +#define TEGRA264_CLK_UPHY0_PLL0_TXREF 347 +#define TEGRA264_CLK_EQOS_TX 348 +#define TEGRA264_CLK_EQOS_TX_M 349 +#define TEGRA264_CLK_EQOS_RX_PCS_IN 350 +#define TEGRA264_CLK_EQOS_RX_PCS_M 351 +#define TEGRA264_CLK_EQOS_RX_IN 352 +#define TEGRA264_CLK_EQOS_RX 353 +#define TEGRA264_CLK_EQOS_RX_M 354 +#define TEGRA264_CLK_MGBE0_UPHY1_PLL_TXREF 355 +#define TEGRA264_CLK_MGBE0_TX 356 +#define TEGRA264_CLK_MGBE0_TX_M 357 +#define TEGRA264_CLK_MGBE0_RX_PCS_IN 358 +#define TEGRA264_CLK_MGBE0_RX_PCS_M 359 +#define TEGRA264_CLK_MGBE0_RX_IN 360 +#define TEGRA264_CLK_MGBE0_RX_M 361 +#define TEGRA264_CLK_MGBE1_UPHY1_PLL_TXREF 362 +#define TEGRA264_CLK_MGBE1_TX 363 +#define TEGRA264_CLK_MGBE1_TX_M 364 +#define TEGRA264_CLK_MGBE1_RX_PCS_IN 365 +#define TEGRA264_CLK_MGBE1_RX_PCS_M 366 +#define TEGRA264_CLK_MGBE1_RX_IN 367 +#define TEGRA264_CLK_MGBE1_RX_M 368 +#define TEGRA264_CLK_MGBE2_UPHY1_PLL_TXREF 369 +#define TEGRA264_CLK_MGBE2_TX 370 +#define TEGRA264_CLK_MGBE2_TX_M 371 +#define TEGRA264_CLK_MGBE2_RX_PCS_IN 372 +#define TEGRA264_CLK_MGBE2_RX_PCS_M 373 +#define TEGRA264_CLK_MGBE2_RX_IN 374 +#define TEGRA264_CLK_MGBE2_RX_M 375 +#define TEGRA264_CLK_MGBE3_UPHY1_PLL_TXREF 376 +#define TEGRA264_CLK_MGBE3_TX 377 +#define TEGRA264_CLK_MGBE3_TX_M 378 +#define TEGRA264_CLK_MGBE3_RX_PCS_IN 379 +#define TEGRA264_CLK_MGBE3_RX_PCS_M 380 +#define TEGRA264_CLK_MGBE3_RX_IN 381 +#define TEGRA264_CLK_MGBE3_RX_M 382 +#define TEGRA264_CLK_UPHY0_USB_P0_TX_CORE 383 +#define TEGRA264_CLK_UPHY0_USB_P1_TX_CORE 384 +#define TEGRA264_CLK_UPHY0_USB_P2_TX_CORE 385 +#define TEGRA264_CLK_UPHY0_USB_P3_TX_CORE 386 +#define TEGRA264_CLK_UPHY0_USB_P0_TX 387 +#define TEGRA264_CLK_UPHY0_USB_P1_TX 388 +#define TEGRA264_CLK_UPHY0_USB_P2_TX 389 +#define TEGRA264_CLK_UPHY0_USB_P3_TX 390 +#define TEGRA264_CLK_UPHY0_USB_P0_RX_IN 391 +#define TEGRA264_CLK_UPHY0_USB_P1_RX_IN 392 +#define TEGRA264_CLK_UPHY0_USB_P2_RX_IN 393 +#define TEGRA264_CLK_UPHY0_USB_P3_RX_IN 394 +#define TEGRA264_CLK_UPHY0_USB_P0_RX_M 395 +#define TEGRA264_CLK_UPHY0_USB_P1_RX_M 396 +#define TEGRA264_CLK_UPHY0_USB_P2_RX_M 397 +#define TEGRA264_CLK_UPHY0_USB_P3_RX_M 398 +#define TEGRA264_CLK_UPHY0_LANE0_TX_M 399 +#define TEGRA264_CLK_PCIE_C1_XCLK_NOBG_M 400 +#define TEGRA264_CLK_PCIE_C2_XCLK_NOBG_M 401 +#define TEGRA264_CLK_PCIE_C3_XCLK_NOBG_M 402 +#define TEGRA264_CLK_PCIE_C4_XCLK_NOBG_M 403 +#define TEGRA264_CLK_PCIE_C5_XCLK_NOBG_M 404 +#define TEGRA264_CLK_PCIE_C1_L0_RX_M 405 +#define TEGRA264_CLK_PCIE_C1_L1_RX_M 406 +#define TEGRA264_CLK_PCIE_C1_L2_RX_M 407 +#define TEGRA264_CLK_PCIE_C1_L3_RX_M 408 +#define TEGRA264_CLK_PCIE_C2_L0_RX_M 409 +#define TEGRA264_CLK_PCIE_C2_L1_RX_M 410 +#define TEGRA264_CLK_PCIE_C2_L2_RX_M 411 +#define TEGRA264_CLK_PCIE_C2_L3_RX_M 412 +#define TEGRA264_CLK_PCIE_C3_L0_RX_M 413 +#define TEGRA264_CLK_PCIE_C3_L1_RX_M 414 +#define TEGRA264_CLK_PCIE_C4_L0_RX_M 415 +#define TEGRA264_CLK_PCIE_C4_L1_RX_M 416 +#define TEGRA264_CLK_PCIE_C4_L2_RX_M 417 +#define TEGRA264_CLK_PCIE_C4_L3_RX_M 418 +#define TEGRA264_CLK_PCIE_C4_L4_RX_M 419 +#define TEGRA264_CLK_PCIE_C4_L5_RX_M 420 +#define TEGRA264_CLK_PCIE_C4_L6_RX_M 421 +#define TEGRA264_CLK_PCIE_C4_L7_RX_M 422 +#define TEGRA264_CLK_PCIE_C5_L0_RX_M 423 +#define TEGRA264_CLK_PCIE_C5_L1_RX_M 424 +#define TEGRA264_CLK_PCIE_C5_L2_RX_M 425 +#define TEGRA264_CLK_PCIE_C5_L3_RX_M 426 +#define TEGRA264_CLK_MPHY_L0_RX_PWM_BIT_M 427 +#define TEGRA264_CLK_MPHY_L1_RX_PWM_BIT_M 428 +#define TEGRA264_CLK_DBB_UPHY0 429 +#define TEGRA264_CLK_UPHY0_UXL_CORE 430 +#define TEGRA264_CLK_ISC_CPU_ROOT 431 +#define TEGRA264_CLK_ISC_NIC 432 +#define TEGRA264_CLK_CTC_TXCLK0_M 433 +#define TEGRA264_CLK_CTC_TXCLK1_M 434 +#define TEGRA264_CLK_CTC_RXCLK0_M 435 +#define TEGRA264_CLK_CTC_RXCLK1_M 436 +#define TEGRA264_CLK_PLLREFGP_OUT 437 +#define TEGRA264_CLK_PLLREFGP_OUT1 438 +#define TEGRA264_CLK_GPU_SYS 439 +#define TEGRA264_CLK_GPU_NVD 440 +#define TEGRA264_CLK_GPU_UPROC 441 +#define TEGRA264_CLK_GPU_GPC0 442 +#define TEGRA264_CLK_GPU_GPC1 443 +#define TEGRA264_CLK_GPU_GPC2 444 +#define TEGRA264_CLK_PLLX 445 +#define TEGRA264_CLK_APE_SOUNDWIRE_MSRC0 446 +#define TEGRA264_CLK_APE_SOUNDWIRE_DATA_EN_SHAPER 447 +#define TEGRA264_CLK_AO_SOUNDWIRE_MSRC0 448 +#define TEGRA264_CLK_AO_SOUNDWIRE_DATA_EN_SHAPER 449 +#define TEGRA264_CLK_MGBE0_TX_SER 459 +#define TEGRA264_CLK_MGBE1_TX_SER 460 +#define TEGRA264_CLK_MGBE2_TX_SER 461 +#define TEGRA264_CLK_MGBE3_TX_SER 462 +#define TEGRA264_CLK_MGBE0_RX_SER 463 +#define TEGRA264_CLK_MGBE1_RX_SER 464 +#define TEGRA264_CLK_MGBE2_RX_SER 465 +#define TEGRA264_CLK_MGBE3_RX_SER 466 +#define TEGRA264_CLK_DPAUX 467 + +#endif /* DT_BINDINGS_CLOCK_NVIDIA_TEGRA264_H */ diff --git a/include/dt-bindings/reset/nvidia,tegra264.h b/include/dt-bindings/reset/nvidia,tegra264.h new file mode 100644 index 000000000000..a61a56bb232b --- /dev/null +++ b/include/dt-bindings/reset/nvidia,tegra264.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (c) 2022-2025, NVIDIA CORPORATION. All rights reserved. */ + +#ifndef DT_BINDINGS_RESET_NVIDIA_TEGRA264_H +#define DT_BINDINGS_RESET_NVIDIA_TEGRA264_H + +#define TEGRA264_RESET_APE_TKE 1 +#define TEGRA264_RESET_CEC 2 +#define TEGRA264_RESET_ADSP_ALL 3 +#define TEGRA264_RESET_RCE_ALL 4 +#define TEGRA264_RESET_UFSHC 5 +#define TEGRA264_RESET_UFSHC_AXI_M 6 +#define TEGRA264_RESET_UFSHC_LP_SEQ 7 +#define TEGRA264_RESET_DPAUX 8 +#define TEGRA264_RESET_EQOS_PCS 9 +#define TEGRA264_RESET_HWPM 10 +#define TEGRA264_RESET_I2C1 11 +#define TEGRA264_RESET_I2C2 12 +#define TEGRA264_RESET_I2C3 13 +#define TEGRA264_RESET_I2C4 14 +#define TEGRA264_RESET_I2C6 15 +#define TEGRA264_RESET_I2C7 16 +#define TEGRA264_RESET_I2C8 17 +#define TEGRA264_RESET_I2C9 18 +#define TEGRA264_RESET_ISP 19 +#define TEGRA264_RESET_LA 20 +#define TEGRA264_RESET_NVCSI 21 +#define TEGRA264_RESET_EQOS_MAC 22 +#define TEGRA264_RESET_PWM10 23 +#define TEGRA264_RESET_PWM2 24 +#define TEGRA264_RESET_PWM3 25 +#define TEGRA264_RESET_PWM4 26 +#define TEGRA264_RESET_PWM5 27 +#define TEGRA264_RESET_PWM9 28 +#define TEGRA264_RESET_QSPI0 29 +#define TEGRA264_RESET_HDA 30 +#define TEGRA264_RESET_HDACODEC 31 +#define TEGRA264_RESET_I2C0 32 +#define TEGRA264_RESET_I2C10 33 +#define TEGRA264_RESET_SDMMC1 34 +#define TEGRA264_RESET_MIPI_CAL 35 +#define TEGRA264_RESET_SPI1 36 +#define TEGRA264_RESET_SPI2 37 +#define TEGRA264_RESET_SPI3 38 +#define TEGRA264_RESET_SPI4 39 +#define TEGRA264_RESET_SPI5 40 +#define TEGRA264_RESET_SPI7 41 +#define TEGRA264_RESET_SPI8 42 +#define TEGRA264_RESET_SPI9 43 +#define TEGRA264_RESET_TACH0 44 +#define TEGRA264_RESET_TSEC 45 +#define TEGRA264_RESET_VI 46 +#define TEGRA264_RESET_VI1 47 +#define TEGRA264_RESET_PVA0_ALL 48 +#define TEGRA264_RESET_VIC 49 +#define TEGRA264_RESET_MPHY_CLK_CTL 50 +#define TEGRA264_RESET_MPHY_L0_RX 51 +#define TEGRA264_RESET_MPHY_L0_TX 52 +#define TEGRA264_RESET_MPHY_L1_RX 53 +#define TEGRA264_RESET_MPHY_L1_TX 54 +#define TEGRA264_RESET_ISP1 55 +#define TEGRA264_RESET_I2C11 56 +#define TEGRA264_RESET_I2C12 57 +#define TEGRA264_RESET_I2C14 58 +#define TEGRA264_RESET_I2C15 59 +#define TEGRA264_RESET_I2C16 60 +#define TEGRA264_RESET_EQOS_MACSEC 61 +#define TEGRA264_RESET_MGBE0_PCS 62 +#define TEGRA264_RESET_MGBE0_MAC 63 +#define TEGRA264_RESET_MGBE0_MACSEC 64 +#define TEGRA264_RESET_MGBE1_PCS 65 +#define TEGRA264_RESET_MGBE1_MAC 66 +#define TEGRA264_RESET_MGBE1_MACSEC 67 +#define TEGRA264_RESET_MGBE2_PCS 68 +#define TEGRA264_RESET_MGBE2_MAC 69 +#define TEGRA264_RESET_MGBE2_MACSEC 70 +#define TEGRA264_RESET_MGBE3_PCS 71 +#define TEGRA264_RESET_MGBE3_MAC 72 +#define TEGRA264_RESET_MGBE3_MACSEC 73 +#define TEGRA264_RESET_ADSP_CORE0 74 +#define TEGRA264_RESET_ADSP_CORE1 75 +#define TEGRA264_RESET_APE 76 +#define TEGRA264_RESET_XUSB1_PADCTL 77 +#define TEGRA264_RESET_AON_CPU_ALL 78 +#define TEGRA264_RESET_AON_HSP 79 +#define TEGRA264_RESET_UART4 80 +#define TEGRA264_RESET_UART5 81 +#define TEGRA264_RESET_UART9 82 +#define TEGRA264_RESET_UART10 83 +#define TEGRA264_RESET_UART8 84 + +#endif /* DT_BINDINGS_RESET_NVIDIA_TEGRA264_H */ -- cgit v1.2.3 From 19c24f7ee39af503b9731067b91add627b70ecb6 Mon Sep 17 00:00:00 2001 From: David Kaplan Date: Mon, 7 Jul 2025 13:32:57 -0500 Subject: cpu: Define attack vectors Define 4 new attack vectors that are used for controlling CPU speculation mitigations. These may be individually disabled as part of the mitigations= command line. Attack vector controls are combined with global options like 'auto' or 'auto,nosmt' like 'mitigations=auto,no_user_kernel'. The global options come first in the mitigations= string. Cross-thread mitigations can either remain enabled fully, including potentially disabling SMT ('auto,nosmt'), remain enabled except for disabling SMT ('auto'), or entirely disabled through the new 'no_cross_thread' attack vector option. The default settings for these attack vectors are consistent with existing kernel defaults, other than the automatic disabling of VM-based attack vectors if KVM support is not present. Signed-off-by: David Kaplan Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20250707183316.1349127-3-david.kaplan@amd.com --- include/linux/cpu.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 6378370a952f..1fb143ee1ffa 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -198,9 +198,25 @@ void cpuhp_report_idle_dead(void); static inline void cpuhp_report_idle_dead(void) { } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ +enum cpu_attack_vectors { + CPU_MITIGATE_USER_KERNEL, + CPU_MITIGATE_USER_USER, + CPU_MITIGATE_GUEST_HOST, + CPU_MITIGATE_GUEST_GUEST, + NR_CPU_ATTACK_VECTORS, +}; + +enum smt_mitigations { + SMT_MITIGATIONS_OFF, + SMT_MITIGATIONS_AUTO, + SMT_MITIGATIONS_ON, +}; + #ifdef CONFIG_CPU_MITIGATIONS extern bool cpu_mitigations_off(void); extern bool cpu_mitigations_auto_nosmt(void); +extern bool cpu_attack_vector_mitigated(enum cpu_attack_vectors v); +extern enum smt_mitigations smt_mitigations; #else static inline bool cpu_mitigations_off(void) { @@ -210,6 +226,11 @@ static inline bool cpu_mitigations_auto_nosmt(void) { return false; } +static inline bool cpu_attack_vector_mitigated(enum cpu_attack_vectors v) +{ + return false; +} +#define smt_mitigations SMT_MITIGATIONS_OFF #endif #endif /* _LINUX_CPU_H_ */ -- cgit v1.2.3 From 1657624a69fcfd3f27ba6223e1c8fb6a16815568 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Tue, 1 Jul 2025 06:45:18 +0000 Subject: media: core: export v4l2_translate_cmd video_translate_cmd() can be useful for drivers to convert between the VIDIOC_*32 and VIDIOC_ defines. Let's export it. Now that the function is exported, use this opportunity to rename the function with the v4l2_ prefix, that is less ambiguous than video_ The VIDIOC_*32 defines are not accessible by the drivers, they live in v4l2-compat-ioctl32.c. Reviewed-by: Hans Verkuil Signed-off-by: Ricardo Ribalda Link: https://lore.kernel.org/r/20250701-uvc-grannular-invert-v4-6-8003b9b89f68@chromium.org Signed-off-by: Hans de Goede Signed-off-by: Hans Verkuil --- include/media/v4l2-ioctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h index c6ec87e88dfe..82695c3a300a 100644 --- a/include/media/v4l2-ioctl.h +++ b/include/media/v4l2-ioctl.h @@ -679,6 +679,7 @@ long int v4l2_compat_ioctl32(struct file *file, unsigned int cmd, #endif unsigned int v4l2_compat_translate_cmd(unsigned int cmd); +unsigned int v4l2_translate_cmd(unsigned int cmd); int v4l2_compat_get_user(void __user *arg, void *parg, unsigned int cmd); int v4l2_compat_put_user(void __user *arg, void *parg, unsigned int cmd); int v4l2_compat_get_array_args(struct file *file, void *mbuf, -- cgit v1.2.3 From 2ab4019aa34dc2aec4a0824fbf1e49300884fbbf Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Mon, 7 Jul 2025 18:34:04 +0000 Subject: media: uvcvideo: Introduce V4L2_META_FMT_UVC_MSXU_1_5 The UVC driver provides two metadata types V4L2_META_FMT_UVC, and V4L2_META_FMT_D4XX. The only difference between the two of them is that V4L2_META_FMT_UVC only copies PTS, SCR, size and flags, and V4L2_META_FMT_D4XX copies the whole metadata section. Now we only enable V4L2_META_FMT_D4XX for the Intel D4xx family of devices, but it is useful to have the whole metadata payload for any device where vendors include other metadata, such as the one described by Microsoft: https://learn.microsoft.com/en-us/windows-hardware/drivers/stream/mf-capture-metadata This patch introduces a new format V4L2_META_FMT_UVC_MSXU_1_5, that is identical to V4L2_META_FMT_D4XX. Let the user enable this format with a quirk for now. This way they can test if their devices provide useful metadata without rebuilding the kernel. They can later contribute patches to auto-quirk their devices. We will also work in methods to auto-detect devices compatible with this new metadata format. Suggested-by: Hans de Goede Reviewed-by: Hans de Goede Signed-off-by: Ricardo Ribalda Link: https://lore.kernel.org/r/20250707-uvc-meta-v8-4-ed17f8b1218b@chromium.org Signed-off-by: Hans de Goede Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 1bb1979f6c18..3dd9fa45dde1 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -867,6 +867,7 @@ struct v4l2_pix_format { #define V4L2_META_FMT_VSP1_HGT v4l2_fourcc('V', 'S', 'P', 'T') /* R-Car VSP1 2-D Histogram */ #define V4L2_META_FMT_UVC v4l2_fourcc('U', 'V', 'C', 'H') /* UVC Payload Header metadata */ #define V4L2_META_FMT_D4XX v4l2_fourcc('D', '4', 'X', 'X') /* D4XX Payload Header metadata */ +#define V4L2_META_FMT_UVC_MSXU_1_5 v4l2_fourcc('U', 'V', 'C', 'M') /* UVC MSXU metadata */ #define V4L2_META_FMT_VIVID v4l2_fourcc('V', 'I', 'V', 'D') /* Vivid Metadata */ /* Vendor specific - used for RK_ISP1 camera sub-system */ -- cgit v1.2.3 From 6cb786f040ad35b23b4a7bff8b9d772f22909d48 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Mon, 7 Jul 2025 18:34:05 +0000 Subject: media: uvcvideo: Auto-set UVC_QUIRK_MSXU_META If the camera supports the MSXU_CONTROL_METADATA control, auto set the MSXU_META quirk. Reviewed-by: Hans de Goede Signed-off-by: Ricardo Ribalda Link: https://lore.kernel.org/r/20250707-uvc-meta-v8-5-ed17f8b1218b@chromium.org Signed-off-by: Hans de Goede Signed-off-by: Hans Verkuil --- include/linux/usb/uvc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h index bce95153e5a6..ee19e9f915b8 100644 --- a/include/linux/usb/uvc.h +++ b/include/linux/usb/uvc.h @@ -29,6 +29,9 @@ #define UVC_GUID_EXT_GPIO_CONTROLLER \ {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03} +#define UVC_GUID_MSXU_1_5 \ + {0xdc, 0x95, 0x3f, 0x0f, 0x32, 0x26, 0x4e, 0x4c, \ + 0x92, 0xc9, 0xa0, 0x47, 0x82, 0xf4, 0x3b, 0xc8} #define UVC_GUID_FORMAT_MJPEG \ { 'M', 'J', 'P', 'G', 0x00, 0x00, 0x10, 0x00, \ -- cgit v1.2.3 From 56e9a0d8e53f56f313d332888a32a44a71f3a9ab Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:09 -0700 Subject: iommufd: Add mmap interface For vIOMMU passing through HW resources to user space (VMs), allowing a VM to control the passed through HW directly by accessing hardware registers, add an mmap infrastructure to map the physical MMIO pages to user space. Maintain a maple tree per ictx as a translation table managing mmappable regions, from an allocated for-user mmap offset to an iommufd_mmap struct, where it stores the real physical address range for io_remap_pfn_range(). Keep track of the lifecycle of the mmappable region by taking refcount of its owner, so as to enforce user space to unmap the region first before it can destroy its owner object. To allow an IOMMU driver to add and delete mmappable regions onto/from the maple tree, add iommufd_viommu_alloc/destroy_mmap helpers. Link: https://patch.msgid.link/r/9a888a326b12aa5fe940083eae1156304e210fe0.1752126748.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Pranjal Shrivastava Reviewed-by: Lu Baolu Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index fa23439fa483..e3a0cd47384d 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -255,6 +255,11 @@ int _iommufd_object_depend(struct iommufd_object *obj_dependent, struct iommufd_object *obj_depended); void _iommufd_object_undepend(struct iommufd_object *obj_dependent, struct iommufd_object *obj_depended); +int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner, + phys_addr_t mmio_addr, size_t length, + unsigned long *offset); +void _iommufd_destroy_mmap(struct iommufd_ctx *ictx, + struct iommufd_object *owner, unsigned long offset); struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id); int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, @@ -275,6 +280,20 @@ _iommufd_object_undepend(struct iommufd_object *obj_dependent, { } +static inline int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, + struct iommufd_object *owner, + phys_addr_t mmio_addr, size_t length, + unsigned long *offset) +{ + return -EOPNOTSUPP; +} + +static inline void _iommufd_destroy_mmap(struct iommufd_ctx *ictx, + struct iommufd_object *owner, + unsigned long offset) +{ +} + static inline struct device * iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) { @@ -342,4 +361,27 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, _iommufd_object_undepend(&dependent->member.obj, \ &depended->member.obj); \ }) + +/* + * Helpers for IOMMU driver to alloc/destroy an mmapable area for a structure. + * + * To support an mmappable MMIO region, kernel driver must first register it to + * iommufd core to allocate an @offset, during a driver-structure initialization + * (e.g. viommu_init op). Then, it should report to user space this @offset and + * the @length of the MMIO region for mmap syscall. + */ +static inline int iommufd_viommu_alloc_mmap(struct iommufd_viommu *viommu, + phys_addr_t mmio_addr, + size_t length, + unsigned long *offset) +{ + return _iommufd_alloc_mmap(viommu->ictx, &viommu->obj, mmio_addr, + length, offset); +} + +static inline void iommufd_viommu_destroy_mmap(struct iommufd_viommu *viommu, + unsigned long offset) +{ + _iommufd_destroy_mmap(viommu->ictx, &viommu->obj, offset); +} #endif -- cgit v1.2.3 From 62622a8753fa6af3c104f9552863e6473b92fb31 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:12 -0700 Subject: iommu: Allow an input type in hw_info op The hw_info uAPI will support a bidirectional data_type field that can be used as an input field for user space to request for a specific info data. To prepare for the uAPI update, change the iommu layer first: - Add a new IOMMU_HW_INFO_TYPE_DEFAULT as an input, for which driver can output its only (or firstly) supported type - Update the kdoc accordingly - Roll out the type validation in the existing drivers Link: https://patch.msgid.link/r/00f4a2d3d930721f61367014717b3ba2d1e82a81.1752126748.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Reviewed-by: Pranjal Shrivastava Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 3 ++- include/uapi/linux/iommufd.h | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e06a0fbe4bc7..e8b59ef54e48 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -603,7 +603,8 @@ __iommu_copy_struct_to_user(const struct iommu_user_data *dst_data, * @capable: check capability * @hw_info: report iommu hardware information. The data buffer returned by this * op is allocated in the iommu driver and freed by the caller after - * use. + * use. @type can input a requested type and output a supported type. + * Driver should reject an unsupported data @type input * @domain_alloc: Do not use in new drivers * @domain_alloc_identity: allocate an IDENTITY domain. Drivers should prefer to * use identity_domain instead. This should only be used diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index b928c1ed2395..9c8c304b5de2 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -593,13 +593,15 @@ struct iommu_hw_info_arm_smmuv3 { /** * enum iommu_hw_info_type - IOMMU Hardware Info Types - * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware + * @IOMMU_HW_INFO_TYPE_NONE: Output by the drivers that do not report hardware * info + * @IOMMU_HW_INFO_TYPE_DEFAULT: Input to request for a default type * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type */ enum iommu_hw_info_type { IOMMU_HW_INFO_TYPE_NONE = 0, + IOMMU_HW_INFO_TYPE_DEFAULT = 0, IOMMU_HW_INFO_TYPE_INTEL_VTD = 1, IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2, }; -- cgit v1.2.3 From a9f10bab2e5084d6746391fccd7bef6ac87620b8 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:13 -0700 Subject: iommufd: Allow an input data_type via iommu_hw_info The iommu_hw_info can output via the out_data_type field the vendor data type from a driver, but this only allows driver to report one data type. Now, with SMMUv3 having a Tegra241 CMDQV implementation, it has two sets of types and data structs to report. One way to support that is to use the same type field bidirectionally. Reuse the same field by adding an "in_data_type", allowing user space to request for a specific type and to get the corresponding data. For backward compatibility, since the ioctl handler has never checked an input value, add an IOMMU_HW_INFO_FLAG_INPUT_TYPE to switch between the old output-only field and the new bidirectional field. Link: https://patch.msgid.link/r/887378a7167e1786d9d13cde0c36263ed61823d7.1752126748.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Reviewed-by: Pranjal Shrivastava Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 9c8c304b5de2..32ee02380912 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -628,6 +628,15 @@ enum iommufd_hw_capabilities { IOMMU_HW_CAP_PCI_PASID_PRIV = 1 << 2, }; +/** + * enum iommufd_hw_info_flags - Flags for iommu_hw_info + * @IOMMU_HW_INFO_FLAG_INPUT_TYPE: If set, @in_data_type carries an input type + * for user space to request for a specific info + */ +enum iommufd_hw_info_flags { + IOMMU_HW_INFO_FLAG_INPUT_TYPE = 1 << 0, +}; + /** * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO) * @size: sizeof(struct iommu_hw_info) @@ -637,6 +646,12 @@ enum iommufd_hw_capabilities { * data that kernel supports * @data_uptr: User pointer to a user-space buffer used by the kernel to fill * the iommu type specific hardware information data + * @in_data_type: This shares the same field with @out_data_type, making it be + * a bidirectional field. When IOMMU_HW_INFO_FLAG_INPUT_TYPE is + * set, an input type carried via this @in_data_type field will + * be valid, requesting for the info data to the given type. If + * IOMMU_HW_INFO_FLAG_INPUT_TYPE is unset, any input value will + * be seen as IOMMU_HW_INFO_TYPE_DEFAULT * @out_data_type: Output the iommu hardware info type as defined in the enum * iommu_hw_info_type. * @out_capabilities: Output the generic iommu capability info type as defined @@ -666,7 +681,10 @@ struct iommu_hw_info { __u32 dev_id; __u32 data_len; __aligned_u64 data_uptr; - __u32 out_data_type; + union { + __u32 in_data_type; + __u32 out_data_type; + }; __u8 out_max_pasid_log2; __u8 __reserved[3]; __aligned_u64 out_capabilities; -- cgit v1.2.3 From 4dc0d12474f9d4833c3dd96b73d61e406d3f5dc7 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:20 -0700 Subject: iommu/tegra241-cmdqv: Add user-space use support The CMDQV HW supports a user-space use for virtualization cases. It allows the VM to issue guest-level TLBI or ATC_INV commands directly to the queue and executes them without a VMEXIT, as HW will replace the VMID field in a TLBI command and the SID field in an ATC_INV command with the preset VMID and SID. This is built upon the vIOMMU infrastructure by allowing VMM to allocate a VINTF (as a vIOMMU object) and assign VCMDQs (HW QUEUE objs) to the VINTF. So firstly, replace the standard vSMMU model with the VINTF implementation but reuse the standard cache_invalidate op (for unsupported commands) and the standard alloc_domain_nested op (for standard nested STE). Each VINTF has two 64KB MMIO pages (128B per logical VCMDQ): - Page0 (directly accessed by guest) has all the control and status bits. - Page1 (trapped by VMM) has guest-owned queue memory location/size info. VMM should trap the emulated VINTF0's page1 of the guest VM for the guest- level VCMDQ location/size info and forward that to the kernel to translate to a physical memory location to program the VCMDQ HW during an allocation call. Then, it should mmap the assigned VINTF's page0 to the VINTF0 page0 of the guest VM. This allows the guest OS to read and write the guest-own VINTF's page0 for direct control of the VCMDQ HW. For ATC invalidation commands that hold an SID, it requires all devices to register their virtual SIDs to the SID_MATCH registers and their physical SIDs to the pairing SID_REPLACE registers, so that HW can use those as a lookup table to replace those virtual SIDs with the correct physical SIDs. Thus, implement the driver-allocated vDEVICE op with a tegra241_vintf_sid structure to allocate SID_REPLACE and to program the SIDs accordingly. This enables the HW accelerated feature for NVIDIA Grace CPU. Compared to the standard SMMUv3 operating in the nested translation mode trapping CMDQ for TLBI and ATC_INV commands, this gives a huge performance improvement: 70% to 90% reductions of invalidation time were measured by various DMA unmap tests running in a guest OS. Link: https://patch.msgid.link/r/fb0eab83f529440b6aa181798912a6f0afa21eb0.1752126748.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Pranjal Shrivastava Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 59 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 32ee02380912..2fecea1973bc 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -591,6 +591,28 @@ struct iommu_hw_info_arm_smmuv3 { __u32 aidr; }; +/** + * struct iommu_hw_info_tegra241_cmdqv - NVIDIA Tegra241 CMDQV Hardware + * Information (IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV) + * + * @flags: Must be 0 + * @version: Version number for the CMDQ-V HW for PARAM bits[03:00] + * @log2vcmdqs: Log2 of the total number of VCMDQs for PARAM bits[07:04] + * @log2vsids: Log2 of the total number of SID replacements for PARAM bits[15:12] + * @__reserved: Must be 0 + * + * VMM can use these fields directly in its emulated global PARAM register. Note + * that only one Virtual Interface (VINTF) should be exposed to a VM, i.e. PARAM + * bits[11:08] should be set to 0 for log2 of the total number of VINTFs. + */ +struct iommu_hw_info_tegra241_cmdqv { + __u32 flags; + __u8 version; + __u8 log2vcmdqs; + __u8 log2vsids; + __u8 __reserved; +}; + /** * enum iommu_hw_info_type - IOMMU Hardware Info Types * @IOMMU_HW_INFO_TYPE_NONE: Output by the drivers that do not report hardware @@ -598,12 +620,15 @@ struct iommu_hw_info_arm_smmuv3 { * @IOMMU_HW_INFO_TYPE_DEFAULT: Input to request for a default type * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type + * @IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM + * SMMUv3) info type */ enum iommu_hw_info_type { IOMMU_HW_INFO_TYPE_NONE = 0, IOMMU_HW_INFO_TYPE_DEFAULT = 0, IOMMU_HW_INFO_TYPE_INTEL_VTD = 1, IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2, + IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV = 3, }; /** @@ -972,10 +997,29 @@ struct iommu_fault_alloc { * enum iommu_viommu_type - Virtual IOMMU Type * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type + * @IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM + * SMMUv3) enabled ARM SMMUv3 type */ enum iommu_viommu_type { IOMMU_VIOMMU_TYPE_DEFAULT = 0, IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1, + IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV = 2, +}; + +/** + * struct iommu_viommu_tegra241_cmdqv - NVIDIA Tegra241 CMDQV Virtual Interface + * (IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV) + * @out_vintf_mmap_offset: mmap offset argument for VINTF's page0 + * @out_vintf_mmap_length: mmap length argument for VINTF's page0 + * + * Both @out_vintf_mmap_offset and @out_vintf_mmap_length are reported by kernel + * for user space to mmap the VINTF page0 from the host physical address space + * to the guest physical address space so that a guest kernel can directly R/W + * access to the VINTF page0 in order to control its virtual command queues. + */ +struct iommu_viommu_tegra241_cmdqv { + __aligned_u64 out_vintf_mmap_offset; + __aligned_u64 out_vintf_mmap_length; }; /** @@ -1172,9 +1216,24 @@ struct iommu_veventq_alloc { /** * enum iommu_hw_queue_type - HW Queue Type * @IOMMU_HW_QUEUE_TYPE_DEFAULT: Reserved for future use + * @IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM + * SMMUv3) Virtual Command Queue (VCMDQ) */ enum iommu_hw_queue_type { IOMMU_HW_QUEUE_TYPE_DEFAULT = 0, + /* + * TEGRA241_CMDQV requirements (otherwise, allocation will fail) + * - alloc starts from the lowest @index=0 in ascending order + * - destroy starts from the last allocated @index in descending order + * - @base_addr must be aligned to @length in bytes and mapped in IOAS + * - @length must be a power of 2, with a minimum 32 bytes and a maximum + * 2 ^ idr[1].CMDQS * 16 bytes (use GET_HW_INFO call to read idr[1] + * from struct iommu_hw_info_arm_smmuv3) + * - suggest to back the queue memory with contiguous physical pages or + * a single huge page with alignment of the queue size, and limit the + * emulated vSMMU's IDR1.CMDQS to log2(huge page size / 16 bytes) + */ + IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV = 1, }; /** -- cgit v1.2.3 From 32b2d3a57e26804ca96d82a222667ac0fa226cb7 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:21 -0700 Subject: iommu/tegra241-cmdqv: Add IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV support Add a new vEVENTQ type for VINTFs that are assigned to the user space. Simply report the two 64-bit LVCMDQ_ERR_MAPs register values. Link: https://patch.msgid.link/r/68161a980da41fa5022841209638aeff258557b5.1752126748.git.nicolinc@nvidia.com Reviewed-by: Alok Tiwari Reviewed-by: Pranjal Shrivastava Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 2fecea1973bc..554aacf89ea7 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -1146,10 +1146,12 @@ struct iommufd_vevent_header { * enum iommu_veventq_type - Virtual Event Queue Type * @IOMMU_VEVENTQ_TYPE_DEFAULT: Reserved for future use * @IOMMU_VEVENTQ_TYPE_ARM_SMMUV3: ARM SMMUv3 Virtual Event Queue + * @IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV Extension IRQ */ enum iommu_veventq_type { IOMMU_VEVENTQ_TYPE_DEFAULT = 0, IOMMU_VEVENTQ_TYPE_ARM_SMMUV3 = 1, + IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV = 2, }; /** @@ -1173,6 +1175,19 @@ struct iommu_vevent_arm_smmuv3 { __aligned_le64 evt[4]; }; +/** + * struct iommu_vevent_tegra241_cmdqv - Tegra241 CMDQV IRQ + * (IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV) + * @lvcmdq_err_map: 128-bit logical vcmdq error map, little-endian. + * (Refer to register LVCMDQ_ERR_MAPs per VINTF ) + * + * The 128-bit register value from HW exclusively reflect the error bits for a + * Virtual Interface represented by a vIOMMU object. Read and report directly. + */ +struct iommu_vevent_tegra241_cmdqv { + __aligned_le64 lvcmdq_err_map[2]; +}; + /** * struct iommu_veventq_alloc - ioctl(IOMMU_VEVENTQ_ALLOC) * @size: sizeof(struct iommu_veventq_alloc) -- cgit v1.2.3 From b725441f02c2b31c04a95d0e9ca5420fa029a767 Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Thu, 10 Jul 2025 11:20:32 +0800 Subject: bpf: Add attach_type field to bpf_link Attach_type will be set when a link is created by user. It is better to record attach_type in bpf_link generically and have it available universally for all link types. So add the attach_type field in bpf_link and move the sleepable field to avoid unnecessary gap padding. Signed-off-by: Tao Chen Signed-off-by: Andrii Nakryiko Acked-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20250710032038.888700-2-chen.dylane@linux.dev --- include/linux/bpf.h | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 34dd90ec7fad..a9ee9c14b486 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1729,12 +1729,10 @@ struct bpf_link { enum bpf_link_type type; const struct bpf_link_ops *ops; struct bpf_prog *prog; - /* whether BPF link itself has "sleepable" semantics, which can differ - * from underlying BPF program having a "sleepable" semantics, as BPF - * link's semantics is determined by target attach hook - */ - bool sleepable; + u32 flags; + enum bpf_attach_type attach_type; + /* rcu is used before freeing, work can be used to schedule that * RCU-based freeing before that, so they never overlap */ @@ -1742,6 +1740,11 @@ struct bpf_link { struct rcu_head rcu; struct work_struct work; }; + /* whether BPF link itself has "sleepable" semantics, which can differ + * from underlying BPF program having a "sleepable" semantics, as BPF + * link's semantics is determined by target attach hook + */ + bool sleepable; }; struct bpf_link_ops { @@ -2034,11 +2037,13 @@ int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog, #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM) int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, - int cgroup_atype); + int cgroup_atype, + enum bpf_attach_type attach_type); void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog); #else static inline int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, - int cgroup_atype) + int cgroup_atype, + enum bpf_attach_type attach_type) { return -EOPNOTSUPP; } @@ -2528,10 +2533,11 @@ int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_prog_new_fd(struct bpf_prog *prog); void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, - const struct bpf_link_ops *ops, struct bpf_prog *prog); + const struct bpf_link_ops *ops, struct bpf_prog *prog, + enum bpf_attach_type attach_type); void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog, - bool sleepable); + enum bpf_attach_type attach_type, bool sleepable); int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer); int bpf_link_settle(struct bpf_link_primer *primer); void bpf_link_cleanup(struct bpf_link_primer *primer); @@ -2883,13 +2889,13 @@ bpf_prog_inc_not_zero(struct bpf_prog *prog) static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, - struct bpf_prog *prog) + struct bpf_prog *prog, enum bpf_attach_type attach_type) { } static inline void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog, - bool sleepable) + enum bpf_attach_type attach_type, bool sleepable) { } -- cgit v1.2.3 From 9b8d543dc2bbf5d3a1e2d60049df94ae2bc68b28 Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Thu, 10 Jul 2025 11:20:33 +0800 Subject: bpf: Remove attach_type in bpf_cgroup_link Use attach_type in bpf_link, and remove it in bpf_cgroup_link. Signed-off-by: Tao Chen Signed-off-by: Andrii Nakryiko Acked-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20250710032038.888700-3-chen.dylane@linux.dev --- include/linux/bpf-cgroup.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 70c8b94e797a..082ccd8ad96b 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -103,7 +103,6 @@ struct bpf_cgroup_storage { struct bpf_cgroup_link { struct bpf_link link; struct cgroup *cgroup; - enum bpf_attach_type type; }; struct bpf_prog_list { -- cgit v1.2.3 From 6e816e1c052b453a93aeb8b57ede9acde58c458d Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Thu, 10 Jul 2025 11:20:35 +0800 Subject: bpf: Remove location field in tcx_link Use attach_type in bpf_link to replace the location filed, and remove location field in tcx_link. Signed-off-by: Tao Chen Signed-off-by: Andrii Nakryiko Acked-by: Daniel Borkmann Acked-by: Jiri Olsa Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20250710032038.888700-5-chen.dylane@linux.dev --- include/net/tcx.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/tcx.h b/include/net/tcx.h index 5ce0ce9e0c02..23a61af13547 100644 --- a/include/net/tcx.h +++ b/include/net/tcx.h @@ -20,7 +20,6 @@ struct tcx_entry { struct tcx_link { struct bpf_link link; struct net_device *dev; - u32 location; }; static inline void tcx_set_ingress(struct sk_buff *skb, bool ingress) -- cgit v1.2.3 From 0eeeebdcc5feeec48118f7a3df2ac818e694ccc7 Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Thu, 10 Jul 2025 11:20:37 +0800 Subject: bpf: Remove attach_type in bpf_tracing_link Use attach_type in bpf_link, and remove it in bpf_tracing_link. Signed-off-by: Tao Chen Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20250710032038.888700-7-chen.dylane@linux.dev --- include/linux/bpf.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a9ee9c14b486..bc887831eaa5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1784,7 +1784,6 @@ struct bpf_shim_tramp_link { struct bpf_tracing_link { struct bpf_tramp_link link; - enum bpf_attach_type attach_type; struct bpf_trampoline *trampoline; struct bpf_prog *tgt_prog; }; -- cgit v1.2.3 From daec29dcc8731b7596690ab9f647839e4584a86d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 17 Jun 2025 19:08:14 +0200 Subject: locking/mutex: Mark devm_mutex_init() as __must_check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit devm_mutex_init() can fail. With CONFIG_DEBUG_MUTEXES=y the mutex will be marked as unusable and trigger errors on usage. Enforce all callers check the return value through the compiler. As devm_mutex_init() itself is a macro, it can not be annotated directly. Annotate __devm_mutex_init() instead. Unfortunately __must_check/warn_unused_result don't propagate through statement expression. So move the statement expression into the argument list of the call to __devm_mutex_init() through a helper macro. Suggested-by: Peter Zijlstra Signed-off-by: Thomas Weißschuh Reviewed-by: Andy Shevchenko Reviewed-by: Bartosz Golaszewski Signed-off-by: Boqun Feng Link: https://lore.kernel.org/r/20250617-must_check-devm_mutex_init-v7-3-d9e449f4d224@weissschuh.net --- include/linux/mutex.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index a039fa8c1780..00afd341d293 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -126,11 +126,11 @@ do { \ #ifdef CONFIG_DEBUG_MUTEXES -int __devm_mutex_init(struct device *dev, struct mutex *lock); +int __must_check __devm_mutex_init(struct device *dev, struct mutex *lock); #else -static inline int __devm_mutex_init(struct device *dev, struct mutex *lock) +static inline int __must_check __devm_mutex_init(struct device *dev, struct mutex *lock) { /* * When CONFIG_DEBUG_MUTEXES is off mutex_destroy() is just a nop so @@ -141,14 +141,17 @@ static inline int __devm_mutex_init(struct device *dev, struct mutex *lock) #endif -#define devm_mutex_init(dev, mutex) \ +#define __mutex_init_ret(mutex) \ ({ \ typeof(mutex) mutex_ = (mutex); \ \ mutex_init(mutex_); \ - __devm_mutex_init(dev, mutex_); \ + mutex_; \ }) +#define devm_mutex_init(dev, mutex) \ + __devm_mutex_init(dev, __mutex_init_ret(mutex)) + /* * See kernel/locking/mutex.c for detailed documentation of these APIs. * Also see Documentation/locking/mutex-design.rst. -- cgit v1.2.3 From 30dbb2d0e16fce445581049ebcd9043837a843ac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jul 2025 09:01:53 +0000 Subject: net_sched: act: annotate data-races in tcf_lastuse_update() and tcf_tm_dump() tcf_tm_dump() reads fields that can be changed concurrently, and tcf_lastuse_update() might race against itself. Add READ_ONCE() and WRITE_ONCE() annotations. Fetch jiffies once in tcf_tm_dump(). Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250709090204.797558-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/act_api.h | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 04781c92b43d..2894cfff2da3 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -76,19 +76,24 @@ static inline void tcf_lastuse_update(struct tcf_t *tm) { unsigned long now = jiffies; - if (tm->lastuse != now) - tm->lastuse = now; - if (unlikely(!tm->firstuse)) - tm->firstuse = now; + if (READ_ONCE(tm->lastuse) != now) + WRITE_ONCE(tm->lastuse, now); + if (unlikely(!READ_ONCE(tm->firstuse))) + WRITE_ONCE(tm->firstuse, now); } static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm) { - dtm->install = jiffies_to_clock_t(jiffies - stm->install); - dtm->lastuse = jiffies_to_clock_t(jiffies - stm->lastuse); - dtm->firstuse = stm->firstuse ? - jiffies_to_clock_t(jiffies - stm->firstuse) : 0; - dtm->expires = jiffies_to_clock_t(stm->expires); + unsigned long firstuse, now = jiffies; + + dtm->install = jiffies_to_clock_t(now - READ_ONCE(stm->install)); + dtm->lastuse = jiffies_to_clock_t(now - READ_ONCE(stm->lastuse)); + + firstuse = READ_ONCE(stm->firstuse); + dtm->firstuse = firstuse ? + jiffies_to_clock_t(now - firstuse) : 0; + + dtm->expires = jiffies_to_clock_t(READ_ONCE(stm->expires)); } static inline enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats) -- cgit v1.2.3 From 0d752877705c0252ef2726e4c63c5573f048951c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jul 2025 09:01:54 +0000 Subject: net_sched: act_connmark: use RCU in tcf_connmark_dump() Also storing tcf_action into struct tcf_connmark_parms makes sure there is no discrepancy in tcf_connmark_act(). Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250709090204.797558-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_connmark.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tc_act/tc_connmark.h b/include/net/tc_act/tc_connmark.h index e8dd77a96748..a5ce83f3eea4 100644 --- a/include/net/tc_act/tc_connmark.h +++ b/include/net/tc_act/tc_connmark.h @@ -7,6 +7,7 @@ struct tcf_connmark_parms { struct net *net; u16 zone; + int action; struct rcu_head rcu; }; -- cgit v1.2.3 From ba9dc9c14038b5f721e193f9e69ab73fd2f7bdd2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jul 2025 09:01:55 +0000 Subject: net_sched: act_csum: use RCU in tcf_csum_dump() Also storing tcf_action into struct tcf_csum_params makes sure there is no discrepancy in tcf_csum_act(). Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250709090204.797558-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_csum.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index 2515da0142a6..8d0c7a9f9345 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -8,6 +8,7 @@ struct tcf_csum_params { u32 update_flags; + int action; struct rcu_head rcu; }; -- cgit v1.2.3 From 554e66bad84ce4181ad91a2ae9cc74c7c440e836 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jul 2025 09:01:56 +0000 Subject: net_sched: act_ct: use RCU in tcf_ct_dump() Also storing tcf_action into struct tcf_ct_params makes sure there is no discrepancy in tcf_ct_act(). Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250709090204.797558-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_ct.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index e6b45cb27ebf..8b90c86c0b0d 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -13,7 +13,7 @@ struct tcf_ct_params { struct nf_conntrack_helper *helper; struct nf_conn *tmpl; u16 zone; - + int action; u32 mark; u32 mark_mask; -- cgit v1.2.3 From d300335b4e18672913dd792ff9f49e6cccf41d26 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jul 2025 09:01:57 +0000 Subject: net_sched: act_ctinfo: use atomic64_t for three counters Commit 21c167aa0ba9 ("net/sched: act_ctinfo: use percpu stats") missed that stats_dscp_set, stats_dscp_error and stats_cpmark_set might be written (and read) locklessly. Use atomic64_t for these three fields, I doubt act_ctinfo is used heavily on big SMP hosts anyway. Fixes: 24ec483cec98 ("net: sched: Introduce act_ctinfo action") Signed-off-by: Eric Dumazet Cc: Pedro Tammela Link: https://patch.msgid.link/20250709090204.797558-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_ctinfo.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/tc_act/tc_ctinfo.h b/include/net/tc_act/tc_ctinfo.h index f071c1d70a25..a04bcac7adf4 100644 --- a/include/net/tc_act/tc_ctinfo.h +++ b/include/net/tc_act/tc_ctinfo.h @@ -18,9 +18,9 @@ struct tcf_ctinfo_params { struct tcf_ctinfo { struct tc_action common; struct tcf_ctinfo_params __rcu *params; - u64 stats_dscp_set; - u64 stats_dscp_error; - u64 stats_cpmark_set; + atomic64_t stats_dscp_set; + atomic64_t stats_dscp_error; + atomic64_t stats_cpmark_set; }; enum { -- cgit v1.2.3 From 799c94178cf9c9e80575b05b7479396de8b42b61 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jul 2025 09:01:58 +0000 Subject: net_sched: act_ctinfo: use RCU in tcf_ctinfo_dump() Also storing tcf_action into struct tcf_ctinfo_params makes sure there is no discrepancy in tcf_ctinfo_act(). Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250709090204.797558-7-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_ctinfo.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tc_act/tc_ctinfo.h b/include/net/tc_act/tc_ctinfo.h index a04bcac7adf4..7fe01ab236da 100644 --- a/include/net/tc_act/tc_ctinfo.h +++ b/include/net/tc_act/tc_ctinfo.h @@ -7,6 +7,7 @@ struct tcf_ctinfo_params { struct rcu_head rcu; struct net *net; + int action; u32 dscpmask; u32 dscpstatemask; u32 cpmarkmask; -- cgit v1.2.3 From 8151684e339996ffe6d65968c5eea154366539f4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jul 2025 09:01:59 +0000 Subject: net_sched: act_mpls: use RCU in tcf_mpls_dump() Also storing tcf_action into struct tcf_mpls_params makes sure there is no discrepancy in tcf_mpls_act(). Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250709090204.797558-8-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_mpls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tc_act/tc_mpls.h b/include/net/tc_act/tc_mpls.h index d452e5e94fd0..dd067bd4018d 100644 --- a/include/net/tc_act/tc_mpls.h +++ b/include/net/tc_act/tc_mpls.h @@ -10,6 +10,7 @@ struct tcf_mpls_params { int tcfm_action; u32 tcfm_label; + int action; /* tcf_action */ u8 tcfm_tc; u8 tcfm_ttl; u8 tcfm_bos; -- cgit v1.2.3 From 5d28928668a2ef6182401ddca7ab4064bf349e3e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jul 2025 09:02:00 +0000 Subject: net_sched: act_nat: use RCU in tcf_nat_dump() Also storing tcf_action into struct tcf_nat_params makes sure there is no discrepancy in tcf_nat_act(). Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250709090204.797558-9-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_nat.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tc_act/tc_nat.h b/include/net/tc_act/tc_nat.h index c869274ac529..ae35f4009445 100644 --- a/include/net/tc_act/tc_nat.h +++ b/include/net/tc_act/tc_nat.h @@ -6,6 +6,7 @@ #include struct tcf_nat_parms { + int action; __be32 old_addr; __be32 new_addr; __be32 mask; -- cgit v1.2.3 From 9d096746572616a50cac4906f528a1959c0ee1c2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jul 2025 09:02:01 +0000 Subject: net_sched: act_pedit: use RCU in tcf_pedit_dump() Also storing tcf_action into struct tcf_pedit_params makes sure there is no discrepancy in tcf_pedit_act(). Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250709090204.797558-10-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_pedit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index 83fe39931781..f58ee15cd858 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -14,6 +14,7 @@ struct tcf_pedit_key_ex { struct tcf_pedit_parms { struct tc_pedit_key *tcfp_keys; struct tcf_pedit_key_ex *tcfp_keys_ex; + int action; u32 tcfp_off_max_hint; unsigned char tcfp_nkeys; unsigned char tcfp_flags; -- cgit v1.2.3 From cec7a5c6c695ba2226b6120dc330e3bea3ea96f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jul 2025 09:02:02 +0000 Subject: net_sched: act_police: use RCU in tcf_police_dump() Also storing tcf_action into struct tcf_police_params makes sure there is no discrepancy in tcf_police_act(). Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250709090204.797558-11-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_police.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h index 490d88cb5233..a89fc8e68b1e 100644 --- a/include/net/tc_act/tc_police.h +++ b/include/net/tc_act/tc_police.h @@ -5,10 +5,11 @@ #include struct tcf_police_params { + int action; int tcfp_result; u32 tcfp_ewma_rate; - s64 tcfp_burst; u32 tcfp_mtu; + s64 tcfp_burst; s64 tcfp_mtu_ptoks; s64 tcfp_pkt_burst; struct psched_ratecfg rate; -- cgit v1.2.3 From 1f376373bd225c90381b745e38fa65a9386f7f8e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jul 2025 09:02:03 +0000 Subject: net_sched: act_skbedit: use RCU in tcf_skbedit_dump() Also storing tcf_action into struct tcf_skbedit_params makes sure there is no discrepancy in tcf_skbedit_act(). Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250709090204.797558-12-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_skbedit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index 9649600fb3dc..31b2cd0bebb5 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -12,6 +12,7 @@ #include struct tcf_skbedit_params { + int action; u32 flags; u32 priority; u32 mark; -- cgit v1.2.3 From 8feaf9832be52be16e588029366e27940f6b88ea Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Wed, 9 Jul 2025 09:42:08 +0300 Subject: net/mlx5: Expose HCA capability bits for mkey max page size Expose the HCA capability for maximal page size that can be configured for an mkey. Used for enforcing capabilities when working with highly contiguous memory and using large page sizes. Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/3e4d3fda37934430f65f72601519e22bf396fd05.1751979184.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 0e93f342be09..a1bd92ed8f3a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2171,7 +2171,9 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 min_mkey_log_entity_size_fixed_buffer[0x5]; u8 ec_vf_vport_base[0x10]; - u8 reserved_at_3a0[0xa]; + u8 reserved_at_3a0[0x2]; + u8 max_mkey_log_entity_size_fixed_buffer[0x6]; + u8 reserved_at_3a8[0x2]; u8 max_mkey_log_entity_size_mtt[0x6]; u8 max_rqt_vhca_id[0x10]; -- cgit v1.2.3 From c4f96972c3c206ac8f6770b5ecd5320b561d0058 Mon Sep 17 00:00:00 2001 From: Edward Srouji Date: Wed, 9 Jul 2025 09:42:09 +0300 Subject: RDMA/mlx5: Fix UMR modifying of mkey page size When changing the page size on an mkey, the driver needs to set the appropriate bits in the mkey mask to indicate which fields are being modified. The 6th bit of a page size in mlx5 driver is considered an extension, and this bit has a dedicated capability and mask bits. Previously, the driver was not setting this mask in the mkey mask when performing page size changes, regardless of its hardware support, potentially leading to an incorrect page size updates. This fixes the issue by setting the relevant bit in the mkey mask when performing page size changes on an mkey and the 6th bit of this field is supported by the hardware. Fixes: cef7dde8836a ("net/mlx5: Expand mkey page size to support 6 bits") Signed-off-by: Edward Srouji Reviewed-by: Michael Guralnik Link: https://patch.msgid.link/9f43a9c73bf2db6085a99dc836f7137e76579f09.1751979184.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 6822cfa5f4ad..9d2467f982ad 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -280,6 +280,7 @@ enum { MLX5_MKEY_MASK_SMALL_FENCE = 1ull << 23, MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE = 1ull << 25, MLX5_MKEY_MASK_FREE = 1ull << 29, + MLX5_MKEY_MASK_PAGE_SIZE_5 = 1ull << 42, MLX5_MKEY_MASK_RELAXED_ORDERING_READ = 1ull << 47, }; -- cgit v1.2.3 From cbe080f931f48bc7b054008fc2567d1c8c247a89 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Wed, 9 Jul 2025 15:41:06 +0300 Subject: net/mlx5: Expose disciplined_fr_counter through HCA capabilities in mlx5_ifc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the `disciplined_fr_counter` capability bit to indicate that the device’s free-running cycle counter is disciplined to real-time. Signed-off-by: Carolina Jubran Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1752064867-16874-2-git-send-email-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a1bd92ed8f3a..d7684bb28a3a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1846,7 +1846,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_bf_reg_size[0x5]; - u8 reserved_at_270[0x3]; + u8 disciplined_fr_counter[0x1]; + u8 reserved_at_271[0x2]; u8 qp_error_syndrome[0x1]; u8 reserved_at_274[0x2]; u8 lag_dct[0x2]; -- cgit v1.2.3 From cd1746cb6555a2238c4aae9f9d60b637a61bf177 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Wed, 9 Jul 2025 15:41:07 +0300 Subject: net/mlx5: IFC updates for disabled host PF The port 2 host PF can be disabled, this bit reflects that setting. Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1752064867-16874-3-git-send-email-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index d7684bb28a3a..639dd0b56655 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -12383,7 +12383,9 @@ struct mlx5_ifc_mtrc_ctrl_bits { struct mlx5_ifc_host_params_context_bits { u8 host_number[0x8]; - u8 reserved_at_8[0x7]; + u8 reserved_at_8[0x5]; + u8 host_pf_not_exist[0x1]; + u8 reserved_at_14[0x1]; u8 host_pf_disabled[0x1]; u8 host_num_of_vfs[0x10]; -- cgit v1.2.3 From 1a40c362ae265ca4004f7373b34c22af6810f6cb Mon Sep 17 00:00:00 2001 From: Michael Margolin Date: Tue, 8 Jul 2025 20:23:06 +0000 Subject: RDMA/uverbs: Add a common way to create CQ with umem Add ioctl command attributes and a common handling for the option to create CQs with memory buffers passed from userspace. When required attributes are supplied, create umem and provide it for driver's use. The extension enables creation of CQs on top of preallocated CPU virtual or device memory buffers, by supplying VA or dmabuf fd, in a common way. Drivers can support this flow by initializing a new create_cq_umem fp field in their ops struct, with a function that can handle the new parameter. Signed-off-by: Michael Margolin Link: https://patch.msgid.link/20250708202308.24783-2-mrgolin@amazon.com Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 4 ++++ include/uapi/rdma/ib_user_ioctl_cmds.h | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1d123812a1f9..3fb1c963eeb0 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2486,6 +2486,10 @@ struct ib_device_ops { int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata); int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr, struct uverbs_attr_bundle *attrs); + int (*create_cq_umem)(struct ib_cq *cq, + const struct ib_cq_init_attr *attr, + struct ib_umem *umem, + struct uverbs_attr_bundle *attrs); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata); int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index ac7b162611ed..5f3e5bee51b2 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -105,6 +105,10 @@ enum uverbs_attrs_create_cq_cmd_attr_ids { UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_CREATE_CQ_EVENT_FD, + UVERBS_ATTR_CREATE_CQ_BUFFER_VA, + UVERBS_ATTR_CREATE_CQ_BUFFER_LENGTH, + UVERBS_ATTR_CREATE_CQ_BUFFER_FD, + UVERBS_ATTR_CREATE_CQ_BUFFER_OFFSET, }; enum uverbs_attrs_destroy_cq_cmd_attr_ids { -- cgit v1.2.3 From c897c2c8b8e82981df10df546c753ac857612937 Mon Sep 17 00:00:00 2001 From: Michael Margolin Date: Tue, 8 Jul 2025 20:23:07 +0000 Subject: RDMA/core: Add umem "is_contiguous" and "start_dma_addr" helpers In some cases drivers may need to check if a given umem is contiguous. Add a helper function in core code so that drivers don't need to deal with umem or scatter-gather lists structure. Additionally add a helper for getting umem's start DMA address and use it in other helper functions that open code it. Signed-off-by: Michael Margolin Link: https://patch.msgid.link/20250708202308.24783-3-mrgolin@amazon.com Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- include/rdma/ib_umem.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 7dc7b1cc71b5..0a8e092c0ea8 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -52,11 +52,15 @@ static inline int ib_umem_offset(struct ib_umem *umem) return umem->address & ~PAGE_MASK; } +static inline dma_addr_t ib_umem_start_dma_addr(struct ib_umem *umem) +{ + return sg_dma_address(umem->sgt_append.sgt.sgl) + ib_umem_offset(umem); +} + static inline unsigned long ib_umem_dma_offset(struct ib_umem *umem, unsigned long pgsz) { - return (sg_dma_address(umem->sgt_append.sgt.sgl) + ib_umem_offset(umem)) & - (pgsz - 1); + return ib_umem_start_dma_addr(umem) & (pgsz - 1); } static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem, @@ -135,14 +139,27 @@ static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, unsigned long pgsz_bitmap, u64 pgoff_bitmask) { - struct scatterlist *sg = umem->sgt_append.sgt.sgl; dma_addr_t dma_addr; - dma_addr = sg_dma_address(sg) + (umem->address & ~PAGE_MASK); + dma_addr = ib_umem_start_dma_addr(umem); return ib_umem_find_best_pgsz(umem, pgsz_bitmap, dma_addr & pgoff_bitmask); } +static inline bool ib_umem_is_contiguous(struct ib_umem *umem) +{ + dma_addr_t dma_addr; + unsigned long pgsz; + + /* + * Select the smallest aligned page that can contain the whole umem if + * it was contiguous. + */ + dma_addr = ib_umem_start_dma_addr(umem); + pgsz = roundup_pow_of_two((dma_addr ^ (umem->length - 1 + dma_addr)) + 1); + return !!ib_umem_find_best_pgoff(umem, pgsz, U64_MAX); +} + struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, unsigned long offset, size_t size, int fd, int access, -- cgit v1.2.3 From 9fb3dd85197f5e5901a81b104a0f8b513148d138 Mon Sep 17 00:00:00 2001 From: Michael Margolin Date: Tue, 8 Jul 2025 20:23:08 +0000 Subject: RDMA/efa: Add CQ with external memory support Add an option to create CQ using external memory instead of allocating in the driver. The memory can be passed from userspace by dmabuf fd and an offset or a VA. One of the possible usages is creating CQs that reside in accelerator memory, allowing low latency asynchronous direct polling from the accelerator device. Add a capability bit to reflect on the feature support. Reviewed-by: Daniel Kranzdorf Reviewed-by: Yonatan Nachum Signed-off-by: Michael Margolin Link: https://patch.msgid.link/20250708202308.24783-4-mrgolin@amazon.com Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- include/uapi/rdma/efa-abi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h index 11b94b0b035b..98b71b9979f8 100644 --- a/include/uapi/rdma/efa-abi.h +++ b/include/uapi/rdma/efa-abi.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ /* - * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef EFA_ABI_USER_H @@ -131,6 +131,7 @@ enum { EFA_QUERY_DEVICE_CAPS_DATA_POLLING_128 = 1 << 4, EFA_QUERY_DEVICE_CAPS_RDMA_WRITE = 1 << 5, EFA_QUERY_DEVICE_CAPS_UNSOLICITED_WRITE_RECV = 1 << 6, + EFA_QUERY_DEVICE_CAPS_CQ_WITH_EXT_MEM = 1 << 7, }; struct efa_ibv_ex_query_device_resp { -- cgit v1.2.3 From a6cfa4c8833944f8912c1fa7f95795753f6376ea Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sat, 12 Jul 2025 18:37:12 -0500 Subject: PM: hibernate: Add stub for pm_hibernate_is_recovering() Randy reports that amdgpu fails to compile with the following error: ERROR: modpost: "pm_hibernate_is_recovering" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined! This happens because pm_hibernate_is_recovering() is only compiled when CONFIG_PM_SLEEP is set. Add a stub for it so that drivers don't need to depend upon CONFIG_PM. Cc: Samuel Zhang Reported-by: Randy Dunlap Closes: https://lore.kernel.org/dri-devel/CAJZ5v0h1CX+aTu7dFy6vB-9LM6t5J4rt7Su3qVnq1xx-BFAm=Q@mail.gmail.com/T/#m2b9fe212b35fde11d58fcbc4e0727bc02ebba7b0 Fixes: c2aaddbd2dede ("PM: hibernate: add new api pm_hibernate_is_recovering()") Acked-by: Rafael J. Wysocki Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://lore.kernel.org/r/20250712233715.821424-1-superm1@kernel.org Signed-off-by: Mario Limonciello --- include/linux/suspend.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 293137210fdf..fcb150ee83b6 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -426,8 +426,6 @@ int is_hibernate_resume_dev(dev_t dev); static inline int is_hibernate_resume_dev(dev_t dev) { return 0; } #endif -bool pm_hibernate_is_recovering(void); - /* Hibernation and suspend events */ #define PM_HIBERNATION_PREPARE 0x0001 /* Going to hibernate */ #define PM_POST_HIBERNATION 0x0002 /* Hibernation finished */ @@ -478,6 +476,7 @@ extern unsigned int lock_system_sleep(void); extern void unlock_system_sleep(unsigned int); extern bool pm_sleep_transition_in_progress(void); +bool pm_hibernate_is_recovering(void); #else /* !CONFIG_PM_SLEEP */ @@ -508,6 +507,7 @@ static inline unsigned int lock_system_sleep(void) { return 0; } static inline void unlock_system_sleep(unsigned int flags) {} static inline bool pm_sleep_transition_in_progress(void) { return false; } +static inline bool pm_hibernate_is_recovering(void) { return false; } #endif /* !CONFIG_PM_SLEEP */ -- cgit v1.2.3 From 11d58620dfd0c52b0c49b04d28707c7a5a2d00ae Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 1 Jul 2025 16:37:51 -0500 Subject: iio: adc: ad_sigma_delta: use u8 instead of uint8_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace uint8_t with u8 in the ad_sigma_delta driver. Technically, uint8_t comes from the C standard library, while u8 is a Linux kernel type. Since we don't use the C standard library in the kernel, we should use the kernel types instead. There is also one instance where int64_t is replaced with s64. Signed-off-by: David Lechner Reviewed-by: Andy Shevchenko Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250701-iio-adc-ad7173-add-spi-offload-support-v3-3-42abb83e3dac@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/adc/ad_sigma_delta.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index f242b285081b..5056677c9941 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -94,7 +94,7 @@ struct ad_sigma_delta { bool bus_locked; bool keep_cs_asserted; - uint8_t comm; + u8 comm; const struct ad_sigma_delta_info *info; unsigned int active_slots; @@ -105,7 +105,7 @@ struct ad_sigma_delta { bool status_appended; /* map slots to channels in order to know what to expect from devices */ unsigned int *slots; - uint8_t *samples_buf; + u8 *samples_buf; /* * DMA (thus cache coherency maintenance) requires the @@ -114,8 +114,8 @@ struct ad_sigma_delta { * 'rx_buf' is up to 32 bits per sample + 64 bit timestamp, * rounded to 16 bytes to take into account padding. */ - uint8_t tx_buf[4] __aligned(IIO_DMA_MINALIGN); - uint8_t rx_buf[16] __aligned(8); + u8 tx_buf[4] __aligned(IIO_DMA_MINALIGN); + u8 rx_buf[16] __aligned(8); }; static inline int ad_sigma_delta_set_channel(struct ad_sigma_delta *sd, @@ -177,7 +177,7 @@ static inline int ad_sigma_delta_postprocess_sample(struct ad_sigma_delta *sd, return 0; } -void ad_sd_set_comm(struct ad_sigma_delta *sigma_delta, uint8_t comm); +void ad_sd_set_comm(struct ad_sigma_delta *sigma_delta, u8 comm); int ad_sd_write_reg(struct ad_sigma_delta *sigma_delta, unsigned int reg, unsigned int size, unsigned int val); int ad_sd_read_reg(struct ad_sigma_delta *sigma_delta, unsigned int reg, -- cgit v1.2.3 From db63e45a7da0678652c69f7cbed2cbf2a9922b39 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 1 Jul 2025 16:37:56 -0500 Subject: iio: adc: ad_sigma_delta: use spi_optimize_message() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use spi_optimize_message() to improve the performance of buffered reads. By setting up the SPI message and pre-optimizing it in the buffer postenable callback, we can reduce overhead during each sample read. A rough estimate shows that this reduced the CPU usage of the interrupt handler thread from 22% to 16% using an EVAL-AD4112ARDZ board on a DE10-Nano (measuring a single channel at the default 6.2 kHz sample rate). Signed-off-by: David Lechner Reviewed-by: Andy Shevchenko Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250701-iio-adc-ad7173-add-spi-offload-support-v3-8-42abb83e3dac@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/adc/ad_sigma_delta.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 5056677c9941..2037bb68b441 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -105,6 +105,8 @@ struct ad_sigma_delta { bool status_appended; /* map slots to channels in order to know what to expect from devices */ unsigned int *slots; + struct spi_message sample_msg; + struct spi_transfer sample_xfer[2]; u8 *samples_buf; /* @@ -116,6 +118,7 @@ struct ad_sigma_delta { */ u8 tx_buf[4] __aligned(IIO_DMA_MINALIGN); u8 rx_buf[16] __aligned(8); + u8 sample_addr; }; static inline int ad_sigma_delta_set_channel(struct ad_sigma_delta *sd, -- cgit v1.2.3 From 219da3ea842a156e5808176e11db256db9798f6c Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 1 Jul 2025 16:37:59 -0500 Subject: iio: adc: ad_sigma_delta: add SPI offload support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SPI offload support to the ad_sigma_delta module. When the SPI controller has SPI offload capabilities, the module will now use that for buffered reads instead of the RDY interrupt trigger. Drivers that use the ad_sigma_delta module will have to opt into this by setting supports_spi_offload since each driver will likely need additional changes before SPI offload can be used. This will allow us to gradually enable SPI offload support for each driver. Signed-off-by: David Lechner Reviewed-by: Andy Shevchenko Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250701-iio-adc-ad7173-add-spi-offload-support-v3-11-42abb83e3dac@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/adc/ad_sigma_delta.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 2037bb68b441..6e70a412e218 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -31,6 +31,8 @@ struct ad_sigma_delta; struct device; struct gpio_desc; struct iio_dev; +struct spi_offload; +struct spi_offload_trigger; /** * struct ad_sigma_delta_info - Sigma Delta driver specific callbacks and options @@ -47,6 +49,10 @@ struct iio_dev; * @has_registers: true if the device has writable and readable registers, false * if there is just one read-only sample data shift register. * @has_named_irqs: Set to true if there is more than one IRQ line. + * @supports_spi_offload: Set to true if the driver supports SPI offload. Often + * special considerations are needed for scan_type and other channel + * info, so individual drivers have to set this to let the core + * code know that it can use SPI offload if it is available. * @addr_shift: Shift of the register address in the communications register. * @read_mask: Mask for the communications register having the read bit set. * @status_ch_mask: Mask for the channel number stored in status register. @@ -65,6 +71,7 @@ struct ad_sigma_delta_info { int (*postprocess_sample)(struct ad_sigma_delta *, unsigned int raw_sample); bool has_registers; bool has_named_irqs; + bool supports_spi_offload; unsigned int addr_shift; unsigned int read_mask; unsigned int status_ch_mask; @@ -108,6 +115,8 @@ struct ad_sigma_delta { struct spi_message sample_msg; struct spi_transfer sample_xfer[2]; u8 *samples_buf; + struct spi_offload *offload; + struct spi_offload_trigger *offload_trigger; /* * DMA (thus cache coherency maintenance) requires the @@ -121,6 +130,11 @@ struct ad_sigma_delta { u8 sample_addr; }; +static inline bool ad_sigma_delta_has_spi_offload(struct ad_sigma_delta *sd) +{ + return sd->offload != NULL; +} + static inline int ad_sigma_delta_set_channel(struct ad_sigma_delta *sd, unsigned int channel) { -- cgit v1.2.3 From 3df2817d5a949646a17bc87677520b358644ee70 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 3 Jul 2025 16:11:41 +0200 Subject: dt-bindings: iio: adc: mt6359: Add MT6363 PMIC AuxADC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a compatible and channel bindings for MediaTek's MT6363 PMIC, featuring an Auxiliary ADC IP with 15 ADC channels used for both internal temperatures and voltages and for external voltage inputs. Acked-by: Rob Herring (Arm) Reviewed-by: Nícolas F. R. A. Prado Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250703141146.171431-2-angelogioacchino.delregno@collabora.com Signed-off-by: Jonathan Cameron --- .../dt-bindings/iio/adc/mediatek,mt6363-auxadc.h | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 include/dt-bindings/iio/adc/mediatek,mt6363-auxadc.h (limited to 'include') diff --git a/include/dt-bindings/iio/adc/mediatek,mt6363-auxadc.h b/include/dt-bindings/iio/adc/mediatek,mt6363-auxadc.h new file mode 100644 index 000000000000..92d135477d0e --- /dev/null +++ b/include/dt-bindings/iio/adc/mediatek,mt6363-auxadc.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ + +#ifndef _DT_BINDINGS_MEDIATEK_MT6363_AUXADC_H +#define _DT_BINDINGS_MEDIATEK_MT6363_AUXADC_H + +/* ADC Channel Index */ +#define MT6363_AUXADC_BATADC 0 +#define MT6363_AUXADC_VCDT 1 +#define MT6363_AUXADC_BAT_TEMP 2 +#define MT6363_AUXADC_CHIP_TEMP 3 +#define MT6363_AUXADC_VSYSSNS 4 +#define MT6363_AUXADC_VTREF 5 +#define MT6363_AUXADC_VCORE_TEMP 6 +#define MT6363_AUXADC_VPROC_TEMP 7 +#define MT6363_AUXADC_VGPU_TEMP 8 +#define MT6363_AUXADC_VIN1 9 +#define MT6363_AUXADC_VIN2 10 +#define MT6363_AUXADC_VIN3 11 +#define MT6363_AUXADC_VIN4 12 +#define MT6363_AUXADC_VIN5 13 +#define MT6363_AUXADC_VIN6 14 +#define MT6363_AUXADC_VIN7 15 + +#endif -- cgit v1.2.3 From 00da77d1d226b9d8298262fd61b6281abd89a9d4 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 3 Jul 2025 16:11:42 +0200 Subject: dt-bindings: iio: adc: mt6359: Add MT6373 PMIC AuxADC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a compatible and channel bindings for MediaTek's MT6373 PMIC, featuring an Auxiliary ADC IP with 15 ADC channels for external (SoC) temperatures and external voltage inputs. Acked-by: Rob Herring (Arm) Reviewed-by: Nícolas F. R. A. Prado Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250703141146.171431-3-angelogioacchino.delregno@collabora.com Signed-off-by: Jonathan Cameron --- include/dt-bindings/iio/adc/mediatek,mt6373-auxadc.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/dt-bindings/iio/adc/mediatek,mt6373-auxadc.h (limited to 'include') diff --git a/include/dt-bindings/iio/adc/mediatek,mt6373-auxadc.h b/include/dt-bindings/iio/adc/mediatek,mt6373-auxadc.h new file mode 100644 index 000000000000..17cab86d355e --- /dev/null +++ b/include/dt-bindings/iio/adc/mediatek,mt6373-auxadc.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ + +#ifndef _DT_BINDINGS_MEDIATEK_MT6373_AUXADC_H +#define _DT_BINDINGS_MEDIATEK_MT6373_AUXADC_H + +/* ADC Channel Index */ +#define MT6373_AUXADC_CHIP_TEMP 0 +#define MT6373_AUXADC_VCORE_TEMP 1 +#define MT6373_AUXADC_VPROC_TEMP 2 +#define MT6373_AUXADC_VGPU_TEMP 3 +#define MT6373_AUXADC_VIN1 4 +#define MT6373_AUXADC_VIN2 5 +#define MT6373_AUXADC_VIN3 6 +#define MT6373_AUXADC_VIN4 7 +#define MT6373_AUXADC_VIN5 8 +#define MT6373_AUXADC_VIN6 9 +#define MT6373_AUXADC_VIN7 10 + +#endif -- cgit v1.2.3 From 69d5b62c4bded309332add0fac6760239ff47a68 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Thu, 10 Jul 2025 20:40:01 +0800 Subject: ASoC: codec: tlv320aic32x4: Drop aic32x4_pdata usage There is no machine is using aic32x4_pdata as platform_data, so remove the dead code. Cc: Markus Niebel Cc: Alexander Stein Reviewed-by: Alexander Stein Signed-off-by: Peng Fan Reviewed-by: Linus Walleij Link: https://patch.msgid.link/20250710-asoc-gpio-1-v2-1-2233b272a1a6@nxp.com Signed-off-by: Mark Brown --- include/sound/tlv320aic32x4.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/sound/tlv320aic32x4.h b/include/sound/tlv320aic32x4.h index 0abf74d7edbd..b779d671a995 100644 --- a/include/sound/tlv320aic32x4.h +++ b/include/sound/tlv320aic32x4.h @@ -40,13 +40,4 @@ struct aic32x4_setup_data { unsigned int gpio_func[5]; }; - -struct aic32x4_pdata { - struct aic32x4_setup_data *setup; - u32 power_cfg; - u32 micpga_routing; - bool swapdacs; - int rstn_gpio; -}; - #endif -- cgit v1.2.3 From e24d552a17e92714d4f62e112d536babd6428acb Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 20 Jun 2025 16:33:05 +0100 Subject: mm/madvise: eliminate very confusing manipulation of prev VMA The madvise code has for the longest time had very confusing code around the 'prev' VMA pointer passed around various functions which, in all cases except madvise_update_vma(), is unused and instead simply updated as soon as the function is invoked. To compound the confusion, the prev pointer is also used to indicate to the caller that the mmap lock has been dropped and that we can therefore not safely access the end of the current VMA (which might have been updated by madvise_update_vma()). Clear up this confusion by not setting prev = vma anywhere except in madvise_walk_vmas(), update all references to prev which will always be equal to vma after madvise_vma_behavior() is invoked, and adding a flag to indicate that the lock has been dropped to make this explicit. Additionally, drop a redundant BUG_ON() from madvise_collapse(), which is simply reiterating the BUG_ON(mmap_locked) above it (note that BUG_ON() is not appropriate here, but we leave existing code as-is). We finally adjust the madvise_walk_vmas() logic to be a little clearer - delaying the assignment of the end of the range to the start of the new range until the last moment and handling the lock being dropped scenario immediately. Additionally add some explanatory comments. [lorenzo.stoakes@oracle.com: fix very subtle bug] Link: https://lkml.kernel.org/r/dca94cde-8afb-4eab-8e57-3f508624d670@lucifer.local Link: https://lkml.kernel.org/r/63d281c5df2e64225ab5b4bda398b45e22818701.1750433500.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Reviewed-by: Zi Yan Reviewed-by: SeongJae Park Cc: Baolin Wang Cc: Barry Song Cc: David Hildenbrand Cc: Dev Jain Cc: Jann Horn Cc: Lance Yang Cc: Liam Howlett Cc: Mariano Pache Cc: Ryan Roberts Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 8f1b15213f61..4d5bb67dc4ec 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -433,9 +433,8 @@ change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, int hugepage_madvise(struct vm_area_struct *vma, vm_flags_t *vm_flags, int advice); -int madvise_collapse(struct vm_area_struct *vma, - struct vm_area_struct **prev, - unsigned long start, unsigned long end); +int madvise_collapse(struct vm_area_struct *vma, unsigned long start, + unsigned long end, bool *lock_dropped); void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct vm_area_struct *next); spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma); @@ -596,8 +595,8 @@ static inline int hugepage_madvise(struct vm_area_struct *vma, } static inline int madvise_collapse(struct vm_area_struct *vma, - struct vm_area_struct **prev, - unsigned long start, unsigned long end) + unsigned long start, + unsigned long end, bool *lock_dropped) { return -EINVAL; } -- cgit v1.2.3 From 6b233784b198e0d6dbfd526341b6ec51ffd30020 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 24 Jun 2025 15:03:46 +0200 Subject: mm, madvise: extract mm code from prctl_set_vma() to mm/madvise.c Setting anon_name is done via madvise_set_anon_name() and behaves a lot of like other madvise operations. However, apparently because madvise() has lacked the 4th argument and prctl() not, the userspace entry point has been implemented via prctl(PR_SET_VMA, ...) and handled first by prctl_set_vma(). Currently prctl_set_vma() lives in kernel/sys.c but setting the vma->anon_name is mm-specific code so extract it to a new set_anon_vma_name() function under mm. mm/madvise.c seems to be the most straightforward place as that's where madvise_set_anon_name() lives. Stop declaring the latter in mm.h and instead declare set_anon_vma_name(). Link: https://lkml.kernel.org/r/20250624-anon_name_cleanup-v2-2-600075462a11@suse.cz Signed-off-by: Vlastimil Babka Acked-by: David Hildenbrand Tested-by: Lorenzo Stoakes Reviewed-by: Suren Baghdasaryan Reviewed-by: Lorenzo Stoakes Cc: Colin Cross Cc: Jann Horn Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/mm.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0e0549f3d681..ef40f68c1183 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4059,14 +4059,14 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping, #endif #ifdef CONFIG_ANON_VMA_NAME -int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, - unsigned long len_in, - struct anon_vma_name *anon_name); +int set_anon_vma_name(unsigned long addr, unsigned long size, + const char __user *uname); #else -static inline int -madvise_set_anon_name(struct mm_struct *mm, unsigned long start, - unsigned long len_in, struct anon_vma_name *anon_name) { - return 0; +static inline +int set_anon_vma_name(unsigned long addr, unsigned long size, + const char __user *uname) +{ + return -EINVAL; } #endif -- cgit v1.2.3 From 8d2882a8edb8621d37fd8931e0686070cc6cc189 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Mon, 16 Jun 2025 15:51:45 +0200 Subject: mm,memory_hotplug: remove status_change_nid_normal and update documentation Now that the last user of status_change_nid_normal is gone, we can remove it. Update documentation accordingly. Link: https://lkml.kernel.org/r/20250616135158.450136-3-osalvador@suse.de Signed-off-by: Oscar Salvador Reviewed-by: Vlastimil Babka Acked-by: David Hildenbrand Cc: Harry Yoo Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Joanthan Cameron Cc: Rakie Kim Signed-off-by: Andrew Morton --- include/linux/memory.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/memory.h b/include/linux/memory.h index bd4440bc4a57..a8284d41e452 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -109,7 +109,6 @@ struct memory_notify { unsigned long altmap_nr_pages; unsigned long start_pfn; unsigned long nr_pages; - int status_change_nid_normal; int status_change_nid; }; -- cgit v1.2.3 From 67929de108479dbb78496b61af5c24072fc16d8d Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Mon, 16 Jun 2025 15:51:46 +0200 Subject: mm,memory_hotplug: implement numa node notifier There are at least six consumers of hotplug_memory_notifier that what they really are interested in is whether any numa node changed its state, e.g: going from having memory to not having memory and vice versa. Implement a specific notifier for numa nodes when their state gets changed, which will later be used by those consumers that are only interested in numa node state changes. Add documentation as well. [dan.carpenter@linaro.org: set failure reason in offline_pages()] Link: https://lkml.kernel.org/r/be4fd31b-7d09-46b0-8329-6d0464ffa7a5@sabinyo.mountain Link: https://lkml.kernel.org/r/20250616135158.450136-4-osalvador@suse.de Signed-off-by: Oscar Salvador Signed-off-by: Dan Carpenter Reviewed-by: Jonathan Cameron Reviewed-by: Harry Yoo Reviewed-by: Vlastimil Babka Acked-by: David Hildenbrand Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Rakie Kim Signed-off-by: Andrew Morton --- include/linux/node.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/linux/node.h b/include/linux/node.h index 88bceebcbfa5..2c7529335b21 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -125,6 +125,46 @@ static inline void register_memory_blocks_under_nodes(void) #endif extern void unregister_node(struct node *node); + +struct node_notify { + int nid; +}; + +#define NODE_ADDING_FIRST_MEMORY (1<<0) +#define NODE_ADDED_FIRST_MEMORY (1<<1) +#define NODE_CANCEL_ADDING_FIRST_MEMORY (1<<2) +#define NODE_REMOVING_LAST_MEMORY (1<<3) +#define NODE_REMOVED_LAST_MEMORY (1<<4) +#define NODE_CANCEL_REMOVING_LAST_MEMORY (1<<5) + +#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA) +extern int register_node_notifier(struct notifier_block *nb); +extern void unregister_node_notifier(struct notifier_block *nb); +extern int node_notify(unsigned long val, void *v); + +#define hotplug_node_notifier(fn, pri) ({ \ + static __meminitdata struct notifier_block fn##_node_nb =\ + { .notifier_call = fn, .priority = pri };\ + register_node_notifier(&fn##_node_nb); \ +}) +#else +static inline int register_node_notifier(struct notifier_block *nb) +{ + return 0; +} +static inline void unregister_node_notifier(struct notifier_block *nb) +{ +} +static inline int node_notify(unsigned long val, void *v) +{ + return 0; +} +static inline int hotplug_node_notifier(notifier_fn_t fn, int pri) +{ + return 0; +} +#endif + #ifdef CONFIG_NUMA extern void node_dev_init(void); /* Core of the node registration - only memory hotplug should use this */ -- cgit v1.2.3 From d2a9721d807de405b198291badcc807700746781 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Mon, 16 Jun 2025 15:51:54 +0200 Subject: mm,memory_hotplug: drop status_change_nid parameter from memory_notify There no users left of status_change_nid, so drop it from memory_notify struct. Link: https://lkml.kernel.org/r/20250616135158.450136-12-osalvador@suse.de Signed-off-by: Oscar Salvador Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Reviewed-by: Vlastimil Babka Cc: Harry Yoo Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Joanthan Cameron Cc: Rakie Kim Signed-off-by: Andrew Morton --- include/linux/memory.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/memory.h b/include/linux/memory.h index a8284d41e452..40eb70ccb09d 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -109,7 +109,6 @@ struct memory_notify { unsigned long altmap_nr_pages; unsigned long start_pfn; unsigned long nr_pages; - int status_change_nid; }; struct notifier_block; -- cgit v1.2.3 From 42f46ed99ac6c07adf7f3bcbe9040b0c52d62d0f Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Mon, 16 Jun 2025 22:11:09 -0400 Subject: mm/page_alloc: pageblock flags functions clean up Patch series "Make MIGRATE_ISOLATE a standalone bit", v10. This patchset moves MIGRATE_ISOLATE to a standalone bit to avoid being overwritten during pageblock isolation process. Currently, MIGRATE_ISOLATE is part of enum migratetype (in include/linux/mmzone.h), thus, setting a pageblock to MIGRATE_ISOLATE overwrites its original migratetype. This causes pageblock migratetype loss during alloc_contig_range() and memory offline, especially when the process fails due to a failed pageblock isolation and the code tries to undo the finished pageblock isolations. In terms of performance for changing pageblock types, no performance change is observed: 1. I used perf to collect stats of offlining and onlining all memory of a 40GB VM 10 times and see that get_pfnblock_flags_mask() and set_pfnblock_flags_mask() take about 0.12% and 0.02% of the whole process respectively with and without this patchset across 3 runs. 2. I used perf to collect stats of dd from /dev/random to a 40GB tmpfs file and find get_pfnblock_flags_mask() takes about 0.05% of the process with and without this patchset across 3 runs. This patch (of 6): No functional change is intended. 1. Add __NR_PAGEBLOCK_BITS for the number of pageblock flag bits and use roundup_pow_of_two(__NR_PAGEBLOCK_BITS) as NR_PAGEBLOCK_BITS to take right amount of bits for pageblock flags. 2. Rename PB_migrate_skip to PB_compact_skip. 3. Add {get,set,clear}_pfnblock_bit() to operate one a standalone bit, like PB_compact_skip. 3. Make {get,set}_pfnblock_flags_mask() internal functions and use {get,set}_pfnblock_migratetype() for pageblock migratetype operations. 4. Move pageblock flags common code to get_pfnblock_bitmap_bitidx(). 3. Use MIGRATETYPE_MASK to get the migratetype of a pageblock from its flags. 4. Use PB_migrate_end in the definition of MIGRATETYPE_MASK instead of PB_migrate_bits. 5. Add a comment on is_migrate_cma_folio() to prevent one from changing it to use get_pageblock_migratetype() and causing issues. Link: https://lkml.kernel.org/r/20250617021115.2331563-1-ziy@nvidia.com Link: https://lkml.kernel.org/r/20250617021115.2331563-2-ziy@nvidia.com Signed-off-by: Zi Yan Reviewed-by: Vlastimil Babka Acked-by: David Hildenbrand Cc: Baolin Wang Cc: Brendan Jackman Cc: Johannes Weiner Cc: Kirill A. Shuemov Cc: Mel Gorman Cc: Michal Hocko Cc: Oscar Salvador Cc: Richard Chang Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 18 ++++++++++-------- include/linux/page-isolation.h | 2 +- include/linux/pageblock-flags.h | 34 +++++++++++++++++----------------- 3 files changed, 28 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5bec8b1d0e66..76d66c07b673 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -92,8 +92,12 @@ extern const char * const migratetype_names[MIGRATE_TYPES]; #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) # define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA) -# define is_migrate_cma_folio(folio, pfn) (MIGRATE_CMA == \ - get_pfnblock_flags_mask(&folio->page, pfn, MIGRATETYPE_MASK)) +/* + * __dump_folio() in mm/debug.c passes a folio pointer to on-stack struct folio, + * so folio_pfn() cannot be used and pfn is needed. + */ +# define is_migrate_cma_folio(folio, pfn) \ + (get_pfnblock_migratetype(&folio->page, pfn) == MIGRATE_CMA) #else # define is_migrate_cma(migratetype) false # define is_migrate_cma_page(_page) false @@ -122,14 +126,12 @@ static inline bool migratetype_is_mergeable(int mt) extern int page_group_by_mobility_disabled; -#define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1) +#define get_pageblock_migratetype(page) \ + get_pfnblock_migratetype(page, page_to_pfn(page)) -#define get_pageblock_migratetype(page) \ - get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK) +#define folio_migratetype(folio) \ + get_pageblock_migratetype(&folio->page) -#define folio_migratetype(folio) \ - get_pfnblock_flags_mask(&folio->page, folio_pfn(folio), \ - MIGRATETYPE_MASK) struct free_area { struct list_head free_list[MIGRATE_TYPES]; unsigned long nr_free; diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 898bb788243b..277d8d92980c 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -25,7 +25,7 @@ static inline bool is_migrate_isolate(int migratetype) #define MEMORY_OFFLINE 0x1 #define REPORT_FAILURE 0x2 -void set_pageblock_migratetype(struct page *page, int migratetype); +void set_pageblock_migratetype(struct page *page, enum migratetype migratetype); bool move_freepages_block_isolate(struct zone *zone, struct page *page, int migratetype); diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 6297c6343c55..c240c7a1fb03 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -19,15 +19,19 @@ enum pageblock_bits { PB_migrate, PB_migrate_end = PB_migrate + PB_migratetype_bits - 1, /* 3 bits required for migrate types */ - PB_migrate_skip,/* If set the block is skipped by compaction */ + PB_compact_skip,/* If set the block is skipped by compaction */ /* * Assume the bits will always align on a word. If this assumption * changes then get/set pageblock needs updating. */ - NR_PAGEBLOCK_BITS + __NR_PAGEBLOCK_BITS }; +#define NR_PAGEBLOCK_BITS (roundup_pow_of_two(__NR_PAGEBLOCK_BITS)) + +#define MIGRATETYPE_MASK ((1UL << (PB_migrate_end + 1)) - 1) + #if defined(CONFIG_HUGETLB_PAGE) #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE @@ -65,27 +69,23 @@ extern unsigned int pageblock_order; /* Forward declaration */ struct page; -unsigned long get_pfnblock_flags_mask(const struct page *page, - unsigned long pfn, - unsigned long mask); - -void set_pfnblock_flags_mask(struct page *page, - unsigned long flags, - unsigned long pfn, - unsigned long mask); +enum migratetype get_pfnblock_migratetype(const struct page *page, + unsigned long pfn); +bool get_pfnblock_bit(const struct page *page, unsigned long pfn, + enum pageblock_bits pb_bit); +void set_pfnblock_bit(const struct page *page, unsigned long pfn, + enum pageblock_bits pb_bit); +void clear_pfnblock_bit(const struct page *page, unsigned long pfn, + enum pageblock_bits pb_bit); /* Declarations for getting and setting flags. See mm/page_alloc.c */ #ifdef CONFIG_COMPACTION #define get_pageblock_skip(page) \ - get_pfnblock_flags_mask(page, page_to_pfn(page), \ - (1 << (PB_migrate_skip))) + get_pfnblock_bit(page, page_to_pfn(page), PB_compact_skip) #define clear_pageblock_skip(page) \ - set_pfnblock_flags_mask(page, 0, page_to_pfn(page), \ - (1 << PB_migrate_skip)) + clear_pfnblock_bit(page, page_to_pfn(page), PB_compact_skip) #define set_pageblock_skip(page) \ - set_pfnblock_flags_mask(page, (1 << PB_migrate_skip), \ - page_to_pfn(page), \ - (1 << PB_migrate_skip)) + set_pfnblock_bit(page, page_to_pfn(page), PB_compact_skip) #else static inline bool get_pageblock_skip(struct page *page) { -- cgit v1.2.3 From e904bce2d9d43e0f370e238457a13847d161570b Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Mon, 16 Jun 2025 22:11:10 -0400 Subject: mm/page_isolation: make page isolation a standalone bit During page isolation, the original migratetype is overwritten, since MIGRATE_* are enums and stored in pageblock bitmaps. Change MIGRATE_ISOLATE to be stored a standalone bit, PB_migrate_isolate, like PB_compact_skip, so that migratetype is not lost during pageblock isolation. Link: https://lkml.kernel.org/r/20250617021115.2331563-3-ziy@nvidia.com Signed-off-by: Zi Yan Reviewed-by: Vlastimil Babka Acked-by: David Hildenbrand Cc: Baolin Wang Cc: Brendan Jackman Cc: Johannes Weiner Cc: Kirill A. Shuemov Cc: Mel Gorman Cc: Michal Hocko Cc: Oscar Salvador Cc: Richard Chang Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 3 +++ include/linux/page-isolation.h | 16 ++++++++++++++++ include/linux/pageblock-flags.h | 14 ++++++++++++++ 3 files changed, 33 insertions(+) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 76d66c07b673..1d1bb2b7f40d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -79,6 +79,9 @@ enum migratetype { * __free_pageblock_cma() function. */ MIGRATE_CMA, + __MIGRATE_TYPE_END = MIGRATE_CMA, +#else + __MIGRATE_TYPE_END = MIGRATE_HIGHATOMIC, #endif #ifdef CONFIG_MEMORY_ISOLATION MIGRATE_ISOLATE, /* can't allocate from here */ diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 277d8d92980c..fc021d3f95ca 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -11,6 +11,12 @@ static inline bool is_migrate_isolate(int migratetype) { return migratetype == MIGRATE_ISOLATE; } +#define get_pageblock_isolate(page) \ + get_pfnblock_bit(page, page_to_pfn(page), PB_migrate_isolate) +#define clear_pageblock_isolate(page) \ + clear_pfnblock_bit(page, page_to_pfn(page), PB_migrate_isolate) +#define set_pageblock_isolate(page) \ + set_pfnblock_bit(page, page_to_pfn(page), PB_migrate_isolate) #else static inline bool is_migrate_isolate_page(struct page *page) { @@ -20,6 +26,16 @@ static inline bool is_migrate_isolate(int migratetype) { return false; } +static inline bool get_pageblock_isolate(struct page *page) +{ + return false; +} +static inline void clear_pageblock_isolate(struct page *page) +{ +} +static inline void set_pageblock_isolate(struct page *page) +{ +} #endif #define MEMORY_OFFLINE 0x1 diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index c240c7a1fb03..6a44be0f39f4 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -21,6 +21,13 @@ enum pageblock_bits { /* 3 bits required for migrate types */ PB_compact_skip,/* If set the block is skipped by compaction */ +#ifdef CONFIG_MEMORY_ISOLATION + /* + * Pageblock isolation is represented with a separate bit, so that + * the migratetype of a block is not overwritten by isolation. + */ + PB_migrate_isolate, /* If set the block is isolated */ +#endif /* * Assume the bits will always align on a word. If this assumption * changes then get/set pageblock needs updating. @@ -32,6 +39,13 @@ enum pageblock_bits { #define MIGRATETYPE_MASK ((1UL << (PB_migrate_end + 1)) - 1) +#ifdef CONFIG_MEMORY_ISOLATION +#define MIGRATETYPE_AND_ISO_MASK \ + (((1UL << (PB_migrate_end + 1)) - 1) | BIT(PB_migrate_isolate)) +#else +#define MIGRATETYPE_AND_ISO_MASK MIGRATETYPE_MASK +#endif + #if defined(CONFIG_HUGETLB_PAGE) #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE -- cgit v1.2.3 From 1bc3587a88d291a37dab12d6c14aa7da53304251 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Mon, 16 Jun 2025 22:11:11 -0400 Subject: mm/page_alloc: add support for initializing pageblock as isolated MIGRATE_ISOLATE is a standalone bit, so a pageblock cannot be initialized to just MIGRATE_ISOLATE. Add init_pageblock_migratetype() to enable initialize a pageblock with a migratetype and isolated. Link: https://lkml.kernel.org/r/20250617021115.2331563-4-ziy@nvidia.com Signed-off-by: Zi Yan Reviewed-by: Vlastimil Babka Acked-by: David Hildenbrand Cc: Baolin Wang Cc: Brendan Jackman Cc: Johannes Weiner Cc: Kirill A. Shuemov Cc: Mel Gorman Cc: Michal Hocko Cc: Oscar Salvador Cc: Richard Chang Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/memory_hotplug.h | 3 ++- include/linux/page-isolation.h | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index eaac5ae8c05c..23f038a16231 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -314,7 +314,8 @@ extern int add_memory_driver_managed(int nid, u64 start, u64 size, mhp_t mhp_flags); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, - struct vmem_altmap *altmap, int migratetype); + struct vmem_altmap *altmap, int migratetype, + bool isolate_pageblock); extern void remove_pfn_range_from_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index fc021d3f95ca..14c6a5f691c2 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -41,6 +41,9 @@ static inline void set_pageblock_isolate(struct page *page) #define MEMORY_OFFLINE 0x1 #define REPORT_FAILURE 0x2 +void __meminit init_pageblock_migratetype(struct page *page, + enum migratetype migratetype, + bool isolate); void set_pageblock_migratetype(struct page *page, enum migratetype migratetype); bool move_freepages_block_isolate(struct zone *zone, struct page *page, -- cgit v1.2.3 From b1df9c5713dc41229667aa44eaea2399e8de9470 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Mon, 16 Jun 2025 22:11:12 -0400 Subject: mm/page_isolation: remove migratetype from move_freepages_block_isolate() Since migratetype is no longer overwritten during pageblock isolation, moving a pageblock out of MIGRATE_ISOLATE no longer needs a new migratetype. Add pageblock_isolate_and_move_free_pages() and pageblock_unisolate_and_move_free_pages() to be explicit about the page isolation operations. Both share the common code in __move_freepages_block_isolate(), which is renamed from move_freepages_block_isolate(). Add toggle_pageblock_isolate() to flip pageblock isolation bit in __move_freepages_block_isolate(). Make set_pageblock_migratetype() only accept non MIGRATE_ISOLATE types, so that one should use set_pageblock_isolate() to isolate pageblocks. As a result, move pageblock migratetype code out of __move_freepages_block(). Link: https://lkml.kernel.org/r/20250617021115.2331563-5-ziy@nvidia.com Signed-off-by: Zi Yan Reviewed-by: Vlastimil Babka Acked-by: David Hildenbrand Cc: Baolin Wang Cc: Brendan Jackman Cc: Johannes Weiner Cc: Kirill A. Shuemov Cc: Mel Gorman Cc: Michal Hocko Cc: Oscar Salvador Cc: Richard Chang Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/page-isolation.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 14c6a5f691c2..7241a6719618 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -44,10 +44,9 @@ static inline void set_pageblock_isolate(struct page *page) void __meminit init_pageblock_migratetype(struct page *page, enum migratetype migratetype, bool isolate); -void set_pageblock_migratetype(struct page *page, enum migratetype migratetype); -bool move_freepages_block_isolate(struct zone *zone, struct page *page, - int migratetype); +bool pageblock_isolate_and_move_free_pages(struct zone *zone, struct page *page); +bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *page); int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, int migratetype, int flags); -- cgit v1.2.3 From 7a3324eb66f616408fdaaff8a1289c0a9b333748 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Mon, 16 Jun 2025 22:11:13 -0400 Subject: mm/page_isolation: remove migratetype from undo_isolate_page_range() Since migratetype is no longer overwritten during pageblock isolation, undoing pageblock isolation no longer needs which migratetype to restore. Link: https://lkml.kernel.org/r/20250617021115.2331563-6-ziy@nvidia.com Signed-off-by: Zi Yan Acked-by: David Hildenbrand Reviewed-by: Vlastimil Babka Cc: Baolin Wang Cc: Brendan Jackman Cc: Johannes Weiner Cc: Kirill A. Shuemov Cc: Mel Gorman Cc: Michal Hocko Cc: Oscar Salvador Cc: Richard Chang Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/page-isolation.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 7241a6719618..7a681a49e73c 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -51,8 +51,7 @@ bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *pag int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, int migratetype, int flags); -void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, - int migratetype); +void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn); int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, int isol_flags); -- cgit v1.2.3 From d1554fb6302093d353c8bf4601f9bf994b836904 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Mon, 16 Jun 2025 22:11:14 -0400 Subject: mm/page_isolation: remove migratetype parameter from more functions migratetype is no longer overwritten during pageblock isolation, start_isolate_page_range(), has_unmovable_pages(), and set_migratetype_isolate() no longer need which migratetype to restore during isolation failure. For has_unmoable_pages(), it needs to know if the isolation is for CMA allocation, so adding PB_ISOLATE_MODE_CMA_ALLOC provide the information. At the same time change isolation flags to enum pb_isolate_mode (PB_ISOLATE_MODE_MEM_OFFLINE, PB_ISOLATE_MODE_CMA_ALLOC, PB_ISOLATE_MODE_OTHER). Remove REPORT_FAILURE and check PB_ISOLATE_MODE_MEM_OFFLINE, since only PB_ISOLATE_MODE_MEM_OFFLINE reports isolation failures. alloc_contig_range() no longer needs migratetype. Replace it with a newly defined acr_flags_t to tell if an allocation is for CMA. So does __alloc_contig_migrate_range(). Add ACR_FLAGS_NONE (set to 0) to indicate ordinary allocations. Link: https://lkml.kernel.org/r/20250617021115.2331563-7-ziy@nvidia.com Signed-off-by: Zi Yan Reviewed-by: Vlastimil Babka Acked-by: David Hildenbrand Cc: Baolin Wang Cc: Brendan Jackman Cc: Johannes Weiner Cc: Kirill A. Shuemov Cc: Mel Gorman Cc: Michal Hocko Cc: Oscar Salvador Cc: Richard Chang Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/gfp.h | 7 ++++++- include/linux/page-isolation.h | 20 ++++++++++++++++---- include/trace/events/kmem.h | 14 ++++++++------ 3 files changed, 30 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index be160e8d8bcb..5ebf26fcdcfa 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -423,9 +423,14 @@ static inline bool gfp_compaction_allowed(gfp_t gfp_mask) extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma); #ifdef CONFIG_CONTIG_ALLOC + +typedef unsigned int __bitwise acr_flags_t; +#define ACR_FLAGS_NONE ((__force acr_flags_t)0) // ordinary allocation request +#define ACR_FLAGS_CMA ((__force acr_flags_t)BIT(0)) // allocate for CMA + /* The below functions must be run on a range from a single zone. */ extern int alloc_contig_range_noprof(unsigned long start, unsigned long end, - unsigned migratetype, gfp_t gfp_mask); + acr_flags_t alloc_flags, gfp_t gfp_mask); #define alloc_contig_range(...) alloc_hooks(alloc_contig_range_noprof(__VA_ARGS__)) extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask, diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 7a681a49e73c..3e2f960e166c 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -38,8 +38,20 @@ static inline void set_pageblock_isolate(struct page *page) } #endif -#define MEMORY_OFFLINE 0x1 -#define REPORT_FAILURE 0x2 +/* + * Pageblock isolation modes: + * PB_ISOLATE_MODE_MEM_OFFLINE - isolate to offline (!allocate) memory + * e.g., skip over PageHWPoison() pages and + * PageOffline() pages. Unmovable pages will be + * reported in this mode. + * PB_ISOLATE_MODE_CMA_ALLOC - isolate for CMA allocations + * PB_ISOLATE_MODE_OTHER - isolate for other purposes + */ +enum pb_isolate_mode { + PB_ISOLATE_MODE_MEM_OFFLINE, + PB_ISOLATE_MODE_CMA_ALLOC, + PB_ISOLATE_MODE_OTHER, +}; void __meminit init_pageblock_migratetype(struct page *page, enum migratetype migratetype, @@ -49,10 +61,10 @@ bool pageblock_isolate_and_move_free_pages(struct zone *zone, struct page *page) bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *page); int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, - int migratetype, int flags); + enum pb_isolate_mode mode); void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn); int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, - int isol_flags); + enum pb_isolate_mode mode); #endif diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index f74925a6cf69..efffcf578217 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -304,6 +304,7 @@ TRACE_EVENT(mm_page_alloc_extfrag, __entry->change_ownership) ); +#ifdef CONFIG_CONTIG_ALLOC TRACE_EVENT(mm_alloc_contig_migrate_range_info, TP_PROTO(unsigned long start, @@ -311,9 +312,9 @@ TRACE_EVENT(mm_alloc_contig_migrate_range_info, unsigned long nr_migrated, unsigned long nr_reclaimed, unsigned long nr_mapped, - int migratetype), + acr_flags_t alloc_flags), - TP_ARGS(start, end, nr_migrated, nr_reclaimed, nr_mapped, migratetype), + TP_ARGS(start, end, nr_migrated, nr_reclaimed, nr_mapped, alloc_flags), TP_STRUCT__entry( __field(unsigned long, start) @@ -321,7 +322,7 @@ TRACE_EVENT(mm_alloc_contig_migrate_range_info, __field(unsigned long, nr_migrated) __field(unsigned long, nr_reclaimed) __field(unsigned long, nr_mapped) - __field(int, migratetype) + __field(acr_flags_t, alloc_flags) ), TP_fast_assign( @@ -330,17 +331,18 @@ TRACE_EVENT(mm_alloc_contig_migrate_range_info, __entry->nr_migrated = nr_migrated; __entry->nr_reclaimed = nr_reclaimed; __entry->nr_mapped = nr_mapped; - __entry->migratetype = migratetype; + __entry->alloc_flags = alloc_flags; ), - TP_printk("start=0x%lx end=0x%lx migratetype=%d nr_migrated=%lu nr_reclaimed=%lu nr_mapped=%lu", + TP_printk("start=0x%lx end=0x%lx alloc_flags=%d nr_migrated=%lu nr_reclaimed=%lu nr_mapped=%lu", __entry->start, __entry->end, - __entry->migratetype, + __entry->alloc_flags, __entry->nr_migrated, __entry->nr_reclaimed, __entry->nr_mapped) ); +#endif TRACE_EVENT(mm_setup_per_zone_wmarks, -- cgit v1.2.3 From eff41389d8249a1a5a67faa440255ed8e526803a Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 27 Jun 2025 12:07:07 -0400 Subject: mm/hugetlb: remove prepare_hugepage_range() Only mips and loongarch implemented this API, however what it does was checking against stack overflow for either len or addr. That's already done in arch's arch_get_unmapped_area*() functions, even though it may not be 100% identical checks. For example, for both of the architectures, there will be a trivial difference on how stack top was defined. The old code uses STACK_TOP which may be slightly smaller than TASK_SIZE on either of them, but the hope is that shouldn't be a problem. It means the whole API is pretty much obsolete at least now, remove it completely. Link: https://lkml.kernel.org/r/20250627160707.2124580-1-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Reviewed-by: Oscar Salvador Acked-by: David Hildenbrand Reviewed-by: Liam R. Howlett Reviewed-by: Anshuman Khandual Cc: Huacai Chen Cc: Thomas Bogendoerfer Cc: Muchun Song Cc: Jann Horn Cc: Lorenzo Stoakes Cc: Pedro Falcato Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- include/asm-generic/hugetlb.h | 8 -------- include/linux/hugetlb.h | 6 ------ 2 files changed, 14 deletions(-) (limited to 'include') diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index 3e0a8fe9b108..4bce4f07f44f 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -114,14 +114,6 @@ static inline int huge_pte_none_mostly(pte_t pte) } #endif -#ifndef __HAVE_ARCH_PREPARE_HUGEPAGE_RANGE -static inline int prepare_hugepage_range(struct file *file, - unsigned long addr, unsigned long len) -{ - return 0; -} -#endif - #ifndef __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c6c87eae4a8d..474de8e2a8f2 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -359,12 +359,6 @@ static inline void hugetlb_show_meminfo_node(int nid) { } -static inline int prepare_hugepage_range(struct file *file, - unsigned long addr, unsigned long len) -{ - return -EINVAL; -} - static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma) { } -- cgit v1.2.3 From 5bd3b163e374462c05c055ff091582d757929d3f Mon Sep 17 00:00:00 2001 From: Xuanye Liu Date: Wed, 2 Jul 2025 15:12:35 +0800 Subject: mm: fix spelling issue in swap.h recliam -> reclaim Link: https://lkml.kernel.org/r/20250702071235.212794-1-liuqiye2025@163.com Signed-off-by: Xuanye Liu Signed-off-by: Andrew Morton --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index bc0e1c275fc0..a49be950c485 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -415,7 +415,7 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #define MIN_SWAPPINESS 0 #define MAX_SWAPPINESS 200 -/* Just recliam from anon folios in proactive memory reclaim */ +/* Just reclaim from anon folios in proactive memory reclaim */ #define SWAPPINESS_ANON_ONLY (MAX_SWAPPINESS + 1) extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, -- cgit v1.2.3 From 526f36f3f47b9ad29ffb1bf668b7f295287ee11b Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Thu, 3 Jul 2025 12:03:38 +0530 Subject: maple tree: add some comments Add comments explaining the fields for maple_metadata, since "end" is ambiguous and "gap" can be confused as the largest gap, whereas it is actually the offset of the largest gap. Add comment for mas_ascend() to explain, whose min and max we are trying to find. Explain that, for example, if we are already on offset zero, then the parent min is mas->min, otherwise we need to walk up to find the implied pivot min. Link: https://lkml.kernel.org/r/20250703063338.51509-1-dev.jain@arm.com Signed-off-by: Dev Jain Reviewed-by: Liam R. Howlett Cc: Wei Yang Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 9ef129038224..bafe143b1f78 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -75,8 +75,8 @@ * searching for gaps or any other code that needs to find the end of the data. */ struct maple_metadata { - unsigned char end; - unsigned char gap; + unsigned char end; /* end of data */ + unsigned char gap; /* offset of largest gap */ }; /* -- cgit v1.2.3 From 15504b1163007bbfbd9a63460d5c14737c16e96d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:24:56 +0200 Subject: mm/balloon_compaction: convert balloon_page_delete() to balloon_page_finalize() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's move the removal of the page from the balloon list into the single caller, to remove the dependency on the PG_isolated flag and clarify locking requirements. Note that for now, balloon_page_delete() was used on two paths: (1) Removing a page from the balloon for deflation through balloon_page_list_dequeue() (2) Removing an isolated page from the balloon for migration in the per-driver migration handlers. Isolated pages were already removed from the balloon list during isolation. So instead of relying on the flag, we can just distinguish both cases directly and handle it accordingly in the caller. We'll shuffle the operations a bit such that they logically make more sense (e.g., remove from the list before clearing flags). In balloon migration functions we can now move the balloon_page_finalize() out of the balloon lock and perform the finalization just before dropping the balloon reference. Document that the page lock is currently required when modifying the movability aspects of a page; hopefully we can soon decouple this from the page lock. Link: https://lkml.kernel.org/r/20250704102524.326966-3-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: Harry Yoo Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/balloon_compaction.h | 43 ++++++++++++++------------------------ 1 file changed, 16 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index 5ca2d5699620..b9f19da37b08 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -97,27 +97,6 @@ static inline void balloon_page_insert(struct balloon_dev_info *balloon, list_add(&page->lru, &balloon->pages); } -/* - * balloon_page_delete - delete a page from balloon's page list and clear - * the page->private assignement accordingly. - * @page : page to be released from balloon's page list - * - * Caller must ensure the page is locked and the spin_lock protecting balloon - * pages list is held before deleting a page from the balloon device. - */ -static inline void balloon_page_delete(struct page *page) -{ - __ClearPageOffline(page); - __ClearPageMovable(page); - set_page_private(page, 0); - /* - * No touch page.lru field once @page has been isolated - * because VM is using the field. - */ - if (!PageIsolated(page)) - list_del(&page->lru); -} - /* * balloon_page_device - get the b_dev_info descriptor for the balloon device * that enqueues the given page. @@ -141,12 +120,6 @@ static inline void balloon_page_insert(struct balloon_dev_info *balloon, list_add(&page->lru, &balloon->pages); } -static inline void balloon_page_delete(struct page *page) -{ - __ClearPageOffline(page); - list_del(&page->lru); -} - static inline gfp_t balloon_mapping_gfp_mask(void) { return GFP_HIGHUSER; @@ -154,6 +127,22 @@ static inline gfp_t balloon_mapping_gfp_mask(void) #endif /* CONFIG_BALLOON_COMPACTION */ +/* + * balloon_page_finalize - prepare a balloon page that was removed from the + * balloon list for release to the page allocator + * @page: page to be released to the page allocator + * + * Caller must ensure that the page is locked. + */ +static inline void balloon_page_finalize(struct page *page) +{ + if (IS_ENABLED(CONFIG_BALLOON_COMPACTION)) { + __ClearPageMovable(page); + set_page_private(page, 0); + } + __ClearPageOffline(page); +} + /* * balloon_page_push - insert a page into a page list. * @head : pointer to list -- cgit v1.2.3 From 65aabd88dffda68639808e0827cfef624a1cd55f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:24:59 +0200 Subject: mm/balloon_compaction: make PageOffline sticky until the page is freed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let the page freeing code handle clearing the page type. Being able to identify balloon pages until actually freed is a requirement for upcoming movable_ops migration changes. Link: https://lkml.kernel.org/r/20250704102524.326966-6-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Zi Yan Acked-by: Harry Yoo Reviewed-by: Lorenzo Stoakes Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Signed-off-by: Andrew Morton --- include/linux/balloon_compaction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index b9f19da37b08..bfc6e50bd004 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -140,7 +140,7 @@ static inline void balloon_page_finalize(struct page *page) __ClearPageMovable(page); set_page_private(page, 0); } - __ClearPageOffline(page); + /* PageOffline is sticky until the page is freed to the buddy. */ } /* -- cgit v1.2.3 From 6ef0c1976b8fab938e732c2fb751fa8965153b2e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:25:01 +0200 Subject: mm/migrate: rename isolate_movable_page() to isolate_movable_ops_page() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... and start moving back to per-page things that will absolutely not be folio things in the future. Add documentation and a comment that the remaining folio stuff (lock, refcount) will have to be reworked as well. While at it, convert the VM_BUG_ON() into a WARN_ON_ONCE() and handle it gracefully (relevant with further changes), and convert a WARN_ON_ONCE() into a VM_WARN_ON_ONCE_PAGE(). Note that we will leave anything that needs a rework (lock, refcount, ->lru) to be using folios for now: that perfectly highlights the problematic bits. Link: https://lkml.kernel.org/r/20250704102524.326966-8-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Harry Yoo Reviewed-by: Lorenzo Stoakes Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Signed-off-by: Andrew Morton --- include/linux/migrate.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index aaa2114498d6..c0ec7422837b 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -69,7 +69,7 @@ int migrate_pages(struct list_head *l, new_folio_t new, free_folio_t free, unsigned long private, enum migrate_mode mode, int reason, unsigned int *ret_succeeded); struct folio *alloc_migration_target(struct folio *src, unsigned long private); -bool isolate_movable_page(struct page *page, isolate_mode_t mode); +bool isolate_movable_ops_page(struct page *page, isolate_mode_t mode); bool isolate_folio_to_list(struct folio *folio, struct list_head *list); int migrate_huge_page_move_mapping(struct address_space *mapping, @@ -90,7 +90,7 @@ static inline int migrate_pages(struct list_head *l, new_folio_t new, static inline struct folio *alloc_migration_target(struct folio *src, unsigned long private) { return NULL; } -static inline bool isolate_movable_page(struct page *page, isolate_mode_t mode) +static inline bool isolate_movable_ops_page(struct page *page, isolate_mode_t mode) { return false; } static inline bool isolate_folio_to_list(struct folio *folio, struct list_head *list) { return false; } -- cgit v1.2.3 From 07e5355eeead3a715bb48ffe13499dd3d0178e52 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:25:04 +0200 Subject: mm/migrate: remove folio_test_movable() and folio_movable_ops() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Folios will have nothing to do with movable_ops page migration. These functions are now unused, so let's remove them. Note that __folio_test_movable() and friends will be removed separately next, after more rework. Link: https://lkml.kernel.org/r/20250704102524.326966-11-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Lorenzo Stoakes Reviewed-by: Harry Yoo Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Signed-off-by: Andrew Morton --- include/linux/migrate.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index c0ec7422837b..c99a00d4ca27 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -118,20 +118,6 @@ static inline void __ClearPageMovable(struct page *page) } #endif -static inline bool folio_test_movable(struct folio *folio) -{ - return PageMovable(&folio->page); -} - -static inline -const struct movable_operations *folio_movable_ops(struct folio *folio) -{ - VM_BUG_ON(!__folio_test_movable(folio)); - - return (const struct movable_operations *) - ((unsigned long)folio->mapping - PAGE_MAPPING_MOVABLE); -} - static inline const struct movable_operations *page_movable_ops(struct page *page) { -- cgit v1.2.3 From 3544c4faccb8f0867bc65f8007ee70bfb5054305 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:25:07 +0200 Subject: mm/balloon_compaction: stop using __ClearPageMovable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can just look at the balloon device (stored in page->private), to see if the page is still part of the balloon. As isolated balloon pages cannot get released (they are taken off the balloon list while isolated), we don't have to worry about this case in the putback and migration callback. Add a WARN_ON_ONCE for now. Link: https://lkml.kernel.org/r/20250704102524.326966-14-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: Harry Yoo Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/balloon_compaction.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index bfc6e50bd004..9bce8e9f5018 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -136,10 +136,8 @@ static inline gfp_t balloon_mapping_gfp_mask(void) */ static inline void balloon_page_finalize(struct page *page) { - if (IS_ENABLED(CONFIG_BALLOON_COMPACTION)) { - __ClearPageMovable(page); + if (IS_ENABLED(CONFIG_BALLOON_COMPACTION)) set_page_private(page, 0); - } /* PageOffline is sticky until the page is freed to the buddy. */ } -- cgit v1.2.3 From 34727dee04994c8ceb1bb8a927af0a88e52e103c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:25:08 +0200 Subject: mm/migrate: remove __ClearPageMovable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unused, let's remove it. The Chinese docs in Documentation/translations/zh_CN/mm/page_migration.rst still mention it, but that whole docs is destined to get outdated and updated by somebody that actually speaks that language. Link: https://lkml.kernel.org/r/20250704102524.326966-15-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Harry Yoo Reviewed-by: Lorenzo Stoakes Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Signed-off-by: Andrew Morton --- include/linux/migrate.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index c99a00d4ca27..6eeda8eb1e0d 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -35,8 +35,8 @@ struct migration_target_control; * @src page. The driver should copy the contents of the * @src page to the @dst page and set up the fields of @dst page. * Both pages are locked. - * If page migration is successful, the driver should call - * __ClearPageMovable(@src) and return MIGRATEPAGE_SUCCESS. + * If page migration is successful, the driver should + * return MIGRATEPAGE_SUCCESS. * If the driver cannot migrate the page at the moment, it can return * -EAGAIN. The VM interprets this as a temporary migration failure and * will retry it later. Any other error value is a permanent migration @@ -106,16 +106,12 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, #ifdef CONFIG_COMPACTION bool PageMovable(struct page *page); void __SetPageMovable(struct page *page, const struct movable_operations *ops); -void __ClearPageMovable(struct page *page); #else static inline bool PageMovable(struct page *page) { return false; } static inline void __SetPageMovable(struct page *page, const struct movable_operations *ops) { } -static inline void __ClearPageMovable(struct page *page) -{ -} #endif static inline -- cgit v1.2.3 From 22d103aef090dc688a88881fb955376dec1228d5 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:25:09 +0200 Subject: mm/migration: remove PageMovable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, if __ClearPageMovable() were invoked on a page, this would cause __PageMovable() to return false, but due to the continued existence of page movable ops, PageMovable() would have returned true. With __ClearPageMovable() gone, the two are exactly equivalent. So we can replace PageMovable() checks by __PageMovable(). In fact, __PageMovable() cannot change until a page is freed, so we can turn some PageMovable() into sanity checks for __PageMovable(). Link: https://lkml.kernel.org/r/20250704102524.326966-16-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Lorenzo Stoakes Reviewed-by: Harry Yoo Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Signed-off-by: Andrew Morton --- include/linux/migrate.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 6eeda8eb1e0d..25659a685e2a 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -104,10 +104,8 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_COMPACTION -bool PageMovable(struct page *page); void __SetPageMovable(struct page *page, const struct movable_operations *ops); #else -static inline bool PageMovable(struct page *page) { return false; } static inline void __SetPageMovable(struct page *page, const struct movable_operations *ops) { -- cgit v1.2.3 From d4fb4587bd73b6b773397f5fed52a5e4bd4dec8b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:25:10 +0200 Subject: mm: rename __PageMovable() to page_has_movable_ops() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's make it clearer that we are talking about movable_ops pages. While at it, convert a VM_BUG_ON to a VM_WARN_ON_ONCE_PAGE. Link: https://lkml.kernel.org/r/20250704102524.326966-17-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Lorenzo Stoakes Reviewed-by: Harry Yoo Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Signed-off-by: Andrew Morton --- include/linux/migrate.h | 2 +- include/linux/page-flags.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 25659a685e2a..e04035f70e36 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -115,7 +115,7 @@ static inline void __SetPageMovable(struct page *page, static inline const struct movable_operations *page_movable_ops(struct page *page) { - VM_BUG_ON(!__PageMovable(page)); + VM_WARN_ON_ONCE_PAGE(!page_has_movable_ops(page), page); return (const struct movable_operations *) ((unsigned long)page->mapping - PAGE_MAPPING_MOVABLE); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 4fe5ee67535b..c67163b73c5e 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -750,7 +750,7 @@ static __always_inline bool __folio_test_movable(const struct folio *folio) PAGE_MAPPING_MOVABLE; } -static __always_inline bool __PageMovable(const struct page *page) +static __always_inline bool page_has_movable_ops(const struct page *page) { return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_MOVABLE; -- cgit v1.2.3 From 457d7b3adb11576ce5f3ae0d9a4987ace213bed2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:25:12 +0200 Subject: mm: remove __folio_test_movable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert to page_has_movable_ops(). While at it, cleanup relevant code a bit. The data_race() in migrate_folio_unmap() is questionable: we already hold a page reference, and concurrent modifications can no longer happen (iow: __ClearPageMovable() no longer exists). Drop it for now, we'll rework page_has_movable_ops() soon either way to no longer rely on page->mapping. Wherever we cast from folio to page now is a clear sign that this code has to be decoupled. Link: https://lkml.kernel.org/r/20250704102524.326966-19-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Lorenzo Stoakes Reviewed-by: Harry Yoo Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index c67163b73c5e..4c27ebb689e3 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -744,12 +744,6 @@ static __always_inline bool PageAnon(const struct page *page) return folio_test_anon(page_folio(page)); } -static __always_inline bool __folio_test_movable(const struct folio *folio) -{ - return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == - PAGE_MAPPING_MOVABLE; -} - static __always_inline bool page_has_movable_ops(const struct page *page) { return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == -- cgit v1.2.3 From 84caf98838a3e5f4bdb344c13679e1067ffbf094 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:25:13 +0200 Subject: mm: stop storing migration_ops in page->mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... instead, look them up statically based on the page type. Maybe in the future we want a registration interface? At least for now, it can be easily handled using the two page types that actually support page migration. The remaining usage of page->mapping is to flag such pages as actually being movable (having movable_ops), which we will change next. Link: https://lkml.kernel.org/r/20250704102524.326966-20-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Lorenzo Stoakes Reviewed-by: Harry Yoo Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Signed-off-by: Andrew Morton --- include/linux/balloon_compaction.h | 2 +- include/linux/migrate.h | 14 ++------------ include/linux/zsmalloc.h | 2 ++ 3 files changed, 5 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index 9bce8e9f5018..a8a1706cc56f 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -92,7 +92,7 @@ static inline void balloon_page_insert(struct balloon_dev_info *balloon, struct page *page) { __SetPageOffline(page); - __SetPageMovable(page, &balloon_mops); + __SetPageMovable(page); set_page_private(page, (unsigned long)balloon); list_add(&page->lru, &balloon->pages); } diff --git a/include/linux/migrate.h b/include/linux/migrate.h index e04035f70e36..6aece3f3c8be 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -104,23 +104,13 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_COMPACTION -void __SetPageMovable(struct page *page, const struct movable_operations *ops); +void __SetPageMovable(struct page *page); #else -static inline void __SetPageMovable(struct page *page, - const struct movable_operations *ops) +static inline void __SetPageMovable(struct page *page) { } #endif -static inline -const struct movable_operations *page_movable_ops(struct page *page) -{ - VM_WARN_ON_ONCE_PAGE(!page_has_movable_ops(page), page); - - return (const struct movable_operations *) - ((unsigned long)page->mapping - PAGE_MAPPING_MOVABLE); -} - #ifdef CONFIG_NUMA_BALANCING int migrate_misplaced_folio_prepare(struct folio *folio, struct vm_area_struct *vma, int node); diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 13e9cc5490f7..f3ccff2d966c 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -46,4 +46,6 @@ void zs_obj_read_end(struct zs_pool *pool, unsigned long handle, void zs_obj_write(struct zs_pool *pool, unsigned long handle, void *handle_mem, size_t mem_len); +extern const struct movable_operations zsmalloc_mops; + #endif -- cgit v1.2.3 From 3d388584d59985e95f5bfb9dbd9776fa1bb1ec8a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:25:14 +0200 Subject: mm: convert "movable" flag in page->mapping to a page flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead, let's use a page flag. As the page flag can result in false-positives, glue it to the page types for which we support/implement movable_ops page migration. We are reusing PG_uptodate, that is for example used to track file system state and does not apply to movable_ops pages. So warning in case it is set in page_has_movable_ops() on other page types could result in false-positive warnings. Likely we could set the bit using a non-atomic update: in contrast to page->mapping, we could have others trying to update the flags concurrently when trying to lock the folio. In isolate_movable_ops_page(), we already take care of that by checking if the page has movable_ops before locking it. Let's start with the atomic variant, we could later switch to the non-atomic variant once we are sure other cases are similarly fine. Once we perform the switch, we'll have to introduce __SETPAGEFLAG_NOOP(). [david@redhat.com: add missing `:' in kerneldoc] Link: https://lkml.kernel.org/r/d96e2916-2c43-462c-b6a1-2375ef397d8b@redhat.com Link: https://lkml.kernel.org/r/20250704102524.326966-21-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Lorenzo Stoakes Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: Harry Yoo Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Signed-off-by: Andrew Morton --- include/linux/balloon_compaction.h | 2 +- include/linux/migrate.h | 8 ------ include/linux/page-flags.h | 54 +++++++++++++++++++++++++++++++------- 3 files changed, 45 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index a8a1706cc56f..b222b0737c46 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -92,7 +92,7 @@ static inline void balloon_page_insert(struct balloon_dev_info *balloon, struct page *page) { __SetPageOffline(page); - __SetPageMovable(page); + SetPageMovableOps(page); set_page_private(page, (unsigned long)balloon); list_add(&page->lru, &balloon->pages); } diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 6aece3f3c8be..acadd41e0b5c 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -103,14 +103,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, #endif /* CONFIG_MIGRATION */ -#ifdef CONFIG_COMPACTION -void __SetPageMovable(struct page *page); -#else -static inline void __SetPageMovable(struct page *page) -{ -} -#endif - #ifdef CONFIG_NUMA_BALANCING int migrate_misplaced_folio_prepare(struct folio *folio, struct vm_area_struct *vma, int node); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 4c27ebb689e3..a02a12e51915 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -170,6 +170,11 @@ enum pageflags { /* non-lru isolated movable page */ PG_isolated = PG_reclaim, +#ifdef CONFIG_MIGRATION + /* this is a movable_ops page (for selected typed pages only) */ + PG_movable_ops = PG_uptodate, +#endif + /* Only valid for buddy pages. Used to track pages that are reported */ PG_reported = PG_uptodate, @@ -698,9 +703,6 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) * bit; and then folio->mapping points, not to an anon_vma, but to a private * structure which KSM associates with that merged page. See ksm.h. * - * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is used for non-lru movable - * page and then folio->mapping points to a struct movable_operations. - * * Please note that, confusingly, "folio_mapping" refers to the inode * address_space which maps the folio from disk; whereas "folio_mapped" * refers to user virtual address space into which the folio is mapped. @@ -743,13 +745,6 @@ static __always_inline bool PageAnon(const struct page *page) { return folio_test_anon(page_folio(page)); } - -static __always_inline bool page_has_movable_ops(const struct page *page) -{ - return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == - PAGE_MAPPING_MOVABLE; -} - #ifdef CONFIG_KSM /* * A KSM page is one of those write-protected "shared pages" or "merged pages" @@ -1133,6 +1128,45 @@ bool is_free_buddy_page(const struct page *page); PAGEFLAG(Isolated, isolated, PF_ANY); +#ifdef CONFIG_MIGRATION +/* + * This page is migratable through movable_ops (for selected typed pages + * only). + * + * Page migration of such pages might fail, for example, if the page is + * already isolated by somebody else, or if the page is about to get freed. + * + * While a subsystem might set selected typed pages that support page migration + * as being movable through movable_ops, it must never clear this flag. + * + * This flag is only cleared when the page is freed back to the buddy. + * + * Only selected page types support this flag (see page_movable_ops()) and + * the flag might be used in other context for other pages. Always use + * page_has_movable_ops() instead. + */ +TESTPAGEFLAG(MovableOps, movable_ops, PF_NO_TAIL); +SETPAGEFLAG(MovableOps, movable_ops, PF_NO_TAIL); +#else /* !CONFIG_MIGRATION */ +TESTPAGEFLAG_FALSE(MovableOps, movable_ops); +SETPAGEFLAG_NOOP(MovableOps, movable_ops); +#endif /* CONFIG_MIGRATION */ + +/** + * page_has_movable_ops - test for a movable_ops page + * @page: The page to test. + * + * Test whether this is a movable_ops page. Such pages will stay that + * way until freed. + * + * Returns true if this is a movable_ops page, otherwise false. + */ +static inline bool page_has_movable_ops(const struct page *page) +{ + return PageMovableOps(page) && + (PageOffline(page) || PageZsmalloc(page)); +} + static __always_inline int PageAnonExclusive(const struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnon(page), page); -- cgit v1.2.3 From 92f091769fde42509ca7685e67c9951f2350ceb7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:25:15 +0200 Subject: mm: rename PG_isolated to PG_movable_ops_isolated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's rename the flag to make it clearer where it applies (not folios ...). While at it, define the flag only with CONFIG_MIGRATION. Link: https://lkml.kernel.org/r/20250704102524.326966-22-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Lorenzo Stoakes Reviewed-by: Harry Yoo Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a02a12e51915..92f7152a445e 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -167,10 +167,9 @@ enum pageflags { /* Remapped by swiotlb-xen. */ PG_xen_remapped = PG_owner_priv_1, - /* non-lru isolated movable page */ - PG_isolated = PG_reclaim, - #ifdef CONFIG_MIGRATION + /* movable_ops page that is isolated for migration */ + PG_movable_ops_isolated = PG_reclaim, /* this is a movable_ops page (for selected typed pages only) */ PG_movable_ops = PG_uptodate, #endif @@ -1126,8 +1125,6 @@ static inline bool folio_contain_hwpoisoned_page(struct folio *folio) bool is_free_buddy_page(const struct page *page); -PAGEFLAG(Isolated, isolated, PF_ANY); - #ifdef CONFIG_MIGRATION /* * This page is migratable through movable_ops (for selected typed pages @@ -1147,9 +1144,18 @@ PAGEFLAG(Isolated, isolated, PF_ANY); */ TESTPAGEFLAG(MovableOps, movable_ops, PF_NO_TAIL); SETPAGEFLAG(MovableOps, movable_ops, PF_NO_TAIL); +/* + * A movable_ops page has this flag set while it is isolated for migration. + * This flag primarily protects against concurrent migration attempts. + * + * Once migration ended (success or failure), the flag is cleared. The + * flag is managed by the migration core. + */ +PAGEFLAG(MovableOpsIsolated, movable_ops_isolated, PF_NO_TAIL); #else /* !CONFIG_MIGRATION */ TESTPAGEFLAG_FALSE(MovableOps, movable_ops); SETPAGEFLAG_NOOP(MovableOps, movable_ops); +PAGEFLAG_FALSE(MovableOpsIsolated, movable_ops_isolated); #endif /* CONFIG_MIGRATION */ /** -- cgit v1.2.3 From bd56d30242039826160d95a65cb14c1cd65e6488 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:25:16 +0200 Subject: mm/page-flags: rename PAGE_MAPPING_MOVABLE to PAGE_MAPPING_ANON_KSM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KSM is the only remaining user, let's rename the flag. While at it, adjust to remaining page -> folio in the doc. Link: https://lkml.kernel.org/r/20250704102524.326966-23-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Lorenzo Stoakes Reviewed-by: Harry Yoo Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 92f7152a445e..22767499fb21 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -697,10 +697,10 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) * folio->mapping points to its anon_vma, not to a struct address_space; * with the PAGE_MAPPING_ANON bit set to distinguish it. See rmap.h. * - * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled, - * the PAGE_MAPPING_MOVABLE bit may be set along with the PAGE_MAPPING_ANON + * On an anonymous folio in a VM_MERGEABLE area, if CONFIG_KSM is enabled, + * the PAGE_MAPPING_ANON_KSM bit may be set along with the PAGE_MAPPING_ANON * bit; and then folio->mapping points, not to an anon_vma, but to a private - * structure which KSM associates with that merged page. See ksm.h. + * structure which KSM associates with that merged folio. See ksm.h. * * Please note that, confusingly, "folio_mapping" refers to the inode * address_space which maps the folio from disk; whereas "folio_mapped" @@ -714,9 +714,9 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) * See mm/slab.h. */ #define PAGE_MAPPING_ANON 0x1 -#define PAGE_MAPPING_MOVABLE 0x2 -#define PAGE_MAPPING_KSM (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) -#define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) +#define PAGE_MAPPING_ANON_KSM 0x2 +#define PAGE_MAPPING_KSM (PAGE_MAPPING_ANON | PAGE_MAPPING_ANON_KSM) +#define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_ANON_KSM) static __always_inline bool folio_mapping_flags(const struct folio *folio) { -- cgit v1.2.3 From 5799c0ed0aff65989b04ca3f517401e4ee94da10 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:25:17 +0200 Subject: mm/page-alloc: remove PageMappingFlags() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As PageMappingFlags() now only indicates anon (incl. KSM) folios, we can now simply check for PageAnon() and remove PageMappingFlags(). ... and while at it, use the folio instead and operate on folio->mapping. Link: https://lkml.kernel.org/r/20250704102524.326966-24-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Lorenzo Stoakes Reviewed-by: Harry Yoo Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 22767499fb21..a2151f6d96ae 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -723,11 +723,6 @@ static __always_inline bool folio_mapping_flags(const struct folio *folio) return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0; } -static __always_inline bool PageMappingFlags(const struct page *page) -{ - return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0; -} - static __always_inline bool folio_test_anon(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0; -- cgit v1.2.3 From beb2cdeed673150cdfad653dd2e7c9999c230f57 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:25:18 +0200 Subject: mm/page-flags: remove folio_mapping_flags() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's unused and the page counterpart is gone, so let's remove it. Link: https://lkml.kernel.org/r/20250704102524.326966-25-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Lorenzo Stoakes Reviewed-by: Harry Yoo Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a2151f6d96ae..ae2b80fcea6a 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -718,11 +718,6 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) #define PAGE_MAPPING_KSM (PAGE_MAPPING_ANON | PAGE_MAPPING_ANON_KSM) #define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_ANON_KSM) -static __always_inline bool folio_mapping_flags(const struct folio *folio) -{ - return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0; -} - static __always_inline bool folio_test_anon(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0; -- cgit v1.2.3 From 78cb1a13c42a6d843e21389f74d1edb90ed07288 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:25:19 +0200 Subject: mm: simplify folio_expected_ref_count() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that PAGE_MAPPING_MOVABLE is gone, we can simplify and rely on the folio_test_anon() test only. ... but staring at the users, this function should never even have been called on movable_ops pages. E.g., * __buffer_migrate_folio() does not make sense for them * folio_migrate_mapping() does not make sense for them * migrate_huge_page_move_mapping() does not make sense for them * __migrate_folio() does not make sense for them * ... and khugepaged should never stumble over them Let's simply refuse typed pages (which includes slab) except hugetlb, and WARN. Link: https://lkml.kernel.org/r/20250704102524.326966-26-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Lorenzo Stoakes Reviewed-by: Harry Yoo Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Signed-off-by: Andrew Morton --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ef40f68c1183..805108d7bbc3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2167,13 +2167,13 @@ static inline int folio_expected_ref_count(const struct folio *folio) const int order = folio_order(folio); int ref_count = 0; - if (WARN_ON_ONCE(folio_test_slab(folio))) + if (WARN_ON_ONCE(page_has_type(&folio->page) && !folio_test_hugetlb(folio))) return 0; if (folio_test_anon(folio)) { /* One reference per page from the swapcache. */ ref_count += folio_test_swapcache(folio) << order; - } else if (!((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS)) { + } else { /* One reference per page from the pagecache. */ ref_count += !!folio->mapping << order; /* One reference from PG_private. */ -- cgit v1.2.3 From df25569d401e36327b339c3f5b3265d74eae90f2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:25:20 +0200 Subject: mm: rename PAGE_MAPPING_* to FOLIO_MAPPING_* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the mapping flags are only used for folios, let's rename the defines. Link: https://lkml.kernel.org/r/20250704102524.326966-27-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Lorenzo Stoakes Reviewed-by: Harry Yoo Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Signed-off-by: Andrew Morton --- include/linux/fs.h | 2 +- include/linux/mm_types.h | 1 - include/linux/page-flags.h | 20 ++++++++++---------- include/linux/pagemap.h | 2 +- 4 files changed, 12 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index e14e9d11ca0f..d3e7ad6941a8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -526,7 +526,7 @@ struct address_space { /* * On most architectures that alignment is already the case; but * must be enforced here for CRIS, to let the least significant bit - * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. + * of struct folio's "mapping" pointer be used for FOLIO_MAPPING_ANON. */ /* XArray tags, for tagging dirty and writeback pages in the pagecache. */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 804d269a4f5e..1ec273b06691 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -105,7 +105,6 @@ struct page { unsigned int order; }; }; - /* See page-flags.h for PAGE_MAPPING_FLAGS */ struct address_space *mapping; union { pgoff_t __folio_index; /* Our offset within mapping. */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index ae2b80fcea6a..8e4d6eda8a8d 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -695,10 +695,10 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) /* * On an anonymous folio mapped into a user virtual memory area, * folio->mapping points to its anon_vma, not to a struct address_space; - * with the PAGE_MAPPING_ANON bit set to distinguish it. See rmap.h. + * with the FOLIO_MAPPING_ANON bit set to distinguish it. See rmap.h. * * On an anonymous folio in a VM_MERGEABLE area, if CONFIG_KSM is enabled, - * the PAGE_MAPPING_ANON_KSM bit may be set along with the PAGE_MAPPING_ANON + * the FOLIO_MAPPING_ANON_KSM bit may be set along with the FOLIO_MAPPING_ANON * bit; and then folio->mapping points, not to an anon_vma, but to a private * structure which KSM associates with that merged folio. See ksm.h. * @@ -713,21 +713,21 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) * false before calling the following functions (e.g., folio_test_anon). * See mm/slab.h. */ -#define PAGE_MAPPING_ANON 0x1 -#define PAGE_MAPPING_ANON_KSM 0x2 -#define PAGE_MAPPING_KSM (PAGE_MAPPING_ANON | PAGE_MAPPING_ANON_KSM) -#define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_ANON_KSM) +#define FOLIO_MAPPING_ANON 0x1 +#define FOLIO_MAPPING_ANON_KSM 0x2 +#define FOLIO_MAPPING_KSM (FOLIO_MAPPING_ANON | FOLIO_MAPPING_ANON_KSM) +#define FOLIO_MAPPING_FLAGS (FOLIO_MAPPING_ANON | FOLIO_MAPPING_ANON_KSM) static __always_inline bool folio_test_anon(const struct folio *folio) { - return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0; + return ((unsigned long)folio->mapping & FOLIO_MAPPING_ANON) != 0; } static __always_inline bool PageAnonNotKsm(const struct page *page) { unsigned long flags = (unsigned long)page_folio(page)->mapping; - return (flags & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_ANON; + return (flags & FOLIO_MAPPING_FLAGS) == FOLIO_MAPPING_ANON; } static __always_inline bool PageAnon(const struct page *page) @@ -743,8 +743,8 @@ static __always_inline bool PageAnon(const struct page *page) */ static __always_inline bool folio_test_ksm(const struct folio *folio) { - return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == - PAGE_MAPPING_KSM; + return ((unsigned long)folio->mapping & FOLIO_MAPPING_FLAGS) == + FOLIO_MAPPING_KSM; } #else FOLIO_TEST_FLAG_FALSE(ksm) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e63fbfbd5b0f..10a222e68b85 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -502,7 +502,7 @@ static inline pgoff_t mapping_align_index(struct address_space *mapping, static inline bool mapping_large_folio_support(struct address_space *mapping) { /* AS_FOLIO_ORDER is only reasonable for pagecache folios */ - VM_WARN_ONCE((unsigned long)mapping & PAGE_MAPPING_ANON, + VM_WARN_ONCE((unsigned long)mapping & FOLIO_MAPPING_ANON, "Anonymous mapping always supports large folio"); return mapping_max_folio_order(mapping) > 0; -- cgit v1.2.3 From f5e43012b86aa6a0b0ad5881cb68bfb872826c22 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:25:22 +0200 Subject: mm/balloon_compaction: "movable_ops" doc updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's bring the docs up-to-date. Setting PG_movable_ops + page->private very likely still requires to be performed under documented locks: it's complicated. We will rework this in the future, as we will try avoiding using the page lock. Link: https://lkml.kernel.org/r/20250704102524.326966-29-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: Harry Yoo Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/balloon_compaction.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index b222b0737c46..2fecfead91d2 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -4,12 +4,13 @@ * * Common interface definitions for making balloon pages movable by compaction. * - * Balloon page migration makes use of the general non-lru movable page + * Balloon page migration makes use of the general "movable_ops page migration" * feature. * * page->private is used to reference the responsible balloon device. - * page->mapping is used in context of non-lru page migration to reference - * the address space operations for page isolation/migration/compaction. + * That these pages have movable_ops, and which movable_ops apply, + * is derived from the page type (PageOffline()) combined with the + * PG_movable_ops flag (PageMovableOps()). * * As the page isolation scanning step a compaction thread does is a lockless * procedure (from a page standpoint), it might bring some racy situations while @@ -17,12 +18,10 @@ * and safely perform balloon's page compaction and migration we must, always, * ensure following these simple rules: * - * i. when updating a balloon's page ->mapping element, strictly do it under - * the following lock order, independently of the far superior - * locking scheme (lru_lock, balloon_lock): + * i. Setting the PG_movable_ops flag and page->private with the following + * lock order * +-page_lock(page); * +--spin_lock_irq(&b_dev_info->pages_lock); - * ... page->mapping updates here ... * * ii. isolation or dequeueing procedure must remove the page from balloon * device page list under b_dev_info->pages_lock. -- cgit v1.2.3 From 9640b17a89a86f40df47bfc831a8afeff0c7eabc Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 4 Jul 2025 12:25:23 +0200 Subject: mm/balloon_compaction: provide single balloon_page_insert() and balloon_mapping_gfp_mask() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's just special-case based on IS_ENABLED(CONFIG_BALLOON_COMPACTION) like we did for balloon_page_finalize(). Link: https://lkml.kernel.org/r/20250704102524.326966-30-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Brendan Jackman Cc: Byungchul Park Cc: Chengming Zhou Cc: Christian Brauner Cc: Christophe Leroy Cc: Eugenio Pé rez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: Harry Yoo Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Wang Cc: Jerrin Shaji George Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joshua Hahn Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Qi Zheng Cc: Rakie Kim Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: xu xin Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/balloon_compaction.h | 42 ++++++++++++++------------------------ 1 file changed, 15 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index 2fecfead91d2..7cfe48769239 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -77,6 +77,15 @@ static inline void balloon_devinfo_init(struct balloon_dev_info *balloon) #ifdef CONFIG_BALLOON_COMPACTION extern const struct movable_operations balloon_mops; +/* + * balloon_page_device - get the b_dev_info descriptor for the balloon device + * that enqueues the given page. + */ +static inline struct balloon_dev_info *balloon_page_device(struct page *page) +{ + return (struct balloon_dev_info *)page_private(page); +} +#endif /* CONFIG_BALLOON_COMPACTION */ /* * balloon_page_insert - insert a page into the balloon's page list and make @@ -91,41 +100,20 @@ static inline void balloon_page_insert(struct balloon_dev_info *balloon, struct page *page) { __SetPageOffline(page); - SetPageMovableOps(page); - set_page_private(page, (unsigned long)balloon); - list_add(&page->lru, &balloon->pages); -} - -/* - * balloon_page_device - get the b_dev_info descriptor for the balloon device - * that enqueues the given page. - */ -static inline struct balloon_dev_info *balloon_page_device(struct page *page) -{ - return (struct balloon_dev_info *)page_private(page); -} - -static inline gfp_t balloon_mapping_gfp_mask(void) -{ - return GFP_HIGHUSER_MOVABLE; -} - -#else /* !CONFIG_BALLOON_COMPACTION */ - -static inline void balloon_page_insert(struct balloon_dev_info *balloon, - struct page *page) -{ - __SetPageOffline(page); + if (IS_ENABLED(CONFIG_BALLOON_COMPACTION)) { + SetPageMovableOps(page); + set_page_private(page, (unsigned long)balloon); + } list_add(&page->lru, &balloon->pages); } static inline gfp_t balloon_mapping_gfp_mask(void) { + if (IS_ENABLED(CONFIG_BALLOON_COMPACTION)) + return GFP_HIGHUSER_MOVABLE; return GFP_HIGHUSER; } -#endif /* CONFIG_BALLOON_COMPACTION */ - /* * balloon_page_finalize - prepare a balloon page that was removed from the * balloon list for release to the page allocator -- cgit v1.2.3 From 214db70287277096e77d804284066ce1c07297dd Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 4 Jul 2025 15:14:07 -0700 Subject: mm/damon: add trace event for auto-tuned monitoring intervals Patch series "mm/damon: add trace events for auto-tuned monitoring intervals and DAMOS quota". The aim-oriented auto-tuning features for monitoring intervals and DAMOS quota are important and recommended. Add tracepoints for observabilities of those tuned values and the tuning itself. This patch (of 2): Aim-oriented monitoring intervals auto-tuning is an important and recommended feature for DAMON users. Add a trace event for the observability of the tuned intervals and tuning itself. Link: https://lkml.kernel.org/r/20250704221408.38510-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250704221408.38510-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: kernel test robot Signed-off-by: Andrew Morton --- include/trace/events/damon.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h index da4bd9fd1162..32c611076023 100644 --- a/include/trace/events/damon.h +++ b/include/trace/events/damon.h @@ -48,6 +48,23 @@ TRACE_EVENT_CONDITION(damos_before_apply, __entry->nr_accesses, __entry->age) ); +TRACE_EVENT(damon_monitor_intervals_tune, + + TP_PROTO(unsigned long sample_us), + + TP_ARGS(sample_us), + + TP_STRUCT__entry( + __field(unsigned long, sample_us) + ), + + TP_fast_assign( + __entry->sample_us = sample_us; + ), + + TP_printk("sample_us=%lu", __entry->sample_us) +); + TRACE_EVENT(damon_aggregated, TP_PROTO(unsigned int target_id, struct damon_region *r, -- cgit v1.2.3 From a86d695193bfab3f130f9275c275e4e143dcd2e3 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 4 Jul 2025 15:14:08 -0700 Subject: mm/damon: add trace event for effective size quota Aim-oriented DAMOS quota auto-tuning is an important and recommended feature for DAMOS users. Add a trace event for the observability of the tuned quota and tuning itself. [sj@kernel.org: initialize sidx in damos_trace_esz()] Link: https://lkml.kernel.org/r/20250705172003.52324-1-sj@kernel.org [sj@kernel.org: make damos_esz unconditional trace event] Link: https://lkml.kernel.org/r/20250709182843.35812-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250704221408.38510-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: kernel test robot Signed-off-by: Andrew Morton --- include/trace/events/damon.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h index 32c611076023..852d725afea2 100644 --- a/include/trace/events/damon.h +++ b/include/trace/events/damon.h @@ -9,6 +9,30 @@ #include #include +TRACE_EVENT(damos_esz, + + TP_PROTO(unsigned int context_idx, unsigned int scheme_idx, + unsigned long esz), + + TP_ARGS(context_idx, scheme_idx, esz), + + TP_STRUCT__entry( + __field(unsigned int, context_idx) + __field(unsigned int, scheme_idx) + __field(unsigned long, esz) + ), + + TP_fast_assign( + __entry->context_idx = context_idx; + __entry->scheme_idx = scheme_idx; + __entry->esz = esz; + ), + + TP_printk("ctx_idx=%u scheme_idx=%u esz=%lu", + __entry->context_idx, __entry->scheme_idx, + __entry->esz) +); + TRACE_EVENT_CONDITION(damos_before_apply, TP_PROTO(unsigned int context_idx, unsigned int scheme_idx, -- cgit v1.2.3 From 1bfe6354e0975fe89c3d25e81b6546d205556a4b Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Mon, 7 Jul 2025 22:08:04 +0800 Subject: ext4: process folios writeback in bytes Since ext4 supports large folios, processing writebacks in pages is no longer appropriate, it can be modified to process writebacks in bytes. Suggested-by: Jan Kara Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://patch.msgid.link/20250707140814.542883-2-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 156908641e68..62d52997b5c6 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -483,15 +483,15 @@ TRACE_EVENT(ext4_writepages, ); TRACE_EVENT(ext4_da_write_pages, - TP_PROTO(struct inode *inode, pgoff_t first_page, + TP_PROTO(struct inode *inode, loff_t start_pos, struct writeback_control *wbc), - TP_ARGS(inode, first_page, wbc), + TP_ARGS(inode, start_pos, wbc), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( pgoff_t, first_page ) + __field( loff_t, start_pos ) __field( long, nr_to_write ) __field( int, sync_mode ) ), @@ -499,15 +499,14 @@ TRACE_EVENT(ext4_da_write_pages, TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; - __entry->first_page = first_page; + __entry->start_pos = start_pos; __entry->nr_to_write = wbc->nr_to_write; __entry->sync_mode = wbc->sync_mode; ), - TP_printk("dev %d,%d ino %lu first_page %lu nr_to_write %ld " - "sync_mode %d", + TP_printk("dev %d,%d ino %lu start_pos 0x%llx nr_to_write %ld sync_mode %d", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, __entry->first_page, + (unsigned long) __entry->ino, __entry->start_pos, __entry->nr_to_write, __entry->sync_mode) ); -- cgit v1.2.3 From 6b132759b0fe78e518abafb62190c294100db6d6 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Mon, 7 Jul 2025 22:08:09 +0800 Subject: ext4: enhance tracepoints during the folios writeback After mpage_map_and_submit_extent() supports restarting handle if credits are insufficient during allocating blocks, it is more likely to exit the current mapping iteration and continue to process the current processing partially mapped folio again. The existing tracepoints are not sufficient to track this situation, so enhance the tracepoints to track the writeback position and the return value before and after submitting the folios. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://patch.msgid.link/20250707140814.542883-7-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 62d52997b5c6..845451077c41 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -482,16 +482,17 @@ TRACE_EVENT(ext4_writepages, (unsigned long) __entry->writeback_index) ); -TRACE_EVENT(ext4_da_write_pages, - TP_PROTO(struct inode *inode, loff_t start_pos, +TRACE_EVENT(ext4_da_write_folios_start, + TP_PROTO(struct inode *inode, loff_t start_pos, loff_t next_pos, struct writeback_control *wbc), - TP_ARGS(inode, start_pos, wbc), + TP_ARGS(inode, start_pos, next_pos, wbc), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, start_pos ) + __field( loff_t, next_pos ) __field( long, nr_to_write ) __field( int, sync_mode ) ), @@ -500,16 +501,47 @@ TRACE_EVENT(ext4_da_write_pages, __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->start_pos = start_pos; + __entry->next_pos = next_pos; __entry->nr_to_write = wbc->nr_to_write; __entry->sync_mode = wbc->sync_mode; ), - TP_printk("dev %d,%d ino %lu start_pos 0x%llx nr_to_write %ld sync_mode %d", + TP_printk("dev %d,%d ino %lu start_pos 0x%llx next_pos 0x%llx nr_to_write %ld sync_mode %d", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, __entry->start_pos, + (unsigned long) __entry->ino, __entry->start_pos, __entry->next_pos, __entry->nr_to_write, __entry->sync_mode) ); +TRACE_EVENT(ext4_da_write_folios_end, + TP_PROTO(struct inode *inode, loff_t start_pos, loff_t next_pos, + struct writeback_control *wbc, int ret), + + TP_ARGS(inode, start_pos, next_pos, wbc, ret), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( loff_t, start_pos ) + __field( loff_t, next_pos ) + __field( long, nr_to_write ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->start_pos = start_pos; + __entry->next_pos = next_pos; + __entry->nr_to_write = wbc->nr_to_write; + __entry->ret = ret; + ), + + TP_printk("dev %d,%d ino %lu start_pos 0x%llx next_pos 0x%llx nr_to_write %ld ret %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, __entry->start_pos, __entry->next_pos, + __entry->nr_to_write, __entry->ret) +); + TRACE_EVENT(ext4_da_write_pages_extent, TP_PROTO(struct inode *inode, struct ext4_map_blocks *map), -- cgit v1.2.3 From c3ff7f06c7876bc292cac1c7d4df3d0bfd74f3b7 Mon Sep 17 00:00:00 2001 From: I Viswanath Date: Wed, 9 Jul 2025 20:37:18 +0530 Subject: i2c: Clarify behavior of I2C_M_RD flag Update the description of I2C_M_RD to clarify that not setting it signals a write transaction Signed-off-by: I Viswanath Signed-off-by: Wolfram Sang --- include/uapi/linux/i2c.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/i2c.h b/include/uapi/linux/i2c.h index 92326ebde350..a2db2a56c8b0 100644 --- a/include/uapi/linux/i2c.h +++ b/include/uapi/linux/i2c.h @@ -21,7 +21,8 @@ * * @flags: * Supported by all adapters: - * %I2C_M_RD: read data (from slave to master). Guaranteed to be 0x0001! + * %I2C_M_RD: read data (from slave to master). Guaranteed to be 0x0001! If + * not set, the transaction is interpreted as write. * * Optional: * %I2C_M_DMA_SAFE: the buffer of this message is DMA safe. Makes only sense -- cgit v1.2.3 From 0df11950099887520cc8bf22a790a5535be30e8d Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 13 Jul 2025 14:26:25 +0100 Subject: xen: Remove some deadcode (x) Remove three uncalled functions: xenbus_mkdir() was added in 2007 by commit 4bac07c993d0 ("xen: add the Xenbus sysfs and virtual device hotplug driver") but has remained unused. xen_get_runstate_snapshot() last use was removed in 2016 by commit 6ba286ad8457 ("xen: support runqueue steal time on xen") which replaces the use by the _cpu version. xen_resume_notifier_unregister() last use was removed in 2017 by commit 1914f0cd203c ("xen/acpi: upload PM state from init-domain to Xen") Remove them. Signed-off-by: "Dr. David Alan Gilbert" Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Message-ID: <20250713132625.164728-1-linux@treblig.org> --- include/xen/xen-ops.h | 2 -- include/xen/xenbus.h | 2 -- 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 47f11bec5e90..9e2a769b0d96 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -30,13 +30,11 @@ void xen_arch_suspend(void); void xen_reboot(int reason); void xen_resume_notifier_register(struct notifier_block *nb); -void xen_resume_notifier_unregister(struct notifier_block *nb); bool xen_vcpu_stolen(int vcpu); void xen_setup_runstate_info(int cpu); void xen_time_setup_guest(void); void xen_manage_runstate_time(int action); -void xen_get_runstate_snapshot(struct vcpu_runstate_info *res); u64 xen_steal_clock(int cpu); int xen_setup_shutdown_event(void); diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 3f90bdd387b6..1c23e6387f13 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -154,8 +154,6 @@ void *xenbus_read(struct xenbus_transaction t, const char *dir, const char *node, unsigned int *len); int xenbus_write(struct xenbus_transaction t, const char *dir, const char *node, const char *string); -int xenbus_mkdir(struct xenbus_transaction t, - const char *dir, const char *node); int xenbus_exists(struct xenbus_transaction t, const char *dir, const char *node); int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node); -- cgit v1.2.3 From 67fd9615a782b11cd0c62823d722a815c9e1eb75 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jul 2025 15:33:26 +0200 Subject: iomap: pass more arguments using the iomap writeback context Add inode and wpc fields to pass the inode and writeback context that are needed in the entire writeback call chain, and let the callers initialize all fields in the writeback context before calling iomap_writepages to simplify the argument passing. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250710133343.399917-3-hch@lst.de Reviewed-by: Brian Foster Reviewed-by: Joanne Koong Reviewed-by: Johannes Thumshirn Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- include/linux/iomap.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 522644d62f30..00179c9387c5 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -448,6 +448,8 @@ struct iomap_writeback_ops { struct iomap_writepage_ctx { struct iomap iomap; + struct inode *inode; + struct writeback_control *wbc; struct iomap_ioend *ioend; const struct iomap_writeback_ops *ops; u32 nr_folios; /* folios added to the ioend */ @@ -461,9 +463,7 @@ void iomap_finish_ioends(struct iomap_ioend *ioend, int error); void iomap_ioend_try_merge(struct iomap_ioend *ioend, struct list_head *more_ioends); void iomap_sort_ioends(struct list_head *ioend_list); -int iomap_writepages(struct address_space *mapping, - struct writeback_control *wbc, struct iomap_writepage_ctx *wpc, - const struct iomap_writeback_ops *ops); +int iomap_writepages(struct iomap_writepage_ctx *wpc); /* * Flags for direct I/O ->end_io: -- cgit v1.2.3 From fb7399cf2d0b33825b8039f95c45395c7deba25c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jul 2025 15:33:28 +0200 Subject: iomap: refactor the writeback interface Replace ->map_blocks with a new ->writeback_range, which differs in the following ways: - it must also queue up the I/O for writeback, that is called into the slightly refactored and extended in scope iomap_add_to_ioend for each region - can handle only a part of the requested region, that is the retry loop for partial mappings moves to the caller - handles cleanup on failures as well, and thus also replaces the discard_folio method only implemented by XFS. This will allow to use the iomap writeback code also for file systems that are not block based like fuse. Co-developed-by: Joanne Koong Signed-off-by: Joanne Koong Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250710133343.399917-5-hch@lst.de Acked-by: Damien Le Moal # zonefs Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- include/linux/iomap.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 00179c9387c5..625d7911a2b5 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -416,18 +416,20 @@ static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio) struct iomap_writeback_ops { /* - * Required, maps the blocks so that writeback can be performed on - * the range starting at offset. + * Required, performs writeback on the passed in range * - * Can return arbitrarily large regions, but we need to call into it at + * Can map arbitrarily large regions, but we need to call into it at * least once per folio to allow the file systems to synchronize with * the write path that could be invalidating mappings. * * An existing mapping from a previous call to this method can be reused * by the file system if it is still valid. + * + * Returns the number of bytes processed or a negative errno. */ - int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, - loff_t offset, unsigned len); + ssize_t (*writeback_range)(struct iomap_writepage_ctx *wpc, + struct folio *folio, u64 pos, unsigned int len, + u64 end_pos); /* * Optional, allows the file systems to hook into bio submission, @@ -438,12 +440,6 @@ struct iomap_writeback_ops { * the bio could not be submitted. */ int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status); - - /* - * Optional, allows the file system to discard state on a page where - * we failed to submit any I/O. - */ - void (*discard_folio)(struct folio *folio, loff_t pos); }; struct iomap_writepage_ctx { @@ -463,6 +459,9 @@ void iomap_finish_ioends(struct iomap_ioend *ioend, int error); void iomap_ioend_try_merge(struct iomap_ioend *ioend, struct list_head *more_ioends); void iomap_sort_ioends(struct list_head *ioend_list); +ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, + loff_t pos, loff_t end_pos, unsigned int dirty_len); + int iomap_writepages(struct iomap_writepage_ctx *wpc); /* -- cgit v1.2.3 From f4fa7981fa26c664cc540cbce9bcb7ffe02a8912 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jul 2025 15:33:29 +0200 Subject: iomap: hide ioends from the generic writeback code Replace the ioend pointer in iomap_writeback_ctx with a void *wb_ctx one to facilitate non-block, non-ioend writeback for use. Rename the submit_ioend method to writeback_submit and make it mandatory so that the generic writeback code stops seeing ioends and bios. Co-developed-by: Joanne Koong Signed-off-by: Joanne Koong Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250710133343.399917-6-hch@lst.de Acked-by: Damien Le Moal Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- include/linux/iomap.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 625d7911a2b5..9f32dd8dc075 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -391,8 +391,7 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno, /* * Structure for writeback I/O completions. * - * File systems implementing ->submit_ioend (for buffered I/O) or ->submit_io - * for direct I/O) can split a bio generated by iomap. In that case the parent + * File systems can split a bio generated by iomap. In that case the parent * ioend it was split from is recorded in ioend->io_parent. */ struct iomap_ioend { @@ -416,7 +415,7 @@ static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio) struct iomap_writeback_ops { /* - * Required, performs writeback on the passed in range + * Performs writeback on the passed in range * * Can map arbitrarily large regions, but we need to call into it at * least once per folio to allow the file systems to synchronize with @@ -432,23 +431,22 @@ struct iomap_writeback_ops { u64 end_pos); /* - * Optional, allows the file systems to hook into bio submission, - * including overriding the bi_end_io handler. + * Submit a writeback context previously build up by ->writeback_range. * - * Returns 0 if the bio was successfully submitted, or a negative - * error code if status was non-zero or another error happened and - * the bio could not be submitted. + * Returns 0 if the context was successfully submitted, or a negative + * error code if not. If @error is non-zero a failure occurred, and + * the writeback context should be completed with an error. */ - int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status); + int (*writeback_submit)(struct iomap_writepage_ctx *wpc, int error); }; struct iomap_writepage_ctx { struct iomap iomap; struct inode *inode; struct writeback_control *wbc; - struct iomap_ioend *ioend; const struct iomap_writeback_ops *ops; u32 nr_folios; /* folios added to the ioend */ + void *wb_ctx; /* pending writeback context */ }; struct iomap_ioend *iomap_init_ioend(struct inode *inode, struct bio *bio, @@ -461,6 +459,7 @@ void iomap_ioend_try_merge(struct iomap_ioend *ioend, void iomap_sort_ioends(struct list_head *ioend_list); ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, loff_t pos, loff_t end_pos, unsigned int dirty_len); +int iomap_ioend_writeback_submit(struct iomap_writepage_ctx *wpc, int error); int iomap_writepages(struct iomap_writepage_ctx *wpc); -- cgit v1.2.3 From 9caf1ea80cedf7d35d9371c44fbe5f84b0da667a Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Thu, 10 Jul 2025 15:33:30 +0200 Subject: iomap: add public helpers for uptodate state manipulation Add a new iomap_start_folio_write helper to abstract away the write_bytes_pending handling, and export it and the existing iomap_finish_folio_write for non-iomap writeback in fuse. Signed-off-by: Joanne Koong [hch: split from a larger patch] Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250710133343.399917-7-hch@lst.de Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- include/linux/iomap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 9f32dd8dc075..cbf9d299a616 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -461,6 +461,11 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, loff_t pos, loff_t end_pos, unsigned int dirty_len); int iomap_ioend_writeback_submit(struct iomap_writepage_ctx *wpc, int error); +void iomap_start_folio_write(struct inode *inode, struct folio *folio, + size_t len); +void iomap_finish_folio_write(struct inode *inode, struct folio *folio, + size_t len); + int iomap_writepages(struct iomap_writepage_ctx *wpc); /* -- cgit v1.2.3 From 8b217cf779cba2b10112f6845dcbbb7e6f4b3d75 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jul 2025 15:33:34 +0200 Subject: iomap: export iomap_writeback_folio Allow fuse to use iomap_writeback_folio for folio laundering. Note that the caller needs to manually submit the pending writeback context. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250710133343.399917-11-hch@lst.de Reviewed-by: Joanne Koong Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- include/linux/iomap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index cbf9d299a616..b65d3f063bb0 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -466,6 +466,7 @@ void iomap_start_folio_write(struct inode *inode, struct folio *folio, void iomap_finish_folio_write(struct inode *inode, struct folio *folio, size_t len); +int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio); int iomap_writepages(struct iomap_writepage_ctx *wpc); /* -- cgit v1.2.3 From 2a5574fc57d13031f869c409181bdeadd75770e1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jul 2025 15:33:35 +0200 Subject: iomap: replace iomap_folio_ops with iomap_write_ops The iomap_folio_ops are only used for buffered writes, including the zero and unshare variants. Rename them to iomap_write_ops to better describe the usage, and pass them through the call chain like the other operation specific methods instead of through the iomap. xfs_iomap_valid grows a IOMAP_HOLE check to keep the existing behavior that never attached the folio_ops to a iomap representing a hole. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250710133343.399917-12-hch@lst.de Acked-by: Damien Le Moal Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- include/linux/iomap.h | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index b65d3f063bb0..80f543cc4fe8 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -101,8 +101,6 @@ struct vm_fault; */ #define IOMAP_NULL_ADDR -1ULL /* addr is not valid */ -struct iomap_folio_ops; - struct iomap { u64 addr; /* disk offset of mapping, bytes */ loff_t offset; /* file offset of mapping, bytes */ @@ -113,7 +111,6 @@ struct iomap { struct dax_device *dax_dev; /* dax_dev for dax operations */ void *inline_data; void *private; /* filesystem private */ - const struct iomap_folio_ops *folio_ops; u64 validity_cookie; /* used with .iomap_valid() */ }; @@ -143,16 +140,11 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap) } /* - * When a filesystem sets folio_ops in an iomap mapping it returns, get_folio - * and put_folio will be called for each folio written to. This only applies - * to buffered writes as unbuffered writes will not typically have folios - * associated with them. - * * When get_folio succeeds, put_folio will always be called to do any * cleanup work necessary. put_folio is responsible for unlocking and putting * @folio. */ -struct iomap_folio_ops { +struct iomap_write_ops { struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos, unsigned len); void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, @@ -335,7 +327,8 @@ static inline bool iomap_want_unshare_iter(const struct iomap_iter *iter) } ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, - const struct iomap_ops *ops, void *private); + const struct iomap_ops *ops, + const struct iomap_write_ops *write_ops, void *private); int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); @@ -344,11 +337,14 @@ bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio); int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, - const struct iomap_ops *ops); + const struct iomap_ops *ops, + const struct iomap_write_ops *write_ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, - bool *did_zero, const struct iomap_ops *ops, void *private); + bool *did_zero, const struct iomap_ops *ops, + const struct iomap_write_ops *write_ops, void *private); int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, - const struct iomap_ops *ops, void *private); + const struct iomap_ops *ops, + const struct iomap_write_ops *write_ops, void *private); vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops, void *private); typedef void (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length, -- cgit v1.2.3 From c5690dd0197809bc5305f474a71b2e71e7eac0ff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jul 2025 15:33:37 +0200 Subject: iomap: add read_folio_range() handler for buffered writes Add a read_folio_range() handler for buffered writes that filesystems may pass in if they wish to provide a custom handler for synchronously reading in the contents of a folio. Signed-off-by: Joanne Koong [hch: renamed to read_folio_range, pass less arguments] Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250710133343.399917-14-hch@lst.de Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- include/linux/iomap.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 80f543cc4fe8..73dceabc21c8 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -166,6 +166,16 @@ struct iomap_write_ops { * locked by the iomap code. */ bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap); + + /* + * Optional if the filesystem wishes to provide a custom handler for + * reading in the contents of a folio, otherwise iomap will default to + * submitting a bio read request. + * + * The read must be done synchronously. + */ + int (*read_folio_range)(const struct iomap_iter *iter, + struct folio *folio, loff_t pos, size_t len); }; /* -- cgit v1.2.3 From e075f4360931263f5ec006ea5dadc065e5e98eb8 Mon Sep 17 00:00:00 2001 From: Li Chen Date: Thu, 10 Jul 2025 18:57:07 +0800 Subject: smpboot: introduce SDTL_INIT() helper to tidy sched topology setup Define a small SDTL_INIT(maskfn, flagsfn, name) macro and use it to build the sched_domain_topology_level array. Purely a cleanup; behaviour is unchanged. Suggested-by: Thomas Gleixner Signed-off-by: Li Chen Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: K Prateek Nayak Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20250710105715.66594-2-me@linux.beauty --- include/linux/sched/topology.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index e54e7fa76ba6..0d5daaa277b7 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -196,8 +196,8 @@ struct sched_domain_topology_level { extern void __init set_sched_topology(struct sched_domain_topology_level *tl); extern void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio); - -# define SD_INIT_NAME(type) .name = #type +#define SDTL_INIT(maskfn, flagsfn, dname) ((struct sched_domain_topology_level) \ + { .mask = maskfn, .sd_flags = flagsfn, .name = #dname }) #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) extern void rebuild_sched_domains_energy(void); -- cgit v1.2.3 From 1eec89a671413ce38df9fe9e70f5130a9eb79a59 Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Fri, 11 Jul 2025 11:20:30 +0530 Subject: sched/topology: Remove sched_domain_topology_level::flags Support for overlapping domains added in commit e3589f6c81e4 ("sched: Allow for overlapping sched_domain spans") also allowed forcefully setting SD_OVERLAP for !NUMA domains via FORCE_SD_OVERLAP sched_feat(). Since NUMA domains had to be presumed overlapping to ensure correct behavior, "sched_domain_topology_level::flags" was introduced. NUMA domains added the SDTL_OVERLAP flag would ensure SD_OVERLAP was always added during build_sched_domains() for these domains, even when FORCE_SD_OVERLAP was off. Condition for adding the SD_OVERLAP flag at the aforementioned commit was as follows: if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP)) sd->flags |= SD_OVERLAP; The FORCE_SD_OVERLAP debug feature was removed in commit af85596c74de ("sched/topology: Remove FORCE_SD_OVERLAP") which left the NUMA domains as the exclusive users of SDTL_OVERLAP, SD_OVERLAP, and SD_NUMA flags. Get rid of SDTL_OVERLAP and SD_OVERLAP as they have become redundant and instead rely on SD_NUMA to detect the only overlapping domain currently supported. Since SDTL_OVERLAP was the only user of "tl->flags", get rid of "sched_domain_topology_level::flags" too. Signed-off-by: K Prateek Nayak Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/ba4dbdf8-bc37-493d-b2e0-2efb00ea3e19@amd.com --- include/linux/sched/sd_flags.h | 8 -------- include/linux/sched/topology.h | 3 --- 2 files changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h index b04a5d04dee9..42839cfa2778 100644 --- a/include/linux/sched/sd_flags.h +++ b/include/linux/sched/sd_flags.h @@ -153,14 +153,6 @@ SD_FLAG(SD_ASYM_PACKING, SDF_NEEDS_GROUPS) */ SD_FLAG(SD_PREFER_SIBLING, SDF_NEEDS_GROUPS) -/* - * sched_groups of this level overlap - * - * SHARED_PARENT: Set for all NUMA levels above NODE. - * NEEDS_GROUPS: Overlaps can only exist with more than one group. - */ -SD_FLAG(SD_OVERLAP, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS) - /* * Cross-node balancing * diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 0d5daaa277b7..5263746b63e8 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -175,8 +175,6 @@ bool cpus_share_resources(int this_cpu, int that_cpu); typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); typedef int (*sched_domain_flags_f)(void); -#define SDTL_OVERLAP 0x01 - struct sd_data { struct sched_domain *__percpu *sd; struct sched_domain_shared *__percpu *sds; @@ -187,7 +185,6 @@ struct sd_data { struct sched_domain_topology_level { sched_domain_mask_f mask; sched_domain_flags_f sd_flags; - int flags; int numa_level; struct sd_data data; char *name; -- cgit v1.2.3 From 89635eae076cd8eaa5cb752f66538c9dc6c9fdc3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Jul 2025 16:10:01 +0100 Subject: netfs: Fix race between cache write completion and ALL_QUEUED being set When netfslib is issuing subrequests, the subrequests start processing immediately and may complete before we reach the end of the issuing function. At the end of the issuing function we set NETFS_RREQ_ALL_QUEUED to indicate to the collector that we aren't going to issue any more subreqs and that it can do the final notifications and cleanup. Now, this isn't a problem if the request is synchronous (NETFS_RREQ_OFFLOAD_COLLECTION is unset) as the result collection will be done in-thread and we're guaranteed an opportunity to run the collector. However, if the request is asynchronous, collection is primarily triggered by the termination of subrequests queuing it on a workqueue. Now, a race can occur here if the app thread sets ALL_QUEUED after the last subrequest terminates. This can happen most easily with the copy2cache code (as used by Ceph) where, in the collection routine of a read request, an asynchronous write request is spawned to copy data to the cache. Folios are added to the write request as they're unlocked, but there may be a delay before ALL_QUEUED is set as the write subrequests may complete before we get there. If all the write subreqs have finished by the ALL_QUEUED point, no further events happen and the collection never happens, leaving the request hanging. Fix this by queuing the collector after setting ALL_QUEUED. This is a bit heavy-handed and it may be sufficient to do it only if there are no extant subreqs. Also add a tracepoint to cross-reference both requests in a copy-to-request operation and add a trace to the netfs_rreq tracepoint to indicate the setting of ALL_QUEUED. Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item") Reported-by: Max Kellermann Link: https://lore.kernel.org/r/CAKPOu+8z_ijTLHdiCYGU_Uk7yYD=shxyGLwfe-L7AV3DhebS3w@mail.gmail.com/ Signed-off-by: David Howells Link: https://lore.kernel.org/20250711151005.2956810-3-dhowells@redhat.com Reviewed-by: Paulo Alcantara (Red Hat) cc: Paulo Alcantara cc: Viacheslav Dubeyko cc: Alex Markuze cc: Ilya Dryomov cc: netfs@lists.linux.dev cc: ceph-devel@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: stable@vger.kernel.org Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 73e96ccbe830..64a382fbc31a 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -55,6 +55,7 @@ EM(netfs_rreq_trace_copy, "COPY ") \ EM(netfs_rreq_trace_dirty, "DIRTY ") \ EM(netfs_rreq_trace_done, "DONE ") \ + EM(netfs_rreq_trace_end_copy_to_cache, "END-C2C") \ EM(netfs_rreq_trace_free, "FREE ") \ EM(netfs_rreq_trace_ki_complete, "KI-CMPL") \ EM(netfs_rreq_trace_recollect, "RECLLCT") \ @@ -559,6 +560,35 @@ TRACE_EVENT(netfs_write, __entry->start, __entry->start + __entry->len - 1) ); +TRACE_EVENT(netfs_copy2cache, + TP_PROTO(const struct netfs_io_request *rreq, + const struct netfs_io_request *creq), + + TP_ARGS(rreq, creq), + + TP_STRUCT__entry( + __field(unsigned int, rreq) + __field(unsigned int, creq) + __field(unsigned int, cookie) + __field(unsigned int, ino) + ), + + TP_fast_assign( + struct netfs_inode *__ctx = netfs_inode(rreq->inode); + struct fscache_cookie *__cookie = netfs_i_cookie(__ctx); + __entry->rreq = rreq->debug_id; + __entry->creq = creq->debug_id; + __entry->cookie = __cookie ? __cookie->debug_id : 0; + __entry->ino = rreq->inode->i_ino; + ), + + TP_printk("R=%08x CR=%08x c=%08x i=%x ", + __entry->rreq, + __entry->creq, + __entry->cookie, + __entry->ino) + ); + TRACE_EVENT(netfs_collect, TP_PROTO(const struct netfs_io_request *wreq), -- cgit v1.2.3 From 45b9d1da6ca0d0285140f8779793b537e4560d45 Mon Sep 17 00:00:00 2001 From: Jie Zhan Date: Mon, 23 Jun 2025 22:34:00 +0800 Subject: PM / devfreq: Allow devfreq driver to add custom sysfs ABIs Extend the devfreq_dev_profile to allow drivers optionally create device-specific sysfs ABIs together with other common devfreq ABIs under the devfreq device path. Reviewed-by: Jonathan Cameron Reviewed-by: Huisong Li Signed-off-by: Jie Zhan Signed-off-by: Chanwoo Choi Link: https://patchwork.kernel.org/project/linux-pm/patch/20250623143401.4095045-2-zhanjie9@hisilicon.com/ --- include/linux/devfreq.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index d312ffbac4dd..dc1075dc3446 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -103,6 +103,8 @@ struct devfreq_dev_status { * * @is_cooling_device: A self-explanatory boolean giving the device a * cooling effect property. + * @dev_groups: Optional device-specific sysfs attribute groups that to + * be attached to the devfreq device. */ struct devfreq_dev_profile { unsigned long initial_freq; @@ -119,6 +121,8 @@ struct devfreq_dev_profile { unsigned int max_state; bool is_cooling_device; + + const struct attribute_group **dev_groups; }; /** -- cgit v1.2.3 From 36a686c0784fcccdaa4f38b498a9ef0d42ea7cb8 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 10 Jul 2025 18:43:42 +0200 Subject: Revert "netfilter: nf_tables: Add notifications for hook changes" This reverts commit 465b9ee0ee7bc268d7f261356afd6c4262e48d82. Such notifications fit better into core or nfnetlink_hook code, following the NFNL_MSG_HOOK_GET message format. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 ----- include/uapi/linux/netfilter/nf_tables.h | 10 ---------- include/uapi/linux/netfilter/nfnetlink.h | 2 -- 3 files changed, 17 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e4d8e451e935..5e49619ae49c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1142,11 +1142,6 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set); int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); -struct nft_hook; -void nf_tables_chain_device_notify(const struct nft_chain *chain, - const struct nft_hook *hook, - const struct net_device *dev, int event); - enum nft_chain_types { NFT_CHAIN_T_DEFAULT = 0, NFT_CHAIN_T_ROUTE, diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 518ba144544c..2beb30be2c5f 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -142,8 +142,6 @@ enum nf_tables_msg_types { NFT_MSG_DESTROYOBJ, NFT_MSG_DESTROYFLOWTABLE, NFT_MSG_GETSETELEM_RESET, - NFT_MSG_NEWDEV, - NFT_MSG_DELDEV, NFT_MSG_MAX, }; @@ -1786,18 +1784,10 @@ enum nft_synproxy_attributes { * enum nft_device_attributes - nf_tables device netlink attributes * * @NFTA_DEVICE_NAME: name of this device (NLA_STRING) - * @NFTA_DEVICE_TABLE: table containing the flowtable or chain hooking into the device (NLA_STRING) - * @NFTA_DEVICE_FLOWTABLE: flowtable hooking into the device (NLA_STRING) - * @NFTA_DEVICE_CHAIN: chain hooking into the device (NLA_STRING) - * @NFTA_DEVICE_SPEC: hook spec matching the device (NLA_STRING) */ enum nft_devices_attributes { NFTA_DEVICE_UNSPEC, NFTA_DEVICE_NAME, - NFTA_DEVICE_TABLE, - NFTA_DEVICE_FLOWTABLE, - NFTA_DEVICE_CHAIN, - NFTA_DEVICE_SPEC, __NFTA_DEVICE_MAX }; #define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1) diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 50d807af2649..6cd58cd2a6f0 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -25,8 +25,6 @@ enum nfnetlink_groups { #define NFNLGRP_ACCT_QUOTA NFNLGRP_ACCT_QUOTA NFNLGRP_NFTRACE, #define NFNLGRP_NFTRACE NFNLGRP_NFTRACE - NFNLGRP_NFT_DEV, -#define NFNLGRP_NFT_DEV NFNLGRP_NFT_DEV __NFNLGRP_MAX, }; #define NFNLGRP_MAX (__NFNLGRP_MAX - 1) -- cgit v1.2.3 From 25c411fce735dda29de26f58d3fce52d4824380c Mon Sep 17 00:00:00 2001 From: John Stultz Date: Sat, 12 Jul 2025 03:33:42 +0000 Subject: sched: Add CONFIG_SCHED_PROXY_EXEC & boot argument to enable/disable Add a CONFIG_SCHED_PROXY_EXEC option, along with a boot argument sched_proxy_exec= that can be used to disable the feature at boot time if CONFIG_SCHED_PROXY_EXEC was enabled. Also uses this option to allow the rq->donor to be different from rq->curr. Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra (Intel) Tested-by: K Prateek Nayak Link: https://lkml.kernel.org/r/20250712033407.2383110-2-jstultz@google.com --- include/linux/sched.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 54a91261e99b..f225b6b1baa3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1656,6 +1656,19 @@ struct task_struct { randomized_struct_fields_end } __attribute__ ((aligned (64))); +#ifdef CONFIG_SCHED_PROXY_EXEC +DECLARE_STATIC_KEY_TRUE(__sched_proxy_exec); +static inline bool sched_proxy_exec(void) +{ + return static_branch_likely(&__sched_proxy_exec); +} +#else +static inline bool sched_proxy_exec(void) +{ + return false; +} +#endif + #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) -- cgit v1.2.3 From 44e4e0297c3c01987399bb9973f4d22a096a62c2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 12 Jul 2025 03:33:43 +0000 Subject: locking/mutex: Rework task_struct::blocked_on Track the blocked-on relation for mutexes, to allow following this relation at schedule time. task | blocked-on v mutex | owner v task This all will be used for tracking blocked-task/mutex chains with the prox-execution patch in a similar fashion to how priority inheritance is done with rt_mutexes. For serialization, blocked-on is only set by the task itself (current). And both when setting or clearing (potentially by others), is done while holding the mutex::wait_lock. [minor changes while rebasing] [jstultz: Fix blocked_on tracking in __mutex_lock_common in error paths] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Juri Lelli Signed-off-by: Connor O'Brien Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra (Intel) Tested-by: K Prateek Nayak Link: https://lkml.kernel.org/r/20250712033407.2383110-3-jstultz@google.com --- include/linux/sched.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f225b6b1baa3..33ad240ec900 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1230,10 +1230,7 @@ struct task_struct { struct rt_mutex_waiter *pi_blocked_on; #endif -#ifdef CONFIG_DEBUG_MUTEXES - /* Mutex deadlock detection: */ - struct mutex_waiter *blocked_on; -#endif + struct mutex *blocked_on; /* lock we're blocked on */ #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER /* -- cgit v1.2.3 From a4f0b6fef4b08e9928449206390133e48ac185a7 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Sat, 12 Jul 2025 03:33:44 +0000 Subject: locking/mutex: Add p->blocked_on wrappers for correctness checks This lets us assert mutex::wait_lock is held whenever we access p->blocked_on, as well as warn us for unexpected state changes. [fix conflicts, call in more places] [jstultz: tweaked commit subject, reworked a good bit] Signed-off-by: Valentin Schneider Signed-off-by: Connor O'Brien Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra (Intel) Tested-by: K Prateek Nayak Link: https://lkml.kernel.org/r/20250712033407.2383110-4-jstultz@google.com --- include/linux/sched.h | 64 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 33ad240ec900..5b4e1cd52e27 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -2129,6 +2130,67 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock); __cond_resched_rwlock_write(lock); \ }) +#ifndef CONFIG_PREEMPT_RT +static inline struct mutex *__get_task_blocked_on(struct task_struct *p) +{ + struct mutex *m = p->blocked_on; + + if (m) + lockdep_assert_held_once(&m->wait_lock); + return m; +} + +static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m) +{ + WARN_ON_ONCE(!m); + /* The task should only be setting itself as blocked */ + WARN_ON_ONCE(p != current); + /* Currently we serialize blocked_on under the mutex::wait_lock */ + lockdep_assert_held_once(&m->wait_lock); + /* + * Check ensure we don't overwrite existing mutex value + * with a different mutex. Note, setting it to the same + * lock repeatedly is ok. + */ + WARN_ON_ONCE(p->blocked_on && p->blocked_on != m); + p->blocked_on = m; +} + +static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m) +{ + guard(raw_spinlock_irqsave)(&m->wait_lock); + __set_task_blocked_on(p, m); +} + +static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m) +{ + WARN_ON_ONCE(!m); + /* Currently we serialize blocked_on under the mutex::wait_lock */ + lockdep_assert_held_once(&m->wait_lock); + /* + * There may be cases where we re-clear already cleared + * blocked_on relationships, but make sure we are not + * clearing the relationship with a different lock. + */ + WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m); + p->blocked_on = NULL; +} + +static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m) +{ + guard(raw_spinlock_irqsave)(&m->wait_lock); + __clear_task_blocked_on(p, m); +} +#else +static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m) +{ +} + +static inline void clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m) +{ +} +#endif /* !CONFIG_PREEMPT_RT */ + static __always_inline bool need_resched(void) { return unlikely(tif_need_resched()); @@ -2168,8 +2230,6 @@ extern bool sched_task_on_rq(struct task_struct *p); extern unsigned long get_wchan(struct task_struct *p); extern struct task_struct *cpu_curr_snapshot(int cpu); -#include - /* * In order to reduce various lock holder preemption latencies provide an * interface to see if a vCPU is currently running or not. -- cgit v1.2.3 From 8671bad873ebeb082afcf7b4501395c374da6023 Mon Sep 17 00:00:00 2001 From: "Luis Claudio R. Goncalves" Date: Mon, 7 Jul 2025 11:03:59 -0300 Subject: sched: Do not call __put_task_struct() on rt if pi_blocked_on is set With PREEMPT_RT enabled, some of the calls to put_task_struct() coming from rt_mutex_adjust_prio_chain() could happen in preemptible context and with a mutex enqueued. That could lead to this sequence: rt_mutex_adjust_prio_chain() put_task_struct() __put_task_struct() sched_ext_free() spin_lock_irqsave() rtlock_lock() ---> TRIGGERS lockdep_assert(!current->pi_blocked_on); This is not a SCHED_EXT bug. The first cleanup function called by __put_task_struct() is sched_ext_free() and it happens to take a (RT) spin_lock, which in the scenario described above, would trigger the lockdep assertion of "!current->pi_blocked_on". Crystal Wood was able to identify the problem as __put_task_struct() being called during rt_mutex_adjust_prio_chain(), in the context of a process with a mutex enqueued. Instead of adding more complex conditions to decide when to directly call __put_task_struct() and when to defer the call, unconditionally resort to the deferred call on PREEMPT_RT to simplify the code. Fixes: 893cdaaa3977 ("sched: avoid false lockdep splat in put_task_struct()") Suggested-by: Crystal Wood Signed-off-by: Luis Claudio R. Goncalves Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Wander Lairson Costa Reviewed-by: Valentin Schneider Reviewed-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/aGvTz5VaPFyj0pBV@uudg.org --- include/linux/sched/task.h | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index c517dbc242f7..ea41795a352b 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -131,24 +131,17 @@ static inline void put_task_struct(struct task_struct *t) return; /* - * In !RT, it is always safe to call __put_task_struct(). - * Under RT, we can only call it in preemptible context. - */ - if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) { - static DEFINE_WAIT_OVERRIDE_MAP(put_task_map, LD_WAIT_SLEEP); - - lock_map_acquire_try(&put_task_map); - __put_task_struct(t); - lock_map_release(&put_task_map); - return; - } - - /* - * under PREEMPT_RT, we can't call put_task_struct + * Under PREEMPT_RT, we can't call __put_task_struct * in atomic context because it will indirectly - * acquire sleeping locks. + * acquire sleeping locks. The same is true if the + * current process has a mutex enqueued (blocked on + * a PI chain). + * + * In !RT, it is always safe to call __put_task_struct(). + * Though, in order to simplify the code, resort to the + * deferred call too. * - * call_rcu() will schedule delayed_put_task_struct_rcu() + * call_rcu() will schedule __put_task_struct_rcu_cb() * to be called in process context. * * __put_task_struct() is called when @@ -161,7 +154,7 @@ static inline void put_task_struct(struct task_struct *t) * * delayed_free_task() also uses ->rcu, but it is only called * when it fails to fork a process. Therefore, there is no - * way it can conflict with put_task_struct(). + * way it can conflict with __put_task_struct(). */ call_rcu(&t->rcu, __put_task_struct_rcu_cb); } -- cgit v1.2.3 From 7941ad696506917fa6228f44be2df0c2f0909a62 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 11 Jul 2025 14:58:43 -0700 Subject: lib/crypto: sha2: Add hmac_sha*_init_usingrawkey() While the HMAC library functions support both incremental and one-shot computation and both prepared and raw keys, the combination of raw key + incremental was missing. It turns out that several potential users of the HMAC library functions (tpm2-sessions.c, smb2transport.c, trusted_tpm1.c) want exactly that. Therefore, add the missing functions hmac_sha*_init_usingrawkey(). Implement them in an optimized way that directly initializes the HMAC context without a separate key preparation step. Reimplement the one-shot raw key functions hmac_sha*_usingrawkey() on top of the new functions, which makes them a bit more efficient. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250711215844.41715-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/sha2.h | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include') diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index e0a08f6addd0..15e461e568cc 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -247,6 +247,21 @@ static inline void hmac_sha224_init(struct hmac_sha224_ctx *ctx, __hmac_sha256_init(&ctx->ctx, &key->key); } +/** + * hmac_sha224_init_usingrawkey() - Initialize an HMAC-SHA224 context for a new + * message, using a raw key + * @ctx: (output) the HMAC context to initialize + * @raw_key: the raw HMAC-SHA224 key + * @raw_key_len: the key length in bytes. All key lengths are supported. + * + * If you don't need incremental computation, consider hmac_sha224_usingrawkey() + * instead. + * + * Context: Any context. + */ +void hmac_sha224_init_usingrawkey(struct hmac_sha224_ctx *ctx, + const u8 *raw_key, size_t raw_key_len); + /** * hmac_sha224_update() - Update an HMAC-SHA224 context with message data * @ctx: the HMAC context to update; must have been initialized @@ -405,6 +420,21 @@ static inline void hmac_sha256_init(struct hmac_sha256_ctx *ctx, __hmac_sha256_init(&ctx->ctx, &key->key); } +/** + * hmac_sha256_init_usingrawkey() - Initialize an HMAC-SHA256 context for a new + * message, using a raw key + * @ctx: (output) the HMAC context to initialize + * @raw_key: the raw HMAC-SHA256 key + * @raw_key_len: the key length in bytes. All key lengths are supported. + * + * If you don't need incremental computation, consider hmac_sha256_usingrawkey() + * instead. + * + * Context: Any context. + */ +void hmac_sha256_init_usingrawkey(struct hmac_sha256_ctx *ctx, + const u8 *raw_key, size_t raw_key_len); + /** * hmac_sha256_update() - Update an HMAC-SHA256 context with message data * @ctx: the HMAC context to update; must have been initialized @@ -597,6 +627,21 @@ static inline void hmac_sha384_init(struct hmac_sha384_ctx *ctx, __hmac_sha512_init(&ctx->ctx, &key->key); } +/** + * hmac_sha384_init_usingrawkey() - Initialize an HMAC-SHA384 context for a new + * message, using a raw key + * @ctx: (output) the HMAC context to initialize + * @raw_key: the raw HMAC-SHA384 key + * @raw_key_len: the key length in bytes. All key lengths are supported. + * + * If you don't need incremental computation, consider hmac_sha384_usingrawkey() + * instead. + * + * Context: Any context. + */ +void hmac_sha384_init_usingrawkey(struct hmac_sha384_ctx *ctx, + const u8 *raw_key, size_t raw_key_len); + /** * hmac_sha384_update() - Update an HMAC-SHA384 context with message data * @ctx: the HMAC context to update; must have been initialized @@ -755,6 +800,21 @@ static inline void hmac_sha512_init(struct hmac_sha512_ctx *ctx, __hmac_sha512_init(&ctx->ctx, &key->key); } +/** + * hmac_sha512_init_usingrawkey() - Initialize an HMAC-SHA512 context for a new + * message, using a raw key + * @ctx: (output) the HMAC context to initialize + * @raw_key: the raw HMAC-SHA512 key + * @raw_key_len: the key length in bytes. All key lengths are supported. + * + * If you don't need incremental computation, consider hmac_sha512_usingrawkey() + * instead. + * + * Context: Any context. + */ +void hmac_sha512_init_usingrawkey(struct hmac_sha512_ctx *ctx, + const u8 *raw_key, size_t raw_key_len); + /** * hmac_sha512_update() - Update an HMAC-SHA512 context with message data * @ctx: the HMAC context to update; must have been initialized -- cgit v1.2.3 From 9503ca2ccafec51ee9e533d6f3aef14a589fc106 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 12 Jul 2025 16:22:53 -0700 Subject: lib/crypto: sha1: Rename sha1_init() to sha1_init_raw() Rename the existing sha1_init() to sha1_init_raw(), since it conflicts with the upcoming library function. This will later be removed, but this keeps the kernel building for the introduction of the library. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250712232329.818226-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/sha1.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/crypto/sha1.h b/include/crypto/sha1.h index f48230b1413c..d853d3b93169 100644 --- a/include/crypto/sha1.h +++ b/include/crypto/sha1.h @@ -33,7 +33,7 @@ struct sha1_state { */ #define SHA1_DIGEST_WORDS (SHA1_DIGEST_SIZE / 4) #define SHA1_WORKSPACE_WORDS 16 -void sha1_init(__u32 *buf); +void sha1_init_raw(__u32 *buf); void sha1_transform(__u32 *digest, const char *data, __u32 *W); #endif /* _CRYPTO_SHA1_H */ -- cgit v1.2.3 From 02bb63d1a59341032b8e7e4021e18d044bdb1786 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Thu, 3 Jul 2025 20:49:52 +0800 Subject: drm/bridge: Make dp/hdmi_audio_* callback keep the same paramter order with get_modes Make the dp/hdmi_audio_* callback maintain the same parameter order as get_modes and edid_read: first the bridge, then the connector. Signed-off-by: Andy Yan Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250703125027.311109-2-andyshrk@163.com [DB: added the chunk to the cdn-dp driver] Signed-off-by: Dmitry Baryshkov --- include/drm/drm_bridge.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index d2454ba83db3..ccead3edf59a 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -817,8 +817,8 @@ struct drm_bridge_funcs { * Returns: * 0 on success, a negative error code otherwise */ - int (*hdmi_audio_startup)(struct drm_connector *connector, - struct drm_bridge *bridge); + int (*hdmi_audio_startup)(struct drm_bridge *bridge, + struct drm_connector *connector); /** * @hdmi_audio_prepare: @@ -831,8 +831,8 @@ struct drm_bridge_funcs { * Returns: * 0 on success, a negative error code otherwise */ - int (*hdmi_audio_prepare)(struct drm_connector *connector, - struct drm_bridge *bridge, + int (*hdmi_audio_prepare)(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *fmt, struct hdmi_codec_params *hparms); @@ -847,8 +847,8 @@ struct drm_bridge_funcs { * Returns: * 0 on success, a negative error code otherwise */ - void (*hdmi_audio_shutdown)(struct drm_connector *connector, - struct drm_bridge *bridge); + void (*hdmi_audio_shutdown)(struct drm_bridge *bridge, + struct drm_connector *connector); /** * @hdmi_audio_mute_stream: @@ -861,12 +861,12 @@ struct drm_bridge_funcs { * Returns: * 0 on success, a negative error code otherwise */ - int (*hdmi_audio_mute_stream)(struct drm_connector *connector, - struct drm_bridge *bridge, + int (*hdmi_audio_mute_stream)(struct drm_bridge *bridge, + struct drm_connector *connector, bool enable, int direction); - int (*hdmi_cec_init)(struct drm_connector *connector, - struct drm_bridge *bridge); + int (*hdmi_cec_init)(struct drm_bridge *bridge, + struct drm_connector *connector); int (*hdmi_cec_enable)(struct drm_bridge *bridge, bool enable); @@ -886,8 +886,8 @@ struct drm_bridge_funcs { * Returns: * 0 on success, a negative error code otherwise */ - int (*dp_audio_startup)(struct drm_connector *connector, - struct drm_bridge *bridge); + int (*dp_audio_startup)(struct drm_bridge *bridge, + struct drm_connector *connector); /** * @dp_audio_prepare: @@ -900,8 +900,8 @@ struct drm_bridge_funcs { * Returns: * 0 on success, a negative error code otherwise */ - int (*dp_audio_prepare)(struct drm_connector *connector, - struct drm_bridge *bridge, + int (*dp_audio_prepare)(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *fmt, struct hdmi_codec_params *hparms); @@ -916,8 +916,8 @@ struct drm_bridge_funcs { * Returns: * 0 on success, a negative error code otherwise */ - void (*dp_audio_shutdown)(struct drm_connector *connector, - struct drm_bridge *bridge); + void (*dp_audio_shutdown)(struct drm_bridge *bridge, + struct drm_connector *connector); /** * @dp_audio_mute_stream: @@ -930,8 +930,8 @@ struct drm_bridge_funcs { * Returns: * 0 on success, a negative error code otherwise */ - int (*dp_audio_mute_stream)(struct drm_connector *connector, - struct drm_bridge *bridge, + int (*dp_audio_mute_stream)(struct drm_bridge *bridge, + struct drm_connector *connector, bool enable, int direction); /** -- cgit v1.2.3 From 5d156a9c3d5ea3dbec192121259dee2c2f938fa1 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Thu, 3 Jul 2025 20:49:53 +0800 Subject: drm/bridge: Pass down connector to drm bridge detect hook In some application scenarios, we hope to get the corresponding connector when the bridge's detect hook is invoked. In most cases, we can get the connector by drm_atomic_get_connector_for_encoder if the encoder attached to the bridge is enabled, however there will still be some scenarios where the detect hook of the bridge is called but the corresponding encoder has not been enabled yet. For instance, this occurs when the device is hot plug in for the first time. Since the call to bridge's detect is initiated by the connector, passing down the corresponding connector directly will make things simpler. Signed-off-by: Andy Yan Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250703125027.311109-3-andyshrk@163.com [DB: added the chunk to the cdn-dp driver] Signed-off-by: Dmitry Baryshkov --- include/drm/drm_bridge.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index ccead3edf59a..8ed80cad77ec 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -660,7 +660,8 @@ struct drm_bridge_funcs { * * drm_connector_status indicating the bridge output status. */ - enum drm_connector_status (*detect)(struct drm_bridge *bridge); + enum drm_connector_status (*detect)(struct drm_bridge *bridge, + struct drm_connector *connector); /** * @get_modes: @@ -1382,7 +1383,8 @@ drm_atomic_helper_bridge_propagate_bus_fmt(struct drm_bridge *bridge, u32 output_fmt, unsigned int *num_input_fmts); -enum drm_connector_status drm_bridge_detect(struct drm_bridge *bridge); +enum drm_connector_status +drm_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector); int drm_bridge_get_modes(struct drm_bridge *bridge, struct drm_connector *connector); const struct drm_edid *drm_bridge_edid_read(struct drm_bridge *bridge, -- cgit v1.2.3 From 90860aef630c5c9e58d05044f2866fcbfa7aa4d9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 12 Jul 2025 16:22:54 -0700 Subject: lib/crypto: sha1: Add SHA-1 library functions Add a library interface for SHA-1, following the SHA-2 one. As was the case with SHA-2, this will be useful for various in-kernel users. The crypto_shash interface will be reimplemented on top of it as well. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250712232329.818226-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/sha1.h | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include') diff --git a/include/crypto/sha1.h b/include/crypto/sha1.h index d853d3b93169..387f6123a05e 100644 --- a/include/crypto/sha1.h +++ b/include/crypto/sha1.h @@ -36,4 +36,64 @@ struct sha1_state { void sha1_init_raw(__u32 *buf); void sha1_transform(__u32 *digest, const char *data, __u32 *W); +/* State for the SHA-1 compression function */ +struct sha1_block_state { + u32 h[SHA1_DIGEST_SIZE / 4]; +}; + +/** + * struct sha1_ctx - Context for hashing a message with SHA-1 + * @state: the compression function state + * @bytecount: number of bytes processed so far + * @buf: partial block buffer; bytecount % SHA1_BLOCK_SIZE bytes are valid + */ +struct sha1_ctx { + struct sha1_block_state state; + u64 bytecount; + u8 buf[SHA1_BLOCK_SIZE]; +}; + +/** + * sha1_init() - Initialize a SHA-1 context for a new message + * @ctx: the context to initialize + * + * If you don't need incremental computation, consider sha1() instead. + * + * Context: Any context. + */ +void sha1_init(struct sha1_ctx *ctx); + +/** + * sha1_update() - Update a SHA-1 context with message data + * @ctx: the context to update; must have been initialized + * @data: the message data + * @len: the data length in bytes + * + * This can be called any number of times. + * + * Context: Any context. + */ +void sha1_update(struct sha1_ctx *ctx, const u8 *data, size_t len); + +/** + * sha1_final() - Finish computing a SHA-1 message digest + * @ctx: the context to finalize; must have been initialized + * @out: (output) the resulting SHA-1 message digest + * + * After finishing, this zeroizes @ctx. So the caller does not need to do it. + * + * Context: Any context. + */ +void sha1_final(struct sha1_ctx *ctx, u8 out[SHA1_DIGEST_SIZE]); + +/** + * sha1() - Compute SHA-1 message digest in one shot + * @data: the message data + * @len: the data length in bytes + * @out: (output) the resulting SHA-1 message digest + * + * Context: Any context. + */ +void sha1(const u8 *data, size_t len, u8 out[SHA1_DIGEST_SIZE]); + #endif /* _CRYPTO_SHA1_H */ -- cgit v1.2.3 From 4cbc84471bb606ddfaf424709dd8d56b56d7ae7b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 12 Jul 2025 16:22:55 -0700 Subject: lib/crypto: sha1: Add HMAC support Add HMAC support to the SHA-1 library, again following what was done for SHA-2. Besides providing the basis for a more streamlined "hmac(sha1)" shash, this will also be useful for multiple in-kernel users such as net/sctp/auth.c, net/ipv6/seg6_hmac.c, and security/keys/trusted-keys/trusted_tpm1.c. Those are currently using crypto_shash, but using the library functions would be much simpler. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250712232329.818226-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/sha1.h | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) (limited to 'include') diff --git a/include/crypto/sha1.h b/include/crypto/sha1.h index 387f6123a05e..162a529ec841 100644 --- a/include/crypto/sha1.h +++ b/include/crypto/sha1.h @@ -96,4 +96,122 @@ void sha1_final(struct sha1_ctx *ctx, u8 out[SHA1_DIGEST_SIZE]); */ void sha1(const u8 *data, size_t len, u8 out[SHA1_DIGEST_SIZE]); +/** + * struct hmac_sha1_key - Prepared key for HMAC-SHA1 + * @istate: private + * @ostate: private + */ +struct hmac_sha1_key { + struct sha1_block_state istate; + struct sha1_block_state ostate; +}; + +/** + * struct hmac_sha1_ctx - Context for computing HMAC-SHA1 of a message + * @sha_ctx: private + * @ostate: private + */ +struct hmac_sha1_ctx { + struct sha1_ctx sha_ctx; + struct sha1_block_state ostate; +}; + +/** + * hmac_sha1_preparekey() - Prepare a key for HMAC-SHA1 + * @key: (output) the key structure to initialize + * @raw_key: the raw HMAC-SHA1 key + * @raw_key_len: the key length in bytes. All key lengths are supported. + * + * Note: the caller is responsible for zeroizing both the struct hmac_sha1_key + * and the raw key once they are no longer needed. + * + * Context: Any context. + */ +void hmac_sha1_preparekey(struct hmac_sha1_key *key, + const u8 *raw_key, size_t raw_key_len); + +/** + * hmac_sha1_init() - Initialize an HMAC-SHA1 context for a new message + * @ctx: (output) the HMAC context to initialize + * @key: the prepared HMAC key + * + * If you don't need incremental computation, consider hmac_sha1() instead. + * + * Context: Any context. + */ +void hmac_sha1_init(struct hmac_sha1_ctx *ctx, const struct hmac_sha1_key *key); + +/** + * hmac_sha1_init_usingrawkey() - Initialize an HMAC-SHA1 context for a new + * message, using a raw key + * @ctx: (output) the HMAC context to initialize + * @raw_key: the raw HMAC-SHA1 key + * @raw_key_len: the key length in bytes. All key lengths are supported. + * + * If you don't need incremental computation, consider hmac_sha1_usingrawkey() + * instead. + * + * Context: Any context. + */ +void hmac_sha1_init_usingrawkey(struct hmac_sha1_ctx *ctx, + const u8 *raw_key, size_t raw_key_len); + +/** + * hmac_sha1_update() - Update an HMAC-SHA1 context with message data + * @ctx: the HMAC context to update; must have been initialized + * @data: the message data + * @data_len: the data length in bytes + * + * This can be called any number of times. + * + * Context: Any context. + */ +static inline void hmac_sha1_update(struct hmac_sha1_ctx *ctx, + const u8 *data, size_t data_len) +{ + sha1_update(&ctx->sha_ctx, data, data_len); +} + +/** + * hmac_sha1_final() - Finish computing an HMAC-SHA1 value + * @ctx: the HMAC context to finalize; must have been initialized + * @out: (output) the resulting HMAC-SHA1 value + * + * After finishing, this zeroizes @ctx. So the caller does not need to do it. + * + * Context: Any context. + */ +void hmac_sha1_final(struct hmac_sha1_ctx *ctx, u8 out[SHA1_DIGEST_SIZE]); + +/** + * hmac_sha1() - Compute HMAC-SHA1 in one shot, using a prepared key + * @key: the prepared HMAC key + * @data: the message data + * @data_len: the data length in bytes + * @out: (output) the resulting HMAC-SHA1 value + * + * If you're using the key only once, consider using hmac_sha1_usingrawkey(). + * + * Context: Any context. + */ +void hmac_sha1(const struct hmac_sha1_key *key, + const u8 *data, size_t data_len, u8 out[SHA1_DIGEST_SIZE]); + +/** + * hmac_sha1_usingrawkey() - Compute HMAC-SHA1 in one shot, using a raw key + * @raw_key: the raw HMAC-SHA1 key + * @raw_key_len: the key length in bytes. All key lengths are supported. + * @data: the message data + * @data_len: the data length in bytes + * @out: (output) the resulting HMAC-SHA1 value + * + * If you're using the key multiple times, prefer to use hmac_sha1_preparekey() + * followed by multiple calls to hmac_sha1() instead. + * + * Context: Any context. + */ +void hmac_sha1_usingrawkey(const u8 *raw_key, size_t raw_key_len, + const u8 *data, size_t data_len, + u8 out[SHA1_DIGEST_SIZE]); + #endif /* _CRYPTO_SHA1_H */ -- cgit v1.2.3 From 8d43417e93073699b521f603286140415b24968b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 May 2025 13:48:45 +0200 Subject: sunrpc: simplify xdr_init_encode_pages The rqst argument to xdr_init_encode_pages is set to NULL by all callers, and pages is always set to buf->pages. Remove the two arguments and hardcode the assignments. Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index a2ab813a9800..29d3a7659727 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -242,8 +242,7 @@ typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); -extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, - struct page **pages, struct rpc_rqst *rqst); +void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes); extern void __xdr_commit_encode(struct xdr_stream *xdr); -- cgit v1.2.3 From f26c93053074bba9342b74632c195a043a825ac5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 27 May 2025 20:12:48 -0400 Subject: sunrpc: new tracepoints around svc thread wakeups Convert the svc_wake_up tracepoint into svc_pool_thread_event class. Have it also record the pool id, and add new tracepoints for when the thread is already running and for when there are no idle threads. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index aad697da1580..ff11fa07cbe3 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -2123,22 +2123,35 @@ TRACE_EVENT(svc_xprt_accept, ) ); -TRACE_EVENT(svc_wake_up, - TP_PROTO(int pid), +DECLARE_EVENT_CLASS(svc_pool_thread_event, + TP_PROTO(const struct svc_pool *pool, pid_t pid), - TP_ARGS(pid), + TP_ARGS(pool, pid), TP_STRUCT__entry( - __field(int, pid) + __field(unsigned int, pool_id) + __field(pid_t, pid) ), TP_fast_assign( + __entry->pool_id = pool->sp_id; __entry->pid = pid; ), - TP_printk("pid=%d", __entry->pid) + TP_printk("pool=%u pid=%d", __entry->pool_id, __entry->pid) ); +#define DEFINE_SVC_POOL_THREAD_EVENT(name) \ + DEFINE_EVENT(svc_pool_thread_event, svc_pool_thread_##name, \ + TP_PROTO( \ + const struct svc_pool *pool, pid_t pid \ + ), \ + TP_ARGS(pool, pid)) + +DEFINE_SVC_POOL_THREAD_EVENT(wake); +DEFINE_SVC_POOL_THREAD_EVENT(running); +DEFINE_SVC_POOL_THREAD_EVENT(noidle); + TRACE_EVENT(svc_alloc_arg_err, TP_PROTO( unsigned int requested, -- cgit v1.2.3 From d49afc90a3ba3af4507049fb43cb128d9a9c66d5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 20 Jun 2025 08:16:01 -0400 Subject: sunrpc: fix handling of unknown auth status codes In the case of an unknown error code from svc_authenticate or pg_authenticate, return AUTH_ERROR with a status of AUTH_FAILED. Also add the other auth_stat value from RFC 5531, and document all the status codes. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/msg_prot.h | 18 ++++++++++-------- include/linux/sunrpc/xdr.h | 2 ++ 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index c4b0eb2b2f04..ada17b57ca44 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -69,15 +69,17 @@ enum rpc_reject_stat { }; enum rpc_auth_stat { - RPC_AUTH_OK = 0, - RPC_AUTH_BADCRED = 1, - RPC_AUTH_REJECTEDCRED = 2, - RPC_AUTH_BADVERF = 3, - RPC_AUTH_REJECTEDVERF = 4, - RPC_AUTH_TOOWEAK = 5, + RPC_AUTH_OK = 0, /* success */ + RPC_AUTH_BADCRED = 1, /* bad credential (seal broken) */ + RPC_AUTH_REJECTEDCRED = 2, /* client must begin new session */ + RPC_AUTH_BADVERF = 3, /* bad verifier (seal broken) */ + RPC_AUTH_REJECTEDVERF = 4, /* verifier expired or replayed */ + RPC_AUTH_TOOWEAK = 5, /* rejected for security reasons */ + RPC_AUTH_INVALIDRESP = 6, /* bogus response verifier */ + RPC_AUTH_FAILED = 7, /* reason unknown */ /* RPCSEC_GSS errors */ - RPCSEC_GSS_CREDPROBLEM = 13, - RPCSEC_GSS_CTXPROBLEM = 14 + RPCSEC_GSS_CREDPROBLEM = 13, /* no credentials for user */ + RPCSEC_GSS_CTXPROBLEM = 14 /* problem with context */ }; #define RPC_MAXNETNAMELEN 256 diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 29d3a7659727..e3358c630ba1 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -119,6 +119,8 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) #define rpc_autherr_badverf cpu_to_be32(RPC_AUTH_BADVERF) #define rpc_autherr_rejectedverf cpu_to_be32(RPC_AUTH_REJECTEDVERF) #define rpc_autherr_tooweak cpu_to_be32(RPC_AUTH_TOOWEAK) +#define rpc_autherr_invalidresp cpu_to_be32(RPC_AUTH_INVALIDRESP) +#define rpc_autherr_failed cpu_to_be32(RPC_AUTH_FAILED) #define rpcsec_gsserr_credproblem cpu_to_be32(RPCSEC_GSS_CREDPROBLEM) #define rpcsec_gsserr_ctxproblem cpu_to_be32(RPCSEC_GSS_CTXPROBLEM) -- cgit v1.2.3 From 6f0e26243b02f440938ab7a3782eb730f2247fb1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 20 Jun 2025 08:16:02 -0400 Subject: sunrpc: remove SVC_SYSERR Nothing returns this error code. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svcauth.h | 1 - include/trace/events/sunrpc.h | 2 -- 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 2e111153f7cd..4b92fec23a49 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -86,7 +86,6 @@ struct auth_domain { enum svc_auth_status { SVC_GARBAGE = 1, - SVC_SYSERR, SVC_VALID, SVC_NEGATIVE, SVC_OK, diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ff11fa07cbe3..750ecce56930 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1691,7 +1691,6 @@ SVC_RQST_FLAG_LIST __print_flags(flags, "|", SVC_RQST_FLAG_LIST) TRACE_DEFINE_ENUM(SVC_GARBAGE); -TRACE_DEFINE_ENUM(SVC_SYSERR); TRACE_DEFINE_ENUM(SVC_VALID); TRACE_DEFINE_ENUM(SVC_NEGATIVE); TRACE_DEFINE_ENUM(SVC_OK); @@ -1704,7 +1703,6 @@ TRACE_DEFINE_ENUM(SVC_COMPLETE); #define show_svc_auth_status(status) \ __print_symbolic(status, \ { SVC_GARBAGE, "SVC_GARBAGE" }, \ - { SVC_SYSERR, "SVC_SYSERR" }, \ { SVC_VALID, "SVC_VALID" }, \ { SVC_NEGATIVE, "SVC_NEGATIVE" }, \ { SVC_OK, "SVC_OK" }, \ -- cgit v1.2.3 From 2bac9a4c5f452d42a78ce07596ef88f75978b536 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 20 Jun 2025 08:16:05 -0400 Subject: sunrpc: rearrange struct svc_rqst for fewer cachelines This shrinks the struct by 4 bytes, but also takes it from 19 to 18 cachelines on x86_64. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 48666b83fe68..40cbe81360ed 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -245,10 +245,10 @@ struct svc_rqst { * initialisation success. */ - unsigned long bc_to_initval; - unsigned int bc_to_retries; - void ** rq_lease_breaker; /* The v4 client breaking a lease */ + unsigned long bc_to_initval; + unsigned int bc_to_retries; unsigned int rq_status_counter; /* RPC processing counter */ + void **rq_lease_breaker; /* The v4 client breaking a lease */ }; /* bits for rq_flags */ -- cgit v1.2.3 From f4312e6bfa2a98e94dacc75f96f916b76bdf4259 Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Tue, 8 Jul 2025 16:05:50 -0500 Subject: idpf: implement core RDMA auxiliary dev create, init, and destroy Add the initial idpf_idc.c file with the functions to kick off the IDC initialization, create and initialize a core RDMA auxiliary device, and destroy said device. The RDMA core has a dependency on the vports being created by the control plane before it can be initialized. Therefore, once all the vports are up after a hard reset (either during driver load a function level reset), the core RDMA device info will be created. It is populated with the function type (as distinguished by the IDC initialization function pointer), the core idc_ops function points (just stubs for now), the reserved RDMA MSIX table, and various other info the core RDMA auxiliary driver will need. It is then plugged on to the bus. During a function level reset or driver unload, the device will be unplugged from the bus and destroyed. Reviewed-by: Madhu Chittim Signed-off-by: Joshua Hay Signed-off-by: Tatyana Nikolova Signed-off-by: Tony Nguyen --- include/linux/net/intel/iidc_rdma_idpf.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/linux/net/intel/iidc_rdma_idpf.h (limited to 'include') diff --git a/include/linux/net/intel/iidc_rdma_idpf.h b/include/linux/net/intel/iidc_rdma_idpf.h new file mode 100644 index 000000000000..f2fe1844f660 --- /dev/null +++ b/include/linux/net/intel/iidc_rdma_idpf.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2025 Intel Corporation. */ + +#ifndef _IIDC_RDMA_IDPF_H_ +#define _IIDC_RDMA_IDPF_H_ + +#include + +/* struct to be populated by core LAN PCI driver */ +enum iidc_function_type { + IIDC_FUNCTION_TYPE_PF, + IIDC_FUNCTION_TYPE_VF, +}; + +struct iidc_rdma_priv_dev_info { + struct msix_entry *msix_entries; + u16 msix_count; /* How many vectors are reserved for this device */ + enum iidc_function_type ftype; +}; + +int idpf_idc_vport_dev_ctrl(struct iidc_rdma_core_dev_info *cdev_info, bool up); +int idpf_idc_request_reset(struct iidc_rdma_core_dev_info *cdev_info, + enum iidc_rdma_reset_type __always_unused reset_type); +int idpf_idc_rdma_vc_send_sync(struct iidc_rdma_core_dev_info *cdev_info, + u8 *send_msg, u16 msg_size, + u8 *recv_msg, u16 *recv_len); + +#endif /* _IIDC_RDMA_IDPF_H_ */ -- cgit v1.2.3 From be91128c579c86d295da4325f6ac4710e4e6d2b4 Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Tue, 8 Jul 2025 16:05:51 -0500 Subject: idpf: implement RDMA vport auxiliary dev create, init, and destroy Implement the functions to create, initialize, and destroy an RDMA vport auxiliary device. The vport aux dev creation is dependent on the core aux device to call idpf_idc_vport_dev_ctrl to signal that it is ready for vport aux devices. Implement that core callback to either create and initialize the vport aux dev or deinitialize. RDMA vport aux dev creation is also dependent on the control plane to tell us the vport is RDMA enabled. Add a flag in the create vport message to signal individual vport RDMA capabilities. Reviewed-by: Madhu Chittim Signed-off-by: Joshua Hay Signed-off-by: Tatyana Nikolova Signed-off-by: Tony Nguyen --- include/linux/net/intel/iidc_rdma_idpf.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/net/intel/iidc_rdma_idpf.h b/include/linux/net/intel/iidc_rdma_idpf.h index f2fe1844f660..16c970dd4c6e 100644 --- a/include/linux/net/intel/iidc_rdma_idpf.h +++ b/include/linux/net/intel/iidc_rdma_idpf.h @@ -6,6 +6,25 @@ #include +/* struct to be populated by core LAN PCI driver */ +struct iidc_rdma_vport_dev_info { + struct auxiliary_device *adev; + struct auxiliary_device *core_adev; + struct net_device *netdev; + u16 vport_id; +}; + +struct iidc_rdma_vport_auxiliary_dev { + struct auxiliary_device adev; + struct iidc_rdma_vport_dev_info *vdev_info; +}; + +struct iidc_rdma_vport_auxiliary_drv { + struct auxiliary_driver adrv; + void (*event_handler)(struct iidc_rdma_vport_dev_info *vdev, + struct iidc_rdma_event *event); +}; + /* struct to be populated by core LAN PCI driver */ enum iidc_function_type { IIDC_FUNCTION_TYPE_PF, -- cgit v1.2.3 From 6aa53e861c1a0c042690c9b7c5c153088ae61079 Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Tue, 8 Jul 2025 16:05:54 -0500 Subject: idpf: implement get LAN MMIO memory regions The RDMA driver needs to map its own MMIO regions for the sake of performance, meaning the IDPF needs to avoid mapping portions of the BAR space. However, to be HW agnostic, the IDPF cannot assume where these are and must avoid mapping hard coded regions as much as possible. The IDPF maps the bare minimum to load and communicate with the control plane, i.e., the mailbox registers and the reset state registers. Because of how and when mailbox register offsets are initialized, it is easier to adjust the existing defines to be relative to the mailbox region starting address. Use a specific mailbox register write function that uses these relative offsets. The reset state register addresses are calculated the same way as for other registers, described below. The IDPF then calls a new virtchnl op to fetch a list of MMIO regions that it should map. The addresses for the registers in these regions are calculated by determining what region the register resides in, adjusting the offset to be relative to that region, and then adding the register's offset to that region's mapped address. If the new virtchnl op is not supported, the IDPF will fallback to mapping the whole bar. However, it will still map them as separate regions outside the mailbox and reset state registers. This way we can use the same logic in both cases to access the MMIO space. Reviewed-by: Madhu Chittim Signed-off-by: Joshua Hay Signed-off-by: Tatyana Nikolova Signed-off-by: Tony Nguyen --- include/linux/net/intel/iidc_rdma_idpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/net/intel/iidc_rdma_idpf.h b/include/linux/net/intel/iidc_rdma_idpf.h index 16c970dd4c6e..bab697e18fd6 100644 --- a/include/linux/net/intel/iidc_rdma_idpf.h +++ b/include/linux/net/intel/iidc_rdma_idpf.h @@ -31,10 +31,18 @@ enum iidc_function_type { IIDC_FUNCTION_TYPE_VF, }; +struct iidc_rdma_lan_mapped_mem_region { + u8 __iomem *region_addr; + __le64 size; + __le64 start_offset; +}; + struct iidc_rdma_priv_dev_info { struct msix_entry *msix_entries; u16 msix_count; /* How many vectors are reserved for this device */ enum iidc_function_type ftype; + __le16 num_memory_regions; + struct iidc_rdma_lan_mapped_mem_region *mapped_mem_regions; }; int idpf_idc_vport_dev_ctrl(struct iidc_rdma_core_dev_info *cdev_info, bool up); -- cgit v1.2.3 From c76ed8790b3018fe36647d9aae96e0373f321184 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 12 Jul 2025 16:23:05 -0700 Subject: crypto: sha1 - Remove sha1_base.h sha1_base.h is no longer used, so remove it. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250712232329.818226-15-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/sha1_base.h | 82 ---------------------------------------------- 1 file changed, 82 deletions(-) delete mode 100644 include/crypto/sha1_base.h (limited to 'include') diff --git a/include/crypto/sha1_base.h b/include/crypto/sha1_base.h deleted file mode 100644 index 62701d136c79..000000000000 --- a/include/crypto/sha1_base.h +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * sha1_base.h - core logic for SHA-1 implementations - * - * Copyright (C) 2015 Linaro Ltd - */ - -#ifndef _CRYPTO_SHA1_BASE_H -#define _CRYPTO_SHA1_BASE_H - -#include -#include -#include -#include -#include -#include - -typedef void (sha1_block_fn)(struct sha1_state *sst, u8 const *src, int blocks); - -static inline int sha1_base_init(struct shash_desc *desc) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SHA1_H0; - sctx->state[1] = SHA1_H1; - sctx->state[2] = SHA1_H2; - sctx->state[3] = SHA1_H3; - sctx->state[4] = SHA1_H4; - sctx->count = 0; - - return 0; -} - -static inline int sha1_base_do_update_blocks(struct shash_desc *desc, - const u8 *data, - unsigned int len, - sha1_block_fn *block_fn) -{ - unsigned int remain = len - round_down(len, SHA1_BLOCK_SIZE); - struct sha1_state *sctx = shash_desc_ctx(desc); - - sctx->count += len - remain; - block_fn(sctx, data, len / SHA1_BLOCK_SIZE); - return remain; -} - -static inline int sha1_base_do_finup(struct shash_desc *desc, - const u8 *src, unsigned int len, - sha1_block_fn *block_fn) -{ - unsigned int bit_offset = SHA1_BLOCK_SIZE / 8 - 1; - struct sha1_state *sctx = shash_desc_ctx(desc); - union { - __be64 b64[SHA1_BLOCK_SIZE / 4]; - u8 u8[SHA1_BLOCK_SIZE * 2]; - } block = {}; - - if (len >= bit_offset * 8) - bit_offset += SHA1_BLOCK_SIZE / 8; - memcpy(&block, src, len); - block.u8[len] = 0x80; - sctx->count += len; - block.b64[bit_offset] = cpu_to_be64(sctx->count << 3); - block_fn(sctx, block.u8, (bit_offset + 1) * 8 / SHA1_BLOCK_SIZE); - memzero_explicit(&block, sizeof(block)); - - return 0; -} - -static inline int sha1_base_finish(struct shash_desc *desc, u8 *out) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - __be32 *digest = (__be32 *)out; - int i; - - for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) - put_unaligned_be32(sctx->state[i], digest++); - - return 0; -} - -#endif /* _CRYPTO_SHA1_BASE_H */ -- cgit v1.2.3 From 5a8f77e24a30bbce2fa57926f3dede84894fd10a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 2 Jul 2025 11:35:18 +0200 Subject: PCI/IOV: Restore VF resizable BAR state after reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to regular resizable BARs, VF BARs can also be resized, e.g. by the system firmware or the PCI subsystem itself. The capability layout is the same as PCI_EXT_CAP_ID_REBAR. Add the capability ID and restore it as a part of IOV state. See PCIe r6.2, sec 7.8.7. Signed-off-by: Michał Winiarski Signed-off-by: Bjorn Helgaas Reviewed-by: Ilpo Järvinen Reviewed-by: Christian König Link: https://patch.msgid.link/20250702093522.518099-2-michal.winiarski@intel.com --- include/uapi/linux/pci_regs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index a3a3e942dedf..f5b17745de60 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -745,6 +745,7 @@ #define PCI_EXT_CAP_ID_L1SS 0x1E /* L1 PM Substates */ #define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */ #define PCI_EXT_CAP_ID_DVSEC 0x23 /* Designated Vendor-Specific */ +#define PCI_EXT_CAP_ID_VF_REBAR 0x24 /* VF Resizable BAR */ #define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */ #define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0 GT/s */ #define PCI_EXT_CAP_ID_NPEM 0x29 /* Native PCIe Enclosure Management */ @@ -1141,6 +1142,14 @@ #define PCI_DVSEC_HEADER2 0x8 /* Designated Vendor-Specific Header2 */ #define PCI_DVSEC_HEADER2_ID(x) ((x) & 0xffff) +/* VF Resizable BARs, same layout as PCI_REBAR */ +#define PCI_VF_REBAR_CAP PCI_REBAR_CAP +#define PCI_VF_REBAR_CAP_SIZES PCI_REBAR_CAP_SIZES +#define PCI_VF_REBAR_CTRL PCI_REBAR_CTRL +#define PCI_VF_REBAR_CTRL_BAR_IDX PCI_REBAR_CTRL_BAR_IDX +#define PCI_VF_REBAR_CTRL_NBAR_MASK PCI_REBAR_CTRL_NBAR_MASK +#define PCI_VF_REBAR_CTRL_BAR_SIZE PCI_REBAR_CTRL_BAR_SIZE + /* Data Link Feature */ #define PCI_DLF_CAP 0x04 /* Capabilities Register */ #define PCI_DLF_EXCHANGE_ENABLE 0x80000000 /* Data Link Feature Exchange Enable */ -- cgit v1.2.3 From 84f890414a12b8d1480045b92a5e4e6ac4ab3419 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 2 Jul 2025 11:35:22 +0200 Subject: PCI/IOV: Allow drivers to control VF BAR size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers could leverage the fact that the VF BAR MMIO reservation is created for total number of VFs supported by the device by resizing the BAR to larger size when smaller number of VFs is enabled. Add pci_iov_vf_bar_set_size() to control the size and a pci_iov_vf_bar_get_sizes() helper to get the VF BAR sizes that will allow up to num_vfs to be successfully enabled with the current underlying reservation size. Signed-off-by: Michał Winiarski Signed-off-by: Bjorn Helgaas Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/20250702093522.518099-6-michal.winiarski@intel.com --- include/linux/pci.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 05e68f35f392..28f06045ab20 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2438,6 +2438,8 @@ int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); int pci_sriov_get_totalvfs(struct pci_dev *dev); int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn); resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno); +int pci_iov_vf_bar_set_size(struct pci_dev *dev, int resno, int size); +u32 pci_iov_vf_bar_get_sizes(struct pci_dev *dev, int resno, int num_vfs); void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe); /* Arch may override these (weak) */ @@ -2490,6 +2492,10 @@ static inline int pci_sriov_get_totalvfs(struct pci_dev *dev) #define pci_sriov_configure_simple NULL static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) { return 0; } +static inline int pci_iov_vf_bar_set_size(struct pci_dev *dev, int resno, int size) +{ return -ENODEV; } +static inline u32 pci_iov_vf_bar_get_sizes(struct pci_dev *dev, int resno, int num_vfs) +{ return 0; } static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { } #endif -- cgit v1.2.3 From ce60ab3964782df9ba34f0a64c0bc766dd508bde Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 May 2025 06:45:45 -0400 Subject: Expand the type of nfs_fattr->valid We need to be able to track more than 32 attributes per inode. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/1e3405fca54efd0be7c91c1da77917b94f5dfcc4.1748515333.git.bcodding@redhat.com Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 2 +- include/linux/nfs_xdr.h | 54 +++++++++++++++++++++++------------------------ 2 files changed, 28 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 63141320c2a8..d7895eeccea3 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -172,8 +172,8 @@ struct nfs_server { #define NFS_MOUNT_FORCE_RDIRPLUS 0x20000000 #define NFS_MOUNT_NETUNREACH_FATAL 0x40000000 - unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ + __u64 fattr_valid; /* Valid attributes */ unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ unsigned int wsize; /* write size */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 67f6632f723b..9cacbbd14787 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -45,7 +45,7 @@ struct nfs4_threshold { }; struct nfs_fattr { - unsigned int valid; /* which fields are valid */ + __u64 valid; /* which fields are valid */ umode_t mode; __u32 nlink; kuid_t uid; @@ -80,32 +80,32 @@ struct nfs_fattr { struct nfs4_label *label; }; -#define NFS_ATTR_FATTR_TYPE (1U << 0) -#define NFS_ATTR_FATTR_MODE (1U << 1) -#define NFS_ATTR_FATTR_NLINK (1U << 2) -#define NFS_ATTR_FATTR_OWNER (1U << 3) -#define NFS_ATTR_FATTR_GROUP (1U << 4) -#define NFS_ATTR_FATTR_RDEV (1U << 5) -#define NFS_ATTR_FATTR_SIZE (1U << 6) -#define NFS_ATTR_FATTR_PRESIZE (1U << 7) -#define NFS_ATTR_FATTR_BLOCKS_USED (1U << 8) -#define NFS_ATTR_FATTR_SPACE_USED (1U << 9) -#define NFS_ATTR_FATTR_FSID (1U << 10) -#define NFS_ATTR_FATTR_FILEID (1U << 11) -#define NFS_ATTR_FATTR_ATIME (1U << 12) -#define NFS_ATTR_FATTR_MTIME (1U << 13) -#define NFS_ATTR_FATTR_CTIME (1U << 14) -#define NFS_ATTR_FATTR_PREMTIME (1U << 15) -#define NFS_ATTR_FATTR_PRECTIME (1U << 16) -#define NFS_ATTR_FATTR_CHANGE (1U << 17) -#define NFS_ATTR_FATTR_PRECHANGE (1U << 18) -#define NFS_ATTR_FATTR_V4_LOCATIONS (1U << 19) -#define NFS_ATTR_FATTR_V4_REFERRAL (1U << 20) -#define NFS_ATTR_FATTR_MOUNTPOINT (1U << 21) -#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID (1U << 22) -#define NFS_ATTR_FATTR_OWNER_NAME (1U << 23) -#define NFS_ATTR_FATTR_GROUP_NAME (1U << 24) -#define NFS_ATTR_FATTR_V4_SECURITY_LABEL (1U << 25) +#define NFS_ATTR_FATTR_TYPE BIT_ULL(0) +#define NFS_ATTR_FATTR_MODE BIT_ULL(1) +#define NFS_ATTR_FATTR_NLINK BIT_ULL(2) +#define NFS_ATTR_FATTR_OWNER BIT_ULL(3) +#define NFS_ATTR_FATTR_GROUP BIT_ULL(4) +#define NFS_ATTR_FATTR_RDEV BIT_ULL(5) +#define NFS_ATTR_FATTR_SIZE BIT_ULL(6) +#define NFS_ATTR_FATTR_PRESIZE BIT_ULL(7) +#define NFS_ATTR_FATTR_BLOCKS_USED BIT_ULL(8) +#define NFS_ATTR_FATTR_SPACE_USED BIT_ULL(9) +#define NFS_ATTR_FATTR_FSID BIT_ULL(10) +#define NFS_ATTR_FATTR_FILEID BIT_ULL(11) +#define NFS_ATTR_FATTR_ATIME BIT_ULL(12) +#define NFS_ATTR_FATTR_MTIME BIT_ULL(13) +#define NFS_ATTR_FATTR_CTIME BIT_ULL(14) +#define NFS_ATTR_FATTR_PREMTIME BIT_ULL(15) +#define NFS_ATTR_FATTR_PRECTIME BIT_ULL(16) +#define NFS_ATTR_FATTR_CHANGE BIT_ULL(17) +#define NFS_ATTR_FATTR_PRECHANGE BIT_ULL(18) +#define NFS_ATTR_FATTR_V4_LOCATIONS BIT_ULL(19) +#define NFS_ATTR_FATTR_V4_REFERRAL BIT_ULL(20) +#define NFS_ATTR_FATTR_MOUNTPOINT BIT_ULL(21) +#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID BIT_ULL(22) +#define NFS_ATTR_FATTR_OWNER_NAME BIT_ULL(23) +#define NFS_ATTR_FATTR_GROUP_NAME BIT_ULL(24) +#define NFS_ATTR_FATTR_V4_SECURITY_LABEL BIT_ULL(25) #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \ | NFS_ATTR_FATTR_MODE \ -- cgit v1.2.3 From 1c7ae2dd3f0e6d07ec0a5a348f2561f2171b9c81 Mon Sep 17 00:00:00 2001 From: Anne Marie Merritt Date: Thu, 29 May 2025 06:45:46 -0400 Subject: nfs: Add timecreate to nfs inode Add tracking of the create time (a.k.a. btime) along with corresponding bitfields, request, and decode xdr routines. Signed-off-by: Anne Marie Merritt Signed-off-by: Lance Shelton Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/1e3677b0655fa2bbaba0817b41d111d94a06e5ee.1748515333.git.bcodding@redhat.com Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 8 ++++++++ include/linux/nfs_xdr.h | 3 +++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 67ae2c3f41d2..c585939b6cd6 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -160,6 +160,12 @@ struct nfs_inode { unsigned long flags; /* atomic bit ops */ unsigned long cache_validity; /* bit mask */ + /* + * NFS Attributes not included in struct inode + */ + + struct timespec64 btime; + /* * read_cache_jiffies is when we started read-caching this inode. * attrtimeo is for how long the cached information is assumed @@ -316,10 +322,12 @@ struct nfs4_copy_state { #define NFS_INO_INVALID_XATTR BIT(15) /* xattrs are invalid */ #define NFS_INO_INVALID_NLINK BIT(16) /* cached nlinks is invalid */ #define NFS_INO_INVALID_MODE BIT(17) /* cached mode is invalid */ +#define NFS_INO_INVALID_BTIME BIT(18) /* cached btime is invalid */ #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ | NFS_INO_INVALID_CTIME \ | NFS_INO_INVALID_MTIME \ + | NFS_INO_INVALID_BTIME \ | NFS_INO_INVALID_SIZE \ | NFS_INO_INVALID_NLINK \ | NFS_INO_INVALID_MODE \ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9cacbbd14787..ac4bff6e9913 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -67,6 +67,7 @@ struct nfs_fattr { struct timespec64 atime; struct timespec64 mtime; struct timespec64 ctime; + struct timespec64 btime; __u64 change_attr; /* NFSv4 change attribute */ __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ __u64 pre_size; /* pre_op_attr.size */ @@ -106,6 +107,7 @@ struct nfs_fattr { #define NFS_ATTR_FATTR_OWNER_NAME BIT_ULL(23) #define NFS_ATTR_FATTR_GROUP_NAME BIT_ULL(24) #define NFS_ATTR_FATTR_V4_SECURITY_LABEL BIT_ULL(25) +#define NFS_ATTR_FATTR_BTIME BIT_ULL(26) #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \ | NFS_ATTR_FATTR_MODE \ @@ -126,6 +128,7 @@ struct nfs_fattr { | NFS_ATTR_FATTR_SPACE_USED) #define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \ | NFS_ATTR_FATTR_SPACE_USED \ + | NFS_ATTR_FATTR_BTIME \ | NFS_ATTR_FATTR_V4_SECURITY_LABEL) /* -- cgit v1.2.3 From 0715a72ee9a38461eac4b34388b772914f269119 Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Fri, 13 Jun 2025 11:44:37 +0200 Subject: NFS: remove unused wpages field from struct nfs_server The wpages field is not serving any purpose since commit c63c7b051395 ("NFS: Fix a race when doing NFS write coalescing") which was merged in v2.6.22-rc1. Remove it completely. Signed-off-by: Anthony Iliopoulos Link: https://lore.kernel.org/r/20250613094439.82338-2-ailiop@suse.com Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d7895eeccea3..7048f9b867ab 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -177,7 +177,6 @@ struct nfs_server { unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ unsigned int wsize; /* write size */ - unsigned int wpages; /* write size (in pages) */ unsigned int wtmult; /* server disk block size */ unsigned int dtsize; /* readdir size */ unsigned short port; /* "port=" setting */ -- cgit v1.2.3 From 74a33326cfe8e62ebe0a65ba01ea8a8bceb532f8 Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Fri, 13 Jun 2025 11:44:38 +0200 Subject: NFS: remove unused time_delta field from struct nfs_server The last code that was using this was removed via commit ca0daa277aca ("NFS: Cache aggressively when file is open for writing") which was merged in v4.8-rc1, so it can be removed completely. Signed-off-by: Anthony Iliopoulos Link: https://lore.kernel.org/r/20250613094439.82338-3-ailiop@suse.com Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 7048f9b867ab..e1b2cf57e765 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -202,7 +202,6 @@ struct nfs_server { struct nfs_fsid fsid; int s_sysfs_id; /* sysfs dentry index */ __u64 maxfilesize; /* maximum file size */ - struct timespec64 time_delta; /* smallest time granularity */ unsigned long mount_time; /* when this fs was mounted */ struct super_block *super; /* VFS super block */ dev_t s_dev; /* superblock dev numbers */ -- cgit v1.2.3 From 2c665d91c2a2d8b5bdf1374d1253b3c89fca4ede Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Fri, 13 Jun 2025 11:44:39 +0200 Subject: NFS: remove unused pnfs_ld_data field from struct nfs_server The last code that was using this was removed via commit 20d655d6197d ("pnfs/blocklayout: use the device id cache") which was merged in v3.18-rc1, so it can be removed completely. Signed-off-by: Anthony Iliopoulos Link: https://lore.kernel.org/r/20250613094439.82338-4-ailiop@suse.com Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index e1b2cf57e765..d2d36711a119 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -246,7 +246,6 @@ struct nfs_server { filesystem */ struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ struct rpc_wait_queue roc_rpcwaitq; - void *pnfs_ld_data; /* per mount point data */ /* the following fields are protected by nfs_client->cl_lock */ struct rb_root state_owners; -- cgit v1.2.3 From 48693d119b2114f8eaf8b8f972b29e05ae581ad4 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 13 Jul 2025 00:30:06 +0100 Subject: SUNRPC: Remove unused xdr functions Remove a bunch of unused xdr_*decode* functions: The last use of xdr_decode_netobj() was removed in 2021 by: commit 7cf96b6d0104 ("lockd: Update the NLMv4 SHARE arguments decoder to use struct xdr_stream") The last use of xdr_decode_string_inplace() was removed in 2021 by: commit 3049e974a7c7 ("lockd: Update the NLMv4 FREE_ALL arguments decoder to use struct xdr_stream") The last use of xdr_stream_decode_opaque() was removed in 2024 by: commit fed8a17c61ff ("xdrgen: typedefs should use the built-in string and opaque functions") The functions xdr_stream_decode_string() and xdr_stream_decode_opaque_dup() were both added in 2018 by the commit 0e779aa70308 ("SUNRPC: Add helpers for decoding opaque and string types") but never used. Remove them. Signed-off-by: Dr. David Alan Gilbert Link: https://lore.kernel.org/r/20250712233006.403226-1-linux@treblig.org Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index a2ab813a9800..e370886632b0 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -128,10 +128,7 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) __be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len); __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len); __be32 *xdr_encode_string(__be32 *p, const char *s); -__be32 *xdr_decode_string_inplace(__be32 *p, char **sp, unsigned int *lenp, - unsigned int maxlen); __be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *); -__be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); void xdr_inline_pages(struct xdr_buf *, unsigned int, struct page **, unsigned int, unsigned int); @@ -341,12 +338,6 @@ xdr_stream_remaining(const struct xdr_stream *xdr) return xdr->nwords << 2; } -ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, - size_t size); -ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr, - size_t maxlen, gfp_t gfp_flags); -ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, - size_t size); ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, size_t maxlen, gfp_t gfp_flags); ssize_t xdr_stream_decode_opaque_auth(struct xdr_stream *xdr, u32 *flavor, -- cgit v1.2.3 From c49601642f95c8c6787acb07881f2495bc8aeb27 Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Mon, 14 Jul 2025 13:23:35 +0530 Subject: scsi: ufs: core: Add ufshcd_dme_rmw() to modify DME attributes Introduce ufshcd_dme_rmw() API to read, modify, and write DME attributes in UFS host controllers using a mask and value. Signed-off-by: Nitin Rawat Link: https://lore.kernel.org/r/20250714075336.2133-3-quic_nitirawa@quicinc.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 9b3515cee711..1d3943777584 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1480,6 +1480,7 @@ void ufshcd_resume_complete(struct device *dev); bool ufshcd_is_hba_active(struct ufs_hba *hba); void ufshcd_pm_qos_init(struct ufs_hba *hba); void ufshcd_pm_qos_exit(struct ufs_hba *hba); +int ufshcd_dme_rmw(struct ufs_hba *hba, u32 mask, u32 val, u32 attr); /* Wrapper functions for safely calling variant operations */ static inline int ufshcd_vops_init(struct ufs_hba *hba) -- cgit v1.2.3 From 2a683d005286018c6f47ef0e432829655a6a21a3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 11 Jul 2025 05:10:59 +0000 Subject: dev: Pass netdevice_tracker to dev_get_by_flags_rcu(). This is a follow-up for commit eb1ac9ff6c4a5 ("ipv6: anycast: Don't hold RTNL for IPV6_JOIN_ANYCAST."). We should not add a new device lookup API without netdevice_tracker. Let's pass netdevice_tracker to dev_get_by_flags_rcu() and rename it with netdev_ prefix to match other newer APIs. Note that we always use GFP_ATOMIC for netdev_hold() as it's expected to be called under RCU. Suggested-by: Jakub Kicinski Link: https://lore.kernel.org/netdev/20250708184053.102109f6@kernel.org/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250711051120.2866855-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a80d21a14612..ec23cee5245d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3332,8 +3332,6 @@ int dev_get_iflink(const struct net_device *dev); int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb); int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, struct net_device_path_stack *stack); -struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags, - unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); @@ -3396,6 +3394,8 @@ struct net_device *netdev_get_by_index(struct net *net, int ifindex, netdevice_tracker *tracker, gfp_t gfp); struct net_device *netdev_get_by_name(struct net *net, const char *name, netdevice_tracker *tracker, gfp_t gfp); +struct net_device *netdev_get_by_flags_rcu(struct net *net, netdevice_tracker *tracker, + unsigned short flags, unsigned short mask); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); void netdev_copy_name(struct net_device *dev, char *name); -- cgit v1.2.3 From 08a305b2a5b8e125120bcf670ffe775c86cf1f59 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 12 Jul 2025 21:57:59 +0100 Subject: net/x25: Remove unused x25_terminate_link() x25_terminate_link() has been unused since the last use was removed in 2020 by: commit 7eed751b3b2a ("net/x25: handle additional netdev events") Remove it. Signed-off-by: Dr. David Alan Gilbert Acked-by: Martin Schiller Link: https://patch.msgid.link/20250712205759.278777-1-linux@treblig.org Signed-off-by: Jakub Kicinski --- include/net/x25.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/x25.h b/include/net/x25.h index 5e833cfc864e..414f3fd99345 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -203,7 +203,6 @@ void x25_send_frame(struct sk_buff *, struct x25_neigh *); int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); void x25_establish_link(struct x25_neigh *); -void x25_terminate_link(struct x25_neigh *); /* x25_facilities.c */ int x25_parse_facilities(struct sk_buff *, struct x25_facilities *, -- cgit v1.2.3 From 25236d4844ad8631a3ff12f1b33aaa27ac74172d Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Mon, 7 Jul 2025 16:22:25 -0400 Subject: scsi: scsi_transport_fc: Change to use per-rport devloss_work_q Configurations with large numbers of FC rports per host instance are taking a very long time to complete all devloss work. Increase potential parallelism by using a per-rport devloss_work_q for dev_loss_work and fast_io_fail_work. Signed-off-by: Ewan D. Milne Link: https://lore.kernel.org/r/20250707202225.1203189-1-emilne@redhat.com Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- include/scsi/scsi_transport_fc.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index d02b55261307..b908aacfef48 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -383,6 +383,8 @@ struct fc_rport { /* aka fc_starget_attrs */ struct work_struct stgt_delete_work; struct work_struct rport_delete_work; struct request_queue *rqst_q; /* bsg support */ + + struct workqueue_struct *devloss_work_q; } __attribute__((aligned(sizeof(unsigned long)))); /* bit field values for struct fc_rport "flags" field: */ @@ -576,7 +578,6 @@ struct fc_host_attrs { /* work queues for rport state manipulation */ struct workqueue_struct *work_q; - struct workqueue_struct *devloss_work_q; /* bsg support */ struct request_queue *rqst_q; @@ -654,8 +655,6 @@ struct fc_host_attrs { (((struct fc_host_attrs *)(x)->shost_data)->npiv_vports_inuse) #define fc_host_work_q(x) \ (((struct fc_host_attrs *)(x)->shost_data)->work_q) -#define fc_host_devloss_work_q(x) \ - (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q) #define fc_host_dev_loss_tmo(x) \ (((struct fc_host_attrs *)(x)->shost_data)->dev_loss_tmo) #define fc_host_max_ct_payload(x) \ -- cgit v1.2.3 From 2677010e7793451c20d895c477c4dc76f6e6a10e Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Thu, 10 Jul 2025 21:12:03 +0000 Subject: Add support to set NAPI threaded for individual NAPI A net device has a threaded sysctl that can be used to enable threaded NAPI polling on all of the NAPI contexts under that device. Allow enabling threaded NAPI polling at individual NAPI level using netlink. Extend the netlink operation `napi-set` and allow setting the threaded attribute of a NAPI. This will enable the threaded polling on a NAPI context. Add a test in `nl_netdev.py` that verifies various cases of threaded NAPI being set at NAPI and at device level. Tested ./tools/testing/selftests/net/nl_netdev.py TAP version 13 1..7 ok 1 nl_netdev.empty_check ok 2 nl_netdev.lo_check ok 3 nl_netdev.page_pool_check ok 4 nl_netdev.napi_list_check ok 5 nl_netdev.dev_set_threaded ok 6 nl_netdev.napi_set_threaded ok 7 nl_netdev.nsim_rxq_reset_down # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Samiullah Khawaja Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250710211203.3979655-1-skhawaja@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + include/uapi/linux/netdev.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ec23cee5245d..e49d8c98d284 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -369,6 +369,7 @@ struct napi_config { u64 irq_suspend_timeout; u32 defer_hard_irqs; cpumask_t affinity_mask; + bool threaded; unsigned int napi_id; }; diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 7eb9571786b8..1f3719a9a0eb 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -134,6 +134,7 @@ enum { NETDEV_A_NAPI_DEFER_HARD_IRQS, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, + NETDEV_A_NAPI_THREADED, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) -- cgit v1.2.3 From 9ca48d616ed76b284f946667a3cb7961205c8ee3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Jul 2025 11:39:59 +0000 Subject: tcp: do not accept packets beyond window Currently, TCP accepts incoming packets which might go beyond the offered RWIN. Add to tcp_sequence() the validation of packet end sequence. Add the corresponding check in the fast path. We relax this new constraint if the receive queue is empty, to not freeze flows from buggy peers. Add a new drop reason : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250711114006.480026-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index b9e78290269e..beb134d55747 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -45,6 +45,7 @@ FN(TCP_LISTEN_OVERFLOW) \ FN(TCP_OLD_SEQUENCE) \ FN(TCP_INVALID_SEQUENCE) \ + FN(TCP_INVALID_END_SEQUENCE) \ FN(TCP_INVALID_ACK_SEQUENCE) \ FN(TCP_RESET) \ FN(TCP_INVALID_SYN) \ @@ -303,8 +304,13 @@ enum skb_drop_reason { SKB_DROP_REASON_TCP_LISTEN_OVERFLOW, /** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */ SKB_DROP_REASON_TCP_OLD_SEQUENCE, - /** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */ + /** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field. */ SKB_DROP_REASON_TCP_INVALID_SEQUENCE, + /** + * @SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE: + * Not acceptable END_SEQ field. + */ + SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE, /** * @SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE: Not acceptable ACK SEQ * field because ack sequence is not in the window between snd_una -- cgit v1.2.3 From 6c758062c64dfbd61862801fbde4e0702f4f3a23 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Jul 2025 11:40:00 +0000 Subject: tcp: add LINUX_MIB_BEYOND_WINDOW Add a new SNMP MIB : LINUX_MIB_BEYOND_WINDOW Incremented when an incoming packet is received beyond the receiver window. nstat -az | grep TcpExtBeyondWindow Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250711114006.480026-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 1 + include/uapi/linux/snmp.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index beb134d55747..229bb1826f2a 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -309,6 +309,7 @@ enum skb_drop_reason { /** * @SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE: * Not acceptable END_SEQ field. + * Corresponds to LINUX_MIB_BEYOND_WINDOW. */ SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE, /** diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 1d234d7e1892..49f5640092a0 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -186,6 +186,7 @@ enum LINUX_MIB_TIMEWAITKILLED, /* TimeWaitKilled */ LINUX_MIB_PAWSACTIVEREJECTED, /* PAWSActiveRejected */ LINUX_MIB_PAWSESTABREJECTED, /* PAWSEstabRejected */ + LINUX_MIB_BEYOND_WINDOW, /* BeyondWindow */ LINUX_MIB_TSECRREJECTED, /* TSEcrRejected */ LINUX_MIB_PAWS_OLD_ACK, /* PAWSOldAck */ LINUX_MIB_PAWS_TW_REJECTED, /* PAWSTimewait */ -- cgit v1.2.3 From 75dff0584cce79203ee9968c66c7589150fed591 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Jul 2025 11:40:04 +0000 Subject: tcp: add const to tcp_try_rmem_schedule() and sk_rmem_schedule() skb These functions to not modify the skb, add a const qualifier. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250711114006.480026-7-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 0f2443d4ec58..c8a4b283df6f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1553,7 +1553,7 @@ __sk_rmem_schedule(struct sock *sk, int size, bool pfmemalloc) } static inline bool -sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size) +sk_rmem_schedule(struct sock *sk, const struct sk_buff *skb, int size) { return __sk_rmem_schedule(sk, size, skb_pfmemalloc(skb)); } -- cgit v1.2.3 From d7c36d6350b5a4b27256eaeeea3b72621a819c9a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Jun 2025 11:29:21 +0200 Subject: locking/lockdep: Avoid struct return in lock_stats() Returning a large structure from the lock_stats() function causes clang to have multiple copies of it on the stack and copy between them, which can end up exceeding the frame size warning limit: kernel/locking/lockdep.c:300:25: error: stack frame size (1464) exceeds limit (1280) in 'lock_stats' [-Werror,-Wframe-larger-than] 300 | struct lock_class_stats lock_stats(struct lock_class *class) Change the calling conventions to directly operate on the caller's copy, which apparently is what gcc does already. Signed-off-by: Arnd Bergmann Signed-off-by: Boqun Feng Link: https://lore.kernel.org/r/20250610092941.2642847-1-arnd@kernel.org --- include/linux/lockdep_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h index 9f361d3ab9d9..eae115a26488 100644 --- a/include/linux/lockdep_types.h +++ b/include/linux/lockdep_types.h @@ -175,7 +175,7 @@ struct lock_class_stats { unsigned long bounces[nr_bounce_types]; }; -struct lock_class_stats lock_stats(struct lock_class *class); +void lock_stats(struct lock_class *class, struct lock_class_stats *stats); void clear_lock_stats(struct lock_class *class); #endif -- cgit v1.2.3 From 6fd9e1aa078490ed6e79307465269629fcb43018 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 1 Jul 2025 14:55:54 +0100 Subject: regset: Fix kerneldoc for struct regset_get() in user_regset Commit 7717cb9bdd04 ("regset: new method and helpers for it") added a new interface ->regset_get() for struct user_regset, and commit 1e6986c9db21 ("regset: kill ->get()") got rid of the old interface. The kerneldoc comment block was never updated to take account of this change, though. Update it. No functional change. Signed-off-by: Dave Martin Cc: Oleg Nesterov Cc: Kees Cook Cc: Akihiko Odaki Reviewed-by: Akihiko Odaki Link: https://lore.kernel.org/r/20250701135616.29630-2-Dave.Martin@arm.com Signed-off-by: Kees Cook --- include/linux/regset.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/regset.h b/include/linux/regset.h index 9061266dd8de..02417e934845 100644 --- a/include/linux/regset.h +++ b/include/linux/regset.h @@ -151,7 +151,7 @@ typedef int user_regset_writeback_fn(struct task_struct *target, * @align: Required alignment, in bytes. * @bias: Bias from natural indexing. * @core_note_type: ELF note @n_type value used in core dumps. - * @get: Function to fetch values. + * @regset_get: Function to fetch values. * @set: Function to store values. * @active: Function to report if regset is active, or %NULL. * @writeback: Function to write data back to user memory, or %NULL. -- cgit v1.2.3 From 85a7f9cbf8a83cfe0aca04053a832206c4ad1272 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 1 Jul 2025 14:55:55 +0100 Subject: regset: Add explicit core note name in struct user_regset There is currently hard-coded logic spread around the tree for determining the note name for regset notes emitted in coredumps. Now that the names are declared explicitly in , this can be simplified. In preparation for getting rid of the special-case logic, add an explicit core_note_name field in struct user_regset for specifying the note name explicitly. To help avoid mistakes, a convenience macro USER_REGSET_NOTE_TYPE() is provided to set .core_note_type and .core_note_name based on the note type. When dumping core, use the new field to set the note name, if the regset specifies it. Signed-off-by: Dave Martin Cc: Oleg Nesterov Cc: Kees Cook Cc: Akihiko Odaki Acked-by: Alexander Gordeev # s390 Reviewed-by: Akihiko Odaki Link: https://lore.kernel.org/r/20250701135616.29630-3-Dave.Martin@arm.com Signed-off-by: Kees Cook --- include/linux/regset.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/regset.h b/include/linux/regset.h index 02417e934845..ad1ca6fe04f4 100644 --- a/include/linux/regset.h +++ b/include/linux/regset.h @@ -151,6 +151,7 @@ typedef int user_regset_writeback_fn(struct task_struct *target, * @align: Required alignment, in bytes. * @bias: Bias from natural indexing. * @core_note_type: ELF note @n_type value used in core dumps. + * @core_note_name: ELF note name to qualify the note type. * @regset_get: Function to fetch values. * @set: Function to store values. * @active: Function to report if regset is active, or %NULL. @@ -190,6 +191,10 @@ typedef int user_regset_writeback_fn(struct task_struct *target, * * If nonzero, @core_note_type gives the n_type field (NT_* value) * of the core file note in which this regset's data appears. + * @core_note_name specifies the note name. The preferred way to + * specify these two fields is to use the @USER_REGSET_NOTE_TYPE() + * macro. + * * NT_PRSTATUS is a special case in that the regset data starts at * offsetof(struct elf_prstatus, pr_reg) into the note data; that is * part of the per-machine ELF formats userland knows about. In @@ -207,8 +212,13 @@ struct user_regset { unsigned int align; unsigned int bias; unsigned int core_note_type; + const char *core_note_name; }; +#define USER_REGSET_NOTE_TYPE(type) \ + .core_note_type = (NT_ ## type), \ + .core_note_name = (NN_ ## type) + /** * struct user_regset_view - available regsets * @name: Identifier, e.g. UTS_MACHINE string. -- cgit v1.2.3 From 444020f4bf06fb86805ee7e7ceec0375485fd94d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 14 Jul 2025 14:21:30 +0200 Subject: wifi: cfg80211: remove scan request n_channels counted_by This reverts commit e3eac9f32ec0 ("wifi: cfg80211: Annotate struct cfg80211_scan_request with __counted_by"). This really has been a completely failed experiment. There were no actual bugs found, and yet at this point we already have four "fixes" to it, with nothing to show for but code churn, and it never even made the code any safer. In all of the cases that ended up getting "fixed", the structure is also internally inconsistent after the n_channels setting as the channel list isn't actually filled yet. You cannot scan with such a structure, that's just wrong. In mac80211, the struct is also reused multiple times, so initializing it once is no good. Some previous "fixes" (e.g. one in brcm80211) are also just setting n_channels before accessing the array, under the assumption that the code is correct and the array can be accessed, further showing that the whole thing is just pointless when the allocation count and use count are not separate. If we really wanted to fix it, we'd need to separately track the number of channels allocated and the number of channels currently used, but given that no bugs were found despite the numerous syzbot reports, that'd just be a waste of time. Remove the __counted_by() annotation. We really should also remove a number of the n_channels settings that are setting up a structure that's inconsistent, but that can wait. Reported-by: syzbot+e834e757bd9b3d3e1251@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e834e757bd9b3d3e1251 Fixes: e3eac9f32ec0 ("wifi: cfg80211: Annotate struct cfg80211_scan_request with __counted_by") Link: https://patch.msgid.link/20250714142130.9b0bbb7e1f07.I09112ccde72d445e11348fc2bef68942cb2ffc94@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d1848dc8ec99..10248d527616 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2690,7 +2690,7 @@ struct cfg80211_scan_request { s8 tsf_report_link_id; /* keep last */ - struct ieee80211_channel *channels[] __counted_by(n_channels); + struct ieee80211_channel *channels[]; }; static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask) -- cgit v1.2.3 From 14450be2332a49445106403492a367412b8c23f4 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 9 Jul 2025 23:37:55 +0300 Subject: wifi: cfg80211: Fix interface type validation Fix a condition that verified valid values of interface types. Signed-off-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250709233537.7ad199ca5939.I0ac1ff74798bf59a87a57f2e18f2153c308b119b@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6ec9a8865b8b..f67424ec1085 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -633,7 +633,7 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband, const struct ieee80211_sband_iftype_data *data; int i; - if (WARN_ON(iftype >= NL80211_IFTYPE_MAX)) + if (WARN_ON(iftype >= NUM_NL80211_IFTYPES)) return NULL; if (iftype == NL80211_IFTYPE_AP_VLAN) -- cgit v1.2.3 From c932be7262323011ae8caa050811300b85347050 Mon Sep 17 00:00:00 2001 From: Yuvarani V Date: Thu, 10 Jul 2025 11:04:27 +0530 Subject: wifi: cfg80211: parse attribute to update unsolicited probe response template At present, the updated unsolicited broadcast probe response template is not processed during userspace commands such as channel switch or color change. This leads to an issue where older incorrect unsolicited probe response is still used during these events. Add support to parse the netlink attribute and store it so that mac80211/drivers can use it to set the BSS_CHANGED_UNSOL_BCAST_PROBE_RESP flag in order to send the updated unsolicited broadcast probe response templates during these events. Signed-off-by: Yuvarani V Signed-off-by: Aditya Kumar Singh Link: https://patch.msgid.link/20250710-update_unsol_bcast_probe_resp-v2-1-31aca39d3b30@oss.qualcomm.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f67424ec1085..77bc17d6e96d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1526,6 +1526,7 @@ struct cfg80211_ap_update { * @n_counter_offsets_beacon: number of csa counters the beacon (tail) * @n_counter_offsets_presp: number of csa counters in the probe response * @beacon_after: beacon data to be used on the new channel + * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters * @radar_required: whether radar detection is required on the new channel * @block_tx: whether transmissions should be blocked while changing * @count: number of beacons until switch @@ -1540,6 +1541,7 @@ struct cfg80211_csa_settings { unsigned int n_counter_offsets_beacon; unsigned int n_counter_offsets_presp; struct cfg80211_beacon_data beacon_after; + struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; bool radar_required; bool block_tx; u8 count; @@ -1555,6 +1557,7 @@ struct cfg80211_csa_settings { * @counter_offset_beacon: offsets of the counters within the beacon (tail) * @counter_offset_presp: offsets of the counters within the probe response * @beacon_next: beacon data to be used after the color change + * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters * @count: number of beacons until the color change * @color: the color used after the change * @link_id: defines the link on which color change is expected during MLO. @@ -1565,6 +1568,7 @@ struct cfg80211_color_change_settings { u16 counter_offset_beacon; u16 counter_offset_presp; struct cfg80211_beacon_data beacon_next; + struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; u8 count; u8 color; u8 link_id; -- cgit v1.2.3 From 1aeed732f4f885ad36280ca4afb331fa42bf7263 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 10 Jul 2025 16:55:58 +0800 Subject: net: mctp: Use hashtable for binds Ensure that a specific EID (remote or local) bind will match in preference to a MCTP_ADDR_ANY bind. This adds infrastructure for binding a socket to receive messages from a specific remote peer address, a future commit will expose an API for this. Signed-off-by: Matt Johnston Link: https://patch.msgid.link/20250710-mctp-bind-v4-5-8ec2f6460c56@codeconstruct.com.au Signed-off-by: Paolo Abeni --- include/net/netns/mctp.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h index 1db8f9aaddb4..89555f90b97b 100644 --- a/include/net/netns/mctp.h +++ b/include/net/netns/mctp.h @@ -6,19 +6,25 @@ #ifndef __NETNS_MCTP_H__ #define __NETNS_MCTP_H__ +#include +#include #include #include +#define MCTP_BINDS_BITS 7 + struct netns_mctp { /* Only updated under RTNL, entries freed via RCU */ struct list_head routes; - /* Bound sockets: list of sockets bound by type. - * This list is updated from non-atomic contexts (under bind_lock), - * and read (under rcu) in packet rx + /* Bound sockets: hash table of sockets, keyed by + * (type, src_eid, dest_eid). + * Specific src_eid/dest_eid entries also have an entry for + * MCTP_ADDR_ANY. This list is updated from non-atomic contexts + * (under bind_lock), and read (under rcu) in packet rx. */ struct mutex bind_lock; - struct hlist_head binds; + DECLARE_HASHTABLE(binds, MCTP_BINDS_BITS); /* tag allocations. This list is read and updated from atomic contexts, * but elements are free()ed after a RCU grace-period @@ -34,4 +40,10 @@ struct netns_mctp { struct list_head neighbours; }; +static inline u32 mctp_bind_hash(u8 type, u8 local_addr, u8 peer_addr) +{ + return hash_32(type | (u32)local_addr << 8 | (u32)peer_addr << 16, + MCTP_BINDS_BITS); +} + #endif /* __NETNS_MCTP_H__ */ -- cgit v1.2.3 From 3549eb08e5505823857838b5cf5f08567702d054 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 10 Jul 2025 16:55:59 +0800 Subject: net: mctp: Allow limiting binds to a peer address Prior to calling bind() a program may call connect() on a socket to restrict to a remote peer address. Using connect() is the normal mechanism to specify a remote network peer, so we use that here. In MCTP connect() is only used for bound sockets - send() is not available for MCTP since a tag must be provided for each message. The smctp_type must match between connect() and bind() calls. Signed-off-by: Matt Johnston Link: https://patch.msgid.link/20250710-mctp-bind-v4-6-8ec2f6460c56@codeconstruct.com.au Signed-off-by: Paolo Abeni --- include/net/mctp.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mctp.h b/include/net/mctp.h index ac4f4ecdfc24..c3207ce98f07 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -69,7 +69,10 @@ struct mctp_sock { /* bind() params */ unsigned int bind_net; - mctp_eid_t bind_addr; + mctp_eid_t bind_local_addr; + mctp_eid_t bind_peer_addr; + unsigned int bind_peer_net; + bool bind_peer_set; __u8 bind_type; /* sendmsg()/recvmsg() uses struct sockaddr_mctp_ext */ -- cgit v1.2.3 From 0a5dc1b67ef5c7e851b57764a2aab8cc4341a7b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 14 Jul 2025 19:07:02 -0300 Subject: drm/sched: Rename DRM_GPU_SCHED_STAT_NOMINAL to DRM_GPU_SCHED_STAT_RESET MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Among the scheduler's statuses, the only one that indicates an error is DRM_GPU_SCHED_STAT_ENODEV. Any status other than DRM_GPU_SCHED_STAT_ENODEV signifies that the operation succeeded and the GPU is in a nominal state. However, to provide more information about the GPU's status, it is needed to convey more information than just "OK". Therefore, rename DRM_GPU_SCHED_STAT_NOMINAL to DRM_GPU_SCHED_STAT_RESET, which better communicates the meaning of this status. The status DRM_GPU_SCHED_STAT_RESET indicates that the GPU has hung, but it has been successfully reset and is now in a nominal state again. Reviewed-by: Philipp Stanner Link: https://lore.kernel.org/r/20250714-sched-skip-reset-v6-1-5c5ba4f55039@igalia.com Signed-off-by: Maíra Canal --- include/drm/gpu_scheduler.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 190844370f48..ed300920996a 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -391,12 +391,12 @@ struct drm_sched_job { * enum drm_gpu_sched_stat - the scheduler's status * * @DRM_GPU_SCHED_STAT_NONE: Reserved. Do not use. - * @DRM_GPU_SCHED_STAT_NOMINAL: Operation succeeded. + * @DRM_GPU_SCHED_STAT_RESET: The GPU hung and successfully reset. * @DRM_GPU_SCHED_STAT_ENODEV: Error: Device is not available anymore. */ enum drm_gpu_sched_stat { DRM_GPU_SCHED_STAT_NONE, - DRM_GPU_SCHED_STAT_NOMINAL, + DRM_GPU_SCHED_STAT_RESET, DRM_GPU_SCHED_STAT_ENODEV, }; -- cgit v1.2.3 From 0b1217bfdfddf664c15954d1d51ee18ed88a2ccf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 14 Jul 2025 19:07:03 -0300 Subject: drm/sched: Allow drivers to skip the reset and keep on running MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the DRM scheduler times out, it's possible that the GPU isn't hung; instead, a job just took unusually long (longer than the timeout) but is still running, and there is, thus, no reason to reset the hardware. This can occur in two scenarios: 1. The job is taking longer than the timeout, but the driver determined through a GPU-specific mechanism that the hardware is still making progress. Hence, the driver would like the scheduler to skip the timeout and treat the job as still pending from then onward. This happens in v3d, Etnaviv, and Xe. 2. Timeout has fired before the free-job worker. Consequently, the scheduler calls `sched->ops->timedout_job()` for a job that isn't timed out. These two scenarios are problematic because the job was removed from the `sched->pending_list` before calling `sched->ops->timedout_job()`, which means that when the job finishes, it won't be freed by the scheduler though `sched->ops->free_job()` - leading to a memory leak. To solve these problems, create a new `drm_gpu_sched_stat`, called DRM_GPU_SCHED_STAT_NO_HANG, which allows a driver to skip the reset. The new status will indicate that the job must be reinserted into `sched->pending_list`, and the hardware / driver will still complete that job. Reviewed-by: Philipp Stanner Link: https://lore.kernel.org/r/20250714-sched-skip-reset-v6-2-5c5ba4f55039@igalia.com Signed-off-by: Maíra Canal --- include/drm/gpu_scheduler.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index ed300920996a..323a505e6e6a 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -393,11 +393,14 @@ struct drm_sched_job { * @DRM_GPU_SCHED_STAT_NONE: Reserved. Do not use. * @DRM_GPU_SCHED_STAT_RESET: The GPU hung and successfully reset. * @DRM_GPU_SCHED_STAT_ENODEV: Error: Device is not available anymore. + * @DRM_GPU_SCHED_STAT_NO_HANG: Contrary to scheduler's assumption, the GPU + * did not hang and is still running. */ enum drm_gpu_sched_stat { DRM_GPU_SCHED_STAT_NONE, DRM_GPU_SCHED_STAT_RESET, DRM_GPU_SCHED_STAT_ENODEV, + DRM_GPU_SCHED_STAT_NO_HANG, }; /** -- cgit v1.2.3 From d9c37a4904ec21ef7d45880fe023c11341869c28 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 14 Jul 2025 14:55:57 +0930 Subject: fs: add a new remove_bdev() callback Currently all filesystems which implement super_operations::shutdown() can not afford losing a device. Thus fs_bdev_mark_dead() will just call the ->shutdown() callback for the involved filesystem. But it will no longer be the case, as multi-device filesystems like btrfs and bcachefs can handle certain device loss without the need to shutdown the whole filesystem. To allow those multi-device filesystems to be integrated to use fs_holder_ops: - Add a new super_operations::remove_bdev() callback - Try ->remove_bdev() callback first inside fs_bdev_mark_dead() If the callback returned 0, meaning the fs can handling the device loss, then exit without doing anything else. If there is no such callback or the callback returned non-zero value, continue to shutdown the filesystem as usual. This means the new remove_bdev() should only do the check on whether the operation can continue, and if so do the fs specific handlings. The shutdown handling should still be handled by the existing ->shutdown() callback. For all existing filesystems with shutdown callback, there is no change to the code nor behavior. Btrfs is going to implement both the ->remove_bdev() and ->shutdown() callbacks soon. Signed-off-by: Qu Wenruo Link: https://lore.kernel.org/09909fcff7f2763cc037fec97ac2482bdc0a12cb.1752470276.git.wqu@suse.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 96c7925a6551..5150db41109a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2363,6 +2363,15 @@ struct super_operations { struct shrink_control *); long (*free_cached_objects)(struct super_block *, struct shrink_control *); + /* + * If a filesystem can support graceful removal of a device and + * continue read-write operations, implement this callback. + * + * Return 0 if the filesystem can continue read-write. + * Non-zero return value or no such callback means the fs will be shutdown + * as usual. + */ + int (*remove_bdev)(struct super_block *sb, struct block_device *bdev); void (*shutdown)(struct super_block *sb); }; -- cgit v1.2.3 From f99d4fccd2185176baf4ecac9a49d280fc62b953 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sat, 12 Jul 2025 15:40:17 +0800 Subject: dt-bindings: power: Add A523 PPU and PCK600 power controllers The A523 PPU is likely the same kind of hardware seen on previous SoCs. The A523 PCK600, as the name suggests, is likely a customized version of ARM's PCK-600 power controller. Comparing the BSP driver against ARM's PPU datasheet shows that the basic registers line up, but Allwinner's hardware has some additional delay controls in the reserved register range. As such it is likely not fully compatible with the standard ARM version. Document A523 PPU and PCK600 compatibles. Also reorder the compatible string entries so they are grouped and ordered by family first, then by SoC model. Reviewed-by: Andre Przywara Reviewed-by: Rob Herring (Arm) Signed-off-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20250712074021.805953-2-wens@kernel.org Signed-off-by: Ulf Hansson --- include/dt-bindings/power/allwinner,sun55i-a523-pck-600.h | 15 +++++++++++++++ include/dt-bindings/power/allwinner,sun55i-a523-ppu.h | 12 ++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 include/dt-bindings/power/allwinner,sun55i-a523-pck-600.h create mode 100644 include/dt-bindings/power/allwinner,sun55i-a523-ppu.h (limited to 'include') diff --git a/include/dt-bindings/power/allwinner,sun55i-a523-pck-600.h b/include/dt-bindings/power/allwinner,sun55i-a523-pck-600.h new file mode 100644 index 000000000000..6b3d8ea7bb69 --- /dev/null +++ b/include/dt-bindings/power/allwinner,sun55i-a523-pck-600.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_POWER_SUN55I_A523_PCK600_H_ +#define _DT_BINDINGS_POWER_SUN55I_A523_PCK600_H_ + +#define PD_VE 0 +#define PD_GPU 1 +#define PD_VI 2 +#define PD_VO0 3 +#define PD_VO1 4 +#define PD_DE 5 +#define PD_NAND 6 +#define PD_PCIE 7 + +#endif /* _DT_BINDINGS_POWER_SUN55I_A523_PCK600_H_ */ diff --git a/include/dt-bindings/power/allwinner,sun55i-a523-ppu.h b/include/dt-bindings/power/allwinner,sun55i-a523-ppu.h new file mode 100644 index 000000000000..bc9aba73c19a --- /dev/null +++ b/include/dt-bindings/power/allwinner,sun55i-a523-ppu.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_POWER_SUN55I_A523_PPU_H_ +#define _DT_BINDINGS_POWER_SUN55I_A523_PPU_H_ + +#define PD_DSP 0 +#define PD_NPU 1 +#define PD_AUDIO 2 +#define PD_SRAM 3 +#define PD_RISCV 4 + +#endif /* _DT_BINDINGS_POWER_SUN55I_A523_PPU_H_ */ -- cgit v1.2.3 From bd116214d53c66dc7f863822af171b20c06b4784 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 15 Jul 2025 13:53:20 +0200 Subject: blktrace: add zoned block commands to blk_fill_rwbs Add zoned block commands to blk_fill_rwbs: - ZONE APPEND will be decoded as 'ZA' - ZONE RESET will be decoded as 'ZR' - ZONE RESET ALL will be decoded as 'ZRA' - ZONE FINISH will be decoded as 'ZF' - ZONE OPEN will be decoded as 'ZO' - ZONE CLOSE will be decoded as 'ZC' Reviewed-by: Chaitanya Kulkarni Signed-off-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20250715115324.53308-2-johannes.thumshirn@wdc.com Signed-off-by: Jens Axboe --- include/trace/events/block.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 14a924c0e303..d88669b3ce02 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -11,7 +11,7 @@ #include #include -#define RWBS_LEN 9 +#define RWBS_LEN 10 #define IOPRIO_CLASS_STRINGS \ { IOPRIO_CLASS_NONE, "none" }, \ -- cgit v1.2.3 From 4cc21a00762b5bd4dcd743317a56c2dba500fd89 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 15 Jul 2025 13:53:22 +0200 Subject: block: add tracepoint for blk_zone_update_request_bio Add a tracepoint in blk_zone_update_request_bio() to trace the bio sector update on ZONE APPEND completions. An example for this tracepoint is as follows: -0 [001] d.h1. 381.746444: blk_zone_update_request_bio: 259,5 ZAS 131072 () 1048832 + 256 none,0,0 [swapper/1] Reviewed-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Bart Van Assche Signed-off-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20250715115324.53308-4-johannes.thumshirn@wdc.com Signed-off-by: Jens Axboe --- include/trace/events/block.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index d88669b3ce02..4855abdf9880 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -404,6 +404,17 @@ DEFINE_EVENT(block_bio, block_getrq, TP_ARGS(bio) ); +/** + * block_zone_update_request_bio - update the bio sector after a zone append + * @bio: the completed block IO operation + * + * Update the bio's bi_sector after a zone append command has been completed. + */ +DEFINE_EVENT(block_rq, blk_zone_append_update_request_bio, + TP_PROTO(struct request *rq), + TP_ARGS(rq) +); + /** * block_plug - keep operations requests in request queue * @q: request queue to plug -- cgit v1.2.3 From 4020d22f0d08ccfc0d00a254a90250ff07333607 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 15 Jul 2025 13:53:23 +0200 Subject: block: add tracepoint for blkdev_zone_mgmt Add a tracepoint for blkdev_zone_mgmt to trace zone management commands submitted by higher layers like file systems or user space. An example output for this tracepoint is as follows: mkfs.btrfs-203 [001] ..... 42.877493: blkdev_zone_mgmt: 8,0 ZRS 5242880 + 0 This example output shows a REQ_OP_ZONE_RESET operation submitted by mkfs.btrfs. Reviewed-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Bart Van Assche Signed-off-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20250715115324.53308-5-johannes.thumshirn@wdc.com Signed-off-by: Jens Axboe --- include/trace/events/block.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 4855abdf9880..ff7698efdfde 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -599,6 +599,40 @@ TRACE_EVENT(block_rq_remap, (unsigned long long)__entry->old_sector, __entry->nr_bios) ); +/** + * blkdev_zone_mgmt - Execute a zone management operation on a range of zones + * @bio: The block IO operation sent down to the device + * @nr_sectors: The number of sectors affected by this operation + * + * Execute a zone management operation on a specified range of zones. This + * range is encoded in %nr_sectors, which has to be a multiple of the zone + * size. + */ +TRACE_EVENT(blkdev_zone_mgmt, + + TP_PROTO(struct bio *bio, sector_t nr_sectors), + + TP_ARGS(bio, nr_sectors), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( sector_t, nr_sectors ) + __array( char, rwbs, RWBS_LEN) + ), + + TP_fast_assign( + __entry->dev = bio_dev(bio); + __entry->sector = bio->bi_iter.bi_sector; + __entry->nr_sectors = bio_sectors(bio); + blk_fill_rwbs(__entry->rwbs, bio->bi_opf); + ), + + TP_printk("%d,%d %s %llu + %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sectors) +); #endif /* _TRACE_BLOCK_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 2e92ac61c9012ae4bcde2838c5f57f85e4b2623c Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 15 Jul 2025 13:53:24 +0200 Subject: block: add trace messages to zone write plugging Add tracepoints to zone write plugging plug and unplug events. Examples for these events are: kworker/u10:4-393 [001] d..1. 282.991660: disk_zone_wplug_add_bio: 8,0 zone 16, BIO 8388608 + 128 kworker/0:1H-58 [ [000] d..1. 283.083294: blk_zone_wplug_bio: 8,0 zone 15, BIO 7864320 + 128 Reviewed-by: Chaitanya Kulkarni Signed-off-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20250715115324.53308-6-johannes.thumshirn@wdc.com Signed-off-by: Jens Axboe --- include/trace/events/block.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index ff7698efdfde..3e582d5e3a57 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -633,6 +633,50 @@ TRACE_EVENT(blkdev_zone_mgmt, (unsigned long long)__entry->sector, __entry->nr_sectors) ); + +DECLARE_EVENT_CLASS(block_zwplug, + + TP_PROTO(struct request_queue *q, unsigned int zno, sector_t sector, + unsigned int nr_sectors), + + TP_ARGS(q, zno, sector, nr_sectors), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( unsigned int, zno ) + __field( sector_t, sector ) + __field( unsigned int, nr_sectors ) + ), + + TP_fast_assign( + __entry->dev = disk_devt(q->disk); + __entry->zno = zno; + __entry->sector = sector; + __entry->nr_sectors = nr_sectors; + ), + + TP_printk("%d,%d zone %u, BIO %llu + %u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->zno, + (unsigned long long)__entry->sector, + __entry->nr_sectors) +); + +DEFINE_EVENT(block_zwplug, disk_zone_wplug_add_bio, + + TP_PROTO(struct request_queue *q, unsigned int zno, sector_t sector, + unsigned int nr_sectors), + + TP_ARGS(q, zno, sector, nr_sectors) +); + +DEFINE_EVENT(block_zwplug, blk_zone_wplug_bio, + + TP_PROTO(struct request_queue *q, unsigned int zno, sector_t sector, + unsigned int nr_sectors), + + TP_ARGS(q, zno, sector, nr_sectors) +); + #endif /* _TRACE_BLOCK_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 12f33ef6c2aaa410b7ccf039289fe2b04ab2252f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sun, 13 Jul 2025 11:36:12 -0400 Subject: HID: core: Improve the kerneldoc for hid_report_len() The kerneldoc for hid_report_len() needs to be improved. The description of the @report argument is ungrammatical, and the documentation does not explain under what circumstances the report length will include the byte reserved for the report ID. Let's fix up the kerneldoc. Signed-off-by: Alan Stern Link: https://patch.msgid.link/1c8416cb-7347-4a06-b00a-20518069d263@rowland.harvard.edu Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 7f260e0e2049..2cc4f1e4ea96 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1216,7 +1216,11 @@ static inline void hid_hw_wait(struct hid_device *hdev) /** * hid_report_len - calculate the report length * - * @report: the report we want to know the length + * @report: the report whose length we want to know + * + * The length counts the report ID byte, but only if the ID is nonzero + * and therefore is included in the report. Reports whose ID is zero + * never include an ID byte. */ static inline u32 hid_report_len(struct hid_report *report) { -- cgit v1.2.3 From c57ad862462f064c0bd943a5828f5e0eca469ca5 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 7 Jul 2025 13:41:51 +0100 Subject: ASoC: SDCA: Move SDCA search functions and export The ASoC code for SDCA contains several helper functions that search for controls/ranges/etc. As the code evolves these helpers are likely to be useful to anything interacting with the stored DisCo data. Move the helpers into sdca_function.c and export them so other modules can also use them. Signed-off-by: Charles Keepax Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20250707124155.2596744-4-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/sdca_function.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h index b4a97ff08729..543c09e99ab1 100644 --- a/include/sound/sdca_function.h +++ b/include/sound/sdca_function.h @@ -1316,4 +1316,15 @@ int sdca_parse_function(struct device *dev, struct sdca_function_desc *desc, struct sdca_function_data *function); +struct sdca_control *sdca_selector_find_control(struct device *dev, + struct sdca_entity *entity, + const int sel); +struct sdca_control_range *sdca_control_find_range(struct device *dev, + struct sdca_entity *entity, + struct sdca_control *control, + int cols, int rows); +struct sdca_control_range *sdca_selector_find_range(struct device *dev, + struct sdca_entity *entity, + int sel, int cols, int rows); + #endif -- cgit v1.2.3 From 5f86d41d0410b072b5f4875ef5d38bf8d18eed55 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 7 Jul 2025 13:41:52 +0100 Subject: ASoC: soc-dai: Add private data to snd_soc_dai Add a private data pointer that can be used to store context along with the DAI. This will be useful to allow the SDCA class library to store data separately from the CODEC driver itself. Signed-off-by: Charles Keepax Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20250707124155.2596744-5-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index d19ab5572d2b..166c29557e9d 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -463,6 +463,9 @@ struct snd_soc_dai { /* bit field */ unsigned int probed:1; + + /* DAI private data */ + void *priv; }; static inline const struct snd_soc_pcm_stream * -- cgit v1.2.3 From 7b0d60dbb468fa82e9053292cdc8a5436400bfaf Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 7 Jul 2025 13:41:53 +0100 Subject: ASoC: SDCA: Add helper to add DAI constraints Currently the core SDCA code simply creates a place holder available channels from 1 to SDCA_MAX_CHANNEL_COUNT. Add a helper function that will constrain the number of channels based on the actual available SDCA Clusters in DisCo. Currently this code only handles Input Terminal Entities as they directly specify the Cluster. More work will be required later for Output Terminals which inherit their Cluster. Typically this new helper would be called from the DAIs startup callback. Signed-off-by: Charles Keepax Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20250707124155.2596744-6-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/sdca_asoc.h | 10 ++++++++++ include/sound/sdca_function.h | 12 ++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/sound/sdca_asoc.h b/include/sound/sdca_asoc.h index 9121531f0826..bbf146e4fcea 100644 --- a/include/sound/sdca_asoc.h +++ b/include/sound/sdca_asoc.h @@ -11,9 +11,12 @@ #define __SDCA_ASOC_H__ struct device; +struct regmap; struct sdca_function_data; struct snd_kcontrol_new; +struct snd_pcm_substream; struct snd_soc_component_driver; +struct snd_soc_dai; struct snd_soc_dai_driver; struct snd_soc_dai_ops; struct snd_soc_dapm_route; @@ -39,4 +42,11 @@ int sdca_asoc_populate_component(struct device *dev, struct snd_soc_dai_driver **dai_drv, int *num_dai_drv, const struct snd_soc_dai_ops *ops); +int sdca_asoc_set_constraints(struct device *dev, struct regmap *regmap, + struct sdca_function_data *function, + struct snd_pcm_substream *substream, + struct snd_soc_dai *dai); +void sdca_asoc_free_constraints(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai); + #endif // __SDCA_ASOC_H__ diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h index 543c09e99ab1..3bde07409bf3 100644 --- a/include/sound/sdca_function.h +++ b/include/sound/sdca_function.h @@ -1268,6 +1268,15 @@ struct sdca_cluster { struct sdca_channel *channels; }; +/** + * enum sdca_cluster_range - SDCA Range column definitions for ClusterIndex + */ +enum sdca_cluster_range { + SDCA_CLUSTER_BYTEINDEX = 0, + SDCA_CLUSTER_CLUSTERID = 1, + SDCA_CLUSTER_NCOLS = 2, +}; + /** * struct sdca_function_data - top-level information for one SDCA function * @desc: Pointer to short descriptor from initial parsing. @@ -1326,5 +1335,8 @@ struct sdca_control_range *sdca_control_find_range(struct device *dev, struct sdca_control_range *sdca_selector_find_range(struct device *dev, struct sdca_entity *entity, int sel, int cols, int rows); +struct sdca_cluster *sdca_id_find_cluster(struct device *dev, + struct sdca_function_data *function, + const int id); #endif -- cgit v1.2.3 From 264d3d776fb1a428706b0ca0f679bbed876fe7c9 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 7 Jul 2025 13:41:54 +0100 Subject: ASoC: SDCA: Add a helper to get the SoundWire port number Add a helper function to extract the SoundWire hardware port number from the SDCA DataPort Selector Control. Typically this would be called from hw_params() and used to call sdw_stream_add_slave(). Signed-off-by: Charles Keepax Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20250707124155.2596744-7-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/sdca_asoc.h | 3 +++ include/sound/sdca_function.h | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/sound/sdca_asoc.h b/include/sound/sdca_asoc.h index bbf146e4fcea..800a26adcd8e 100644 --- a/include/sound/sdca_asoc.h +++ b/include/sound/sdca_asoc.h @@ -48,5 +48,8 @@ int sdca_asoc_set_constraints(struct device *dev, struct regmap *regmap, struct snd_soc_dai *dai); void sdca_asoc_free_constraints(struct snd_pcm_substream *substream, struct snd_soc_dai *dai); +int sdca_asoc_get_port(struct device *dev, struct regmap *regmap, + struct sdca_function_data *function, + struct snd_soc_dai *dai); #endif // __SDCA_ASOC_H__ diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h index 3bde07409bf3..90d77fc46416 100644 --- a/include/sound/sdca_function.h +++ b/include/sound/sdca_function.h @@ -185,6 +185,14 @@ enum sdca_usage_range { SDCA_USAGE_NCOLS = 7, }; +/** + * enum sdca_dataport_selector_range - Column definitions for DataPort_Selector + */ +enum sdca_dataport_selector_range { + SDCA_DATAPORT_SELECTOR_NCOLS = 16, + SDCA_DATAPORT_SELECTOR_NROWS = 4, +}; + /** * enum sdca_mu_controls - SDCA Controls for Mixer Unit * -- cgit v1.2.3 From 4ed357f72a0e0a691304e5f14a3323811c8ce862 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 7 Jul 2025 13:41:55 +0100 Subject: ASoC: SDCA: Add hw_params() helper function Add a helper function that can be called from hw_params() in the DAI ops to configure the SDCA Cluster, Clock and Usage controls. These setup the channels, sample rate, and bit depths that will be used by the Terminal. Signed-off-by: Charles Keepax Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20250707124155.2596744-8-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/sdca_asoc.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/sound/sdca_asoc.h b/include/sound/sdca_asoc.h index 800a26adcd8e..aa9124f93218 100644 --- a/include/sound/sdca_asoc.h +++ b/include/sound/sdca_asoc.h @@ -14,6 +14,7 @@ struct device; struct regmap; struct sdca_function_data; struct snd_kcontrol_new; +struct snd_pcm_hw_params; struct snd_pcm_substream; struct snd_soc_component_driver; struct snd_soc_dai; @@ -51,5 +52,10 @@ void sdca_asoc_free_constraints(struct snd_pcm_substream *substream, int sdca_asoc_get_port(struct device *dev, struct regmap *regmap, struct sdca_function_data *function, struct snd_soc_dai *dai); +int sdca_asoc_hw_params(struct device *dev, struct regmap *regmap, + struct sdca_function_data *function, + struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai); #endif // __SDCA_ASOC_H__ -- cgit v1.2.3 From c34632dbb29ba7016f1cd2e629ac9dd07f84ce50 Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Mon, 14 Jul 2025 13:02:02 -0400 Subject: bnxt: move bnxt_hsi.h to include/linux/bnxt/hsi.h This moves bnxt_hsi.h contents to a common location so it can be properly referenced by bnxt_en, bnxt_re, and bnge. Signed-off-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: Pavan Chebbi Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250714170202.39688-1-gospo@broadcom.com Signed-off-by: Jakub Kicinski --- include/linux/bnxt/hsi.h | 10914 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 10914 insertions(+) create mode 100644 include/linux/bnxt/hsi.h (limited to 'include') diff --git a/include/linux/bnxt/hsi.h b/include/linux/bnxt/hsi.h new file mode 100644 index 000000000000..549231703bce --- /dev/null +++ b/include/linux/bnxt/hsi.h @@ -0,0 +1,10914 @@ +/* Broadcom NetXtreme-C/E network driver. + * + * Copyright (c) 2014-2016 Broadcom Corporation + * Copyright (c) 2014-2018 Broadcom Limited + * Copyright (c) 2018-2025 Broadcom Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + * + * DO NOT MODIFY!!! This file is automatically generated. + */ + +#ifndef _BNXT_HSI_H_ +#define _BNXT_HSI_H_ + +/* hwrm_cmd_hdr (size:128b/16B) */ +struct hwrm_cmd_hdr { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; +}; + +/* hwrm_resp_hdr (size:64b/8B) */ +struct hwrm_resp_hdr { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; +}; + +#define CMD_DISCR_TLV_ENCAP 0x8000UL +#define CMD_DISCR_LAST CMD_DISCR_TLV_ENCAP + + +#define TLV_TYPE_HWRM_REQUEST 0x1UL +#define TLV_TYPE_HWRM_RESPONSE 0x2UL +#define TLV_TYPE_ROCE_SP_COMMAND 0x3UL +#define TLV_TYPE_QUERY_ROCE_CC_GEN1 0x4UL +#define TLV_TYPE_MODIFY_ROCE_CC_GEN1 0x5UL +#define TLV_TYPE_QUERY_ROCE_CC_GEN2 0x6UL +#define TLV_TYPE_MODIFY_ROCE_CC_GEN2 0x7UL +#define TLV_TYPE_QUERY_ROCE_CC_GEN1_EXT 0x8UL +#define TLV_TYPE_MODIFY_ROCE_CC_GEN1_EXT 0x9UL +#define TLV_TYPE_QUERY_ROCE_CC_GEN2_EXT 0xaUL +#define TLV_TYPE_MODIFY_ROCE_CC_GEN2_EXT 0xbUL +#define TLV_TYPE_ENGINE_CKV_ALIAS_ECC_PUBLIC_KEY 0x8001UL +#define TLV_TYPE_ENGINE_CKV_IV 0x8003UL +#define TLV_TYPE_ENGINE_CKV_AUTH_TAG 0x8004UL +#define TLV_TYPE_ENGINE_CKV_CIPHERTEXT 0x8005UL +#define TLV_TYPE_ENGINE_CKV_HOST_ALGORITHMS 0x8006UL +#define TLV_TYPE_ENGINE_CKV_HOST_ECC_PUBLIC_KEY 0x8007UL +#define TLV_TYPE_ENGINE_CKV_ECDSA_SIGNATURE 0x8008UL +#define TLV_TYPE_ENGINE_CKV_FW_ECC_PUBLIC_KEY 0x8009UL +#define TLV_TYPE_ENGINE_CKV_FW_ALGORITHMS 0x800aUL +#define TLV_TYPE_LAST TLV_TYPE_ENGINE_CKV_FW_ALGORITHMS + + +/* tlv (size:64b/8B) */ +struct tlv { + __le16 cmd_discr; + u8 reserved_8b; + u8 flags; + #define TLV_FLAGS_MORE 0x1UL + #define TLV_FLAGS_MORE_LAST 0x0UL + #define TLV_FLAGS_MORE_NOT_LAST 0x1UL + #define TLV_FLAGS_REQUIRED 0x2UL + #define TLV_FLAGS_REQUIRED_NO (0x0UL << 1) + #define TLV_FLAGS_REQUIRED_YES (0x1UL << 1) + #define TLV_FLAGS_REQUIRED_LAST TLV_FLAGS_REQUIRED_YES + __le16 tlv_type; + __le16 length; +}; + +/* input (size:128b/16B) */ +struct input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; +}; + +/* output (size:64b/8B) */ +struct output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; +}; + +/* hwrm_short_input (size:128b/16B) */ +struct hwrm_short_input { + __le16 req_type; + __le16 signature; + #define SHORT_REQ_SIGNATURE_SHORT_CMD 0x4321UL + #define SHORT_REQ_SIGNATURE_LAST SHORT_REQ_SIGNATURE_SHORT_CMD + __le16 target_id; + #define SHORT_REQ_TARGET_ID_DEFAULT 0x0UL + #define SHORT_REQ_TARGET_ID_TOOLS 0xfffdUL + #define SHORT_REQ_TARGET_ID_LAST SHORT_REQ_TARGET_ID_TOOLS + __le16 size; + __le64 req_addr; +}; + +/* cmd_nums (size:64b/8B) */ +struct cmd_nums { + __le16 req_type; + #define HWRM_VER_GET 0x0UL + #define HWRM_FUNC_ECHO_RESPONSE 0xbUL + #define HWRM_ERROR_RECOVERY_QCFG 0xcUL + #define HWRM_FUNC_DRV_IF_CHANGE 0xdUL + #define HWRM_FUNC_BUF_UNRGTR 0xeUL + #define HWRM_FUNC_VF_CFG 0xfUL + #define HWRM_RESERVED1 0x10UL + #define HWRM_FUNC_RESET 0x11UL + #define HWRM_FUNC_GETFID 0x12UL + #define HWRM_FUNC_VF_ALLOC 0x13UL + #define HWRM_FUNC_VF_FREE 0x14UL + #define HWRM_FUNC_QCAPS 0x15UL + #define HWRM_FUNC_QCFG 0x16UL + #define HWRM_FUNC_CFG 0x17UL + #define HWRM_FUNC_QSTATS 0x18UL + #define HWRM_FUNC_CLR_STATS 0x19UL + #define HWRM_FUNC_DRV_UNRGTR 0x1aUL + #define HWRM_FUNC_VF_RESC_FREE 0x1bUL + #define HWRM_FUNC_VF_VNIC_IDS_QUERY 0x1cUL + #define HWRM_FUNC_DRV_RGTR 0x1dUL + #define HWRM_FUNC_DRV_QVER 0x1eUL + #define HWRM_FUNC_BUF_RGTR 0x1fUL + #define HWRM_PORT_PHY_CFG 0x20UL + #define HWRM_PORT_MAC_CFG 0x21UL + #define HWRM_PORT_TS_QUERY 0x22UL + #define HWRM_PORT_QSTATS 0x23UL + #define HWRM_PORT_LPBK_QSTATS 0x24UL + #define HWRM_PORT_CLR_STATS 0x25UL + #define HWRM_PORT_LPBK_CLR_STATS 0x26UL + #define HWRM_PORT_PHY_QCFG 0x27UL + #define HWRM_PORT_MAC_QCFG 0x28UL + #define HWRM_PORT_MAC_PTP_QCFG 0x29UL + #define HWRM_PORT_PHY_QCAPS 0x2aUL + #define HWRM_PORT_PHY_I2C_WRITE 0x2bUL + #define HWRM_PORT_PHY_I2C_READ 0x2cUL + #define HWRM_PORT_LED_CFG 0x2dUL + #define HWRM_PORT_LED_QCFG 0x2eUL + #define HWRM_PORT_LED_QCAPS 0x2fUL + #define HWRM_QUEUE_QPORTCFG 0x30UL + #define HWRM_QUEUE_QCFG 0x31UL + #define HWRM_QUEUE_CFG 0x32UL + #define HWRM_FUNC_VLAN_CFG 0x33UL + #define HWRM_FUNC_VLAN_QCFG 0x34UL + #define HWRM_QUEUE_PFCENABLE_QCFG 0x35UL + #define HWRM_QUEUE_PFCENABLE_CFG 0x36UL + #define HWRM_QUEUE_PRI2COS_QCFG 0x37UL + #define HWRM_QUEUE_PRI2COS_CFG 0x38UL + #define HWRM_QUEUE_COS2BW_QCFG 0x39UL + #define HWRM_QUEUE_COS2BW_CFG 0x3aUL + #define HWRM_QUEUE_DSCP_QCAPS 0x3bUL + #define HWRM_QUEUE_DSCP2PRI_QCFG 0x3cUL + #define HWRM_QUEUE_DSCP2PRI_CFG 0x3dUL + #define HWRM_VNIC_ALLOC 0x40UL + #define HWRM_VNIC_FREE 0x41UL + #define HWRM_VNIC_CFG 0x42UL + #define HWRM_VNIC_QCFG 0x43UL + #define HWRM_VNIC_TPA_CFG 0x44UL + #define HWRM_VNIC_TPA_QCFG 0x45UL + #define HWRM_VNIC_RSS_CFG 0x46UL + #define HWRM_VNIC_RSS_QCFG 0x47UL + #define HWRM_VNIC_PLCMODES_CFG 0x48UL + #define HWRM_VNIC_PLCMODES_QCFG 0x49UL + #define HWRM_VNIC_QCAPS 0x4aUL + #define HWRM_VNIC_UPDATE 0x4bUL + #define HWRM_RING_ALLOC 0x50UL + #define HWRM_RING_FREE 0x51UL + #define HWRM_RING_CMPL_RING_QAGGINT_PARAMS 0x52UL + #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS 0x53UL + #define HWRM_RING_AGGINT_QCAPS 0x54UL + #define HWRM_RING_SCHQ_ALLOC 0x55UL + #define HWRM_RING_SCHQ_CFG 0x56UL + #define HWRM_RING_SCHQ_FREE 0x57UL + #define HWRM_RING_RESET 0x5eUL + #define HWRM_RING_GRP_ALLOC 0x60UL + #define HWRM_RING_GRP_FREE 0x61UL + #define HWRM_RING_CFG 0x62UL + #define HWRM_RING_QCFG 0x63UL + #define HWRM_RESERVED5 0x64UL + #define HWRM_RESERVED6 0x65UL + #define HWRM_VNIC_RSS_COS_LB_CTX_ALLOC 0x70UL + #define HWRM_VNIC_RSS_COS_LB_CTX_FREE 0x71UL + #define HWRM_QUEUE_MPLS_QCAPS 0x80UL + #define HWRM_QUEUE_MPLSTC2PRI_QCFG 0x81UL + #define HWRM_QUEUE_MPLSTC2PRI_CFG 0x82UL + #define HWRM_QUEUE_VLANPRI_QCAPS 0x83UL + #define HWRM_QUEUE_VLANPRI2PRI_QCFG 0x84UL + #define HWRM_QUEUE_VLANPRI2PRI_CFG 0x85UL + #define HWRM_QUEUE_GLOBAL_CFG 0x86UL + #define HWRM_QUEUE_GLOBAL_QCFG 0x87UL + #define HWRM_QUEUE_ADPTV_QOS_RX_FEATURE_QCFG 0x88UL + #define HWRM_QUEUE_ADPTV_QOS_RX_FEATURE_CFG 0x89UL + #define HWRM_QUEUE_ADPTV_QOS_TX_FEATURE_QCFG 0x8aUL + #define HWRM_QUEUE_ADPTV_QOS_TX_FEATURE_CFG 0x8bUL + #define HWRM_QUEUE_QCAPS 0x8cUL + #define HWRM_QUEUE_ADPTV_QOS_RX_TUNING_QCFG 0x8dUL + #define HWRM_QUEUE_ADPTV_QOS_RX_TUNING_CFG 0x8eUL + #define HWRM_QUEUE_ADPTV_QOS_TX_TUNING_QCFG 0x8fUL + #define HWRM_CFA_L2_FILTER_ALLOC 0x90UL + #define HWRM_CFA_L2_FILTER_FREE 0x91UL + #define HWRM_CFA_L2_FILTER_CFG 0x92UL + #define HWRM_CFA_L2_SET_RX_MASK 0x93UL + #define HWRM_CFA_VLAN_ANTISPOOF_CFG 0x94UL + #define HWRM_CFA_TUNNEL_FILTER_ALLOC 0x95UL + #define HWRM_CFA_TUNNEL_FILTER_FREE 0x96UL + #define HWRM_CFA_ENCAP_RECORD_ALLOC 0x97UL + #define HWRM_CFA_ENCAP_RECORD_FREE 0x98UL + #define HWRM_CFA_NTUPLE_FILTER_ALLOC 0x99UL + #define HWRM_CFA_NTUPLE_FILTER_FREE 0x9aUL + #define HWRM_CFA_NTUPLE_FILTER_CFG 0x9bUL + #define HWRM_CFA_EM_FLOW_ALLOC 0x9cUL + #define HWRM_CFA_EM_FLOW_FREE 0x9dUL + #define HWRM_CFA_EM_FLOW_CFG 0x9eUL + #define HWRM_TUNNEL_DST_PORT_QUERY 0xa0UL + #define HWRM_TUNNEL_DST_PORT_ALLOC 0xa1UL + #define HWRM_TUNNEL_DST_PORT_FREE 0xa2UL + #define HWRM_QUEUE_ADPTV_QOS_TX_TUNING_CFG 0xa3UL + #define HWRM_STAT_CTX_ENG_QUERY 0xafUL + #define HWRM_STAT_CTX_ALLOC 0xb0UL + #define HWRM_STAT_CTX_FREE 0xb1UL + #define HWRM_STAT_CTX_QUERY 0xb2UL + #define HWRM_STAT_CTX_CLR_STATS 0xb3UL + #define HWRM_PORT_QSTATS_EXT 0xb4UL + #define HWRM_PORT_PHY_MDIO_WRITE 0xb5UL + #define HWRM_PORT_PHY_MDIO_READ 0xb6UL + #define HWRM_PORT_PHY_MDIO_BUS_ACQUIRE 0xb7UL + #define HWRM_PORT_PHY_MDIO_BUS_RELEASE 0xb8UL + #define HWRM_PORT_QSTATS_EXT_PFC_WD 0xb9UL + #define HWRM_RESERVED7 0xbaUL + #define HWRM_PORT_TX_FIR_CFG 0xbbUL + #define HWRM_PORT_TX_FIR_QCFG 0xbcUL + #define HWRM_PORT_ECN_QSTATS 0xbdUL + #define HWRM_FW_LIVEPATCH_QUERY 0xbeUL + #define HWRM_FW_LIVEPATCH 0xbfUL + #define HWRM_FW_RESET 0xc0UL + #define HWRM_FW_QSTATUS 0xc1UL + #define HWRM_FW_HEALTH_CHECK 0xc2UL + #define HWRM_FW_SYNC 0xc3UL + #define HWRM_FW_STATE_QCAPS 0xc4UL + #define HWRM_FW_STATE_QUIESCE 0xc5UL + #define HWRM_FW_STATE_BACKUP 0xc6UL + #define HWRM_FW_STATE_RESTORE 0xc7UL + #define HWRM_FW_SET_TIME 0xc8UL + #define HWRM_FW_GET_TIME 0xc9UL + #define HWRM_FW_SET_STRUCTURED_DATA 0xcaUL + #define HWRM_FW_GET_STRUCTURED_DATA 0xcbUL + #define HWRM_FW_IPC_MAILBOX 0xccUL + #define HWRM_FW_ECN_CFG 0xcdUL + #define HWRM_FW_ECN_QCFG 0xceUL + #define HWRM_FW_SECURE_CFG 0xcfUL + #define HWRM_EXEC_FWD_RESP 0xd0UL + #define HWRM_REJECT_FWD_RESP 0xd1UL + #define HWRM_FWD_RESP 0xd2UL + #define HWRM_FWD_ASYNC_EVENT_CMPL 0xd3UL + #define HWRM_OEM_CMD 0xd4UL + #define HWRM_PORT_PRBS_TEST 0xd5UL + #define HWRM_PORT_SFP_SIDEBAND_CFG 0xd6UL + #define HWRM_PORT_SFP_SIDEBAND_QCFG 0xd7UL + #define HWRM_FW_STATE_UNQUIESCE 0xd8UL + #define HWRM_PORT_DSC_DUMP 0xd9UL + #define HWRM_PORT_EP_TX_QCFG 0xdaUL + #define HWRM_PORT_EP_TX_CFG 0xdbUL + #define HWRM_PORT_CFG 0xdcUL + #define HWRM_PORT_QCFG 0xddUL + #define HWRM_PORT_MAC_QCAPS 0xdfUL + #define HWRM_TEMP_MONITOR_QUERY 0xe0UL + #define HWRM_REG_POWER_QUERY 0xe1UL + #define HWRM_CORE_FREQUENCY_QUERY 0xe2UL + #define HWRM_REG_POWER_HISTOGRAM 0xe3UL + #define HWRM_WOL_FILTER_ALLOC 0xf0UL + #define HWRM_WOL_FILTER_FREE 0xf1UL + #define HWRM_WOL_FILTER_QCFG 0xf2UL + #define HWRM_WOL_REASON_QCFG 0xf3UL + #define HWRM_CFA_METER_QCAPS 0xf4UL + #define HWRM_CFA_METER_PROFILE_ALLOC 0xf5UL + #define HWRM_CFA_METER_PROFILE_FREE 0xf6UL + #define HWRM_CFA_METER_PROFILE_CFG 0xf7UL + #define HWRM_CFA_METER_INSTANCE_ALLOC 0xf8UL + #define HWRM_CFA_METER_INSTANCE_FREE 0xf9UL + #define HWRM_CFA_METER_INSTANCE_CFG 0xfaUL + #define HWRM_CFA_VFR_ALLOC 0xfdUL + #define HWRM_CFA_VFR_FREE 0xfeUL + #define HWRM_CFA_VF_PAIR_ALLOC 0x100UL + #define HWRM_CFA_VF_PAIR_FREE 0x101UL + #define HWRM_CFA_VF_PAIR_INFO 0x102UL + #define HWRM_CFA_FLOW_ALLOC 0x103UL + #define HWRM_CFA_FLOW_FREE 0x104UL + #define HWRM_CFA_FLOW_FLUSH 0x105UL + #define HWRM_CFA_FLOW_STATS 0x106UL + #define HWRM_CFA_FLOW_INFO 0x107UL + #define HWRM_CFA_DECAP_FILTER_ALLOC 0x108UL + #define HWRM_CFA_DECAP_FILTER_FREE 0x109UL + #define HWRM_CFA_VLAN_ANTISPOOF_QCFG 0x10aUL + #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC 0x10bUL + #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE 0x10cUL + #define HWRM_CFA_PAIR_ALLOC 0x10dUL + #define HWRM_CFA_PAIR_FREE 0x10eUL + #define HWRM_CFA_PAIR_INFO 0x10fUL + #define HWRM_FW_IPC_MSG 0x110UL + #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO 0x111UL + #define HWRM_CFA_REDIRECT_QUERY_TUNNEL_TYPE 0x112UL + #define HWRM_CFA_FLOW_AGING_TIMER_RESET 0x113UL + #define HWRM_CFA_FLOW_AGING_CFG 0x114UL + #define HWRM_CFA_FLOW_AGING_QCFG 0x115UL + #define HWRM_CFA_FLOW_AGING_QCAPS 0x116UL + #define HWRM_CFA_CTX_MEM_RGTR 0x117UL + #define HWRM_CFA_CTX_MEM_UNRGTR 0x118UL + #define HWRM_CFA_CTX_MEM_QCTX 0x119UL + #define HWRM_CFA_CTX_MEM_QCAPS 0x11aUL + #define HWRM_CFA_COUNTER_QCAPS 0x11bUL + #define HWRM_CFA_COUNTER_CFG 0x11cUL + #define HWRM_CFA_COUNTER_QCFG 0x11dUL + #define HWRM_CFA_COUNTER_QSTATS 0x11eUL + #define HWRM_CFA_TCP_FLAG_PROCESS_QCFG 0x11fUL + #define HWRM_CFA_EEM_QCAPS 0x120UL + #define HWRM_CFA_EEM_CFG 0x121UL + #define HWRM_CFA_EEM_QCFG 0x122UL + #define HWRM_CFA_EEM_OP 0x123UL + #define HWRM_CFA_ADV_FLOW_MGNT_QCAPS 0x124UL + #define HWRM_CFA_TFLIB 0x125UL + #define HWRM_CFA_LAG_GROUP_MEMBER_RGTR 0x126UL + #define HWRM_CFA_LAG_GROUP_MEMBER_UNRGTR 0x127UL + #define HWRM_CFA_TLS_FILTER_ALLOC 0x128UL + #define HWRM_CFA_TLS_FILTER_FREE 0x129UL + #define HWRM_CFA_RELEASE_AFM_FUNC 0x12aUL + #define HWRM_ENGINE_CKV_STATUS 0x12eUL + #define HWRM_ENGINE_CKV_CKEK_ADD 0x12fUL + #define HWRM_ENGINE_CKV_CKEK_DELETE 0x130UL + #define HWRM_ENGINE_CKV_KEY_ADD 0x131UL + #define HWRM_ENGINE_CKV_KEY_DELETE 0x132UL + #define HWRM_ENGINE_CKV_FLUSH 0x133UL + #define HWRM_ENGINE_CKV_RNG_GET 0x134UL + #define HWRM_ENGINE_CKV_KEY_GEN 0x135UL + #define HWRM_ENGINE_CKV_KEY_LABEL_CFG 0x136UL + #define HWRM_ENGINE_CKV_KEY_LABEL_QCFG 0x137UL + #define HWRM_ENGINE_QG_CONFIG_QUERY 0x13cUL + #define HWRM_ENGINE_QG_QUERY 0x13dUL + #define HWRM_ENGINE_QG_METER_PROFILE_CONFIG_QUERY 0x13eUL + #define HWRM_ENGINE_QG_METER_PROFILE_QUERY 0x13fUL + #define HWRM_ENGINE_QG_METER_PROFILE_ALLOC 0x140UL + #define HWRM_ENGINE_QG_METER_PROFILE_FREE 0x141UL + #define HWRM_ENGINE_QG_METER_QUERY 0x142UL + #define HWRM_ENGINE_QG_METER_BIND 0x143UL + #define HWRM_ENGINE_QG_METER_UNBIND 0x144UL + #define HWRM_ENGINE_QG_FUNC_BIND 0x145UL + #define HWRM_ENGINE_SG_CONFIG_QUERY 0x146UL + #define HWRM_ENGINE_SG_QUERY 0x147UL + #define HWRM_ENGINE_SG_METER_QUERY 0x148UL + #define HWRM_ENGINE_SG_METER_CONFIG 0x149UL + #define HWRM_ENGINE_SG_QG_BIND 0x14aUL + #define HWRM_ENGINE_QG_SG_UNBIND 0x14bUL + #define HWRM_ENGINE_CONFIG_QUERY 0x154UL + #define HWRM_ENGINE_STATS_CONFIG 0x155UL + #define HWRM_ENGINE_STATS_CLEAR 0x156UL + #define HWRM_ENGINE_STATS_QUERY 0x157UL + #define HWRM_ENGINE_STATS_QUERY_CONTINUOUS_ERROR 0x158UL + #define HWRM_ENGINE_RQ_ALLOC 0x15eUL + #define HWRM_ENGINE_RQ_FREE 0x15fUL + #define HWRM_ENGINE_CQ_ALLOC 0x160UL + #define HWRM_ENGINE_CQ_FREE 0x161UL + #define HWRM_ENGINE_NQ_ALLOC 0x162UL + #define HWRM_ENGINE_NQ_FREE 0x163UL + #define HWRM_ENGINE_ON_DIE_RQE_CREDITS 0x164UL + #define HWRM_ENGINE_FUNC_QCFG 0x165UL + #define HWRM_FUNC_RESOURCE_QCAPS 0x190UL + #define HWRM_FUNC_VF_RESOURCE_CFG 0x191UL + #define HWRM_FUNC_BACKING_STORE_QCAPS 0x192UL + #define HWRM_FUNC_BACKING_STORE_CFG 0x193UL + #define HWRM_FUNC_BACKING_STORE_QCFG 0x194UL + #define HWRM_FUNC_VF_BW_CFG 0x195UL + #define HWRM_FUNC_VF_BW_QCFG 0x196UL + #define HWRM_FUNC_HOST_PF_IDS_QUERY 0x197UL + #define HWRM_FUNC_QSTATS_EXT 0x198UL + #define HWRM_STAT_EXT_CTX_QUERY 0x199UL + #define HWRM_FUNC_SPD_CFG 0x19aUL + #define HWRM_FUNC_SPD_QCFG 0x19bUL + #define HWRM_FUNC_PTP_PIN_QCFG 0x19cUL + #define HWRM_FUNC_PTP_PIN_CFG 0x19dUL + #define HWRM_FUNC_PTP_CFG 0x19eUL + #define HWRM_FUNC_PTP_TS_QUERY 0x19fUL + #define HWRM_FUNC_PTP_EXT_CFG 0x1a0UL + #define HWRM_FUNC_PTP_EXT_QCFG 0x1a1UL + #define HWRM_FUNC_KEY_CTX_ALLOC 0x1a2UL + #define HWRM_FUNC_BACKING_STORE_CFG_V2 0x1a3UL + #define HWRM_FUNC_BACKING_STORE_QCFG_V2 0x1a4UL + #define HWRM_FUNC_DBR_PACING_CFG 0x1a5UL + #define HWRM_FUNC_DBR_PACING_QCFG 0x1a6UL + #define HWRM_FUNC_DBR_PACING_BROADCAST_EVENT 0x1a7UL + #define HWRM_FUNC_BACKING_STORE_QCAPS_V2 0x1a8UL + #define HWRM_FUNC_DBR_PACING_NQLIST_QUERY 0x1a9UL + #define HWRM_FUNC_DBR_RECOVERY_COMPLETED 0x1aaUL + #define HWRM_FUNC_SYNCE_CFG 0x1abUL + #define HWRM_FUNC_SYNCE_QCFG 0x1acUL + #define HWRM_FUNC_KEY_CTX_FREE 0x1adUL + #define HWRM_FUNC_LAG_MODE_CFG 0x1aeUL + #define HWRM_FUNC_LAG_MODE_QCFG 0x1afUL + #define HWRM_FUNC_LAG_CREATE 0x1b0UL + #define HWRM_FUNC_LAG_UPDATE 0x1b1UL + #define HWRM_FUNC_LAG_FREE 0x1b2UL + #define HWRM_FUNC_LAG_QCFG 0x1b3UL + #define HWRM_FUNC_TIMEDTX_PACING_RATE_ADD 0x1c2UL + #define HWRM_FUNC_TIMEDTX_PACING_RATE_DELETE 0x1c3UL + #define HWRM_FUNC_TIMEDTX_PACING_RATE_QUERY 0x1c4UL + #define HWRM_SELFTEST_QLIST 0x200UL + #define HWRM_SELFTEST_EXEC 0x201UL + #define HWRM_SELFTEST_IRQ 0x202UL + #define HWRM_SELFTEST_RETRIEVE_SERDES_DATA 0x203UL + #define HWRM_PCIE_QSTATS 0x204UL + #define HWRM_MFG_FRU_WRITE_CONTROL 0x205UL + #define HWRM_MFG_TIMERS_QUERY 0x206UL + #define HWRM_MFG_OTP_CFG 0x207UL + #define HWRM_MFG_OTP_QCFG 0x208UL + #define HWRM_MFG_HDMA_TEST 0x209UL + #define HWRM_MFG_FRU_EEPROM_WRITE 0x20aUL + #define HWRM_MFG_FRU_EEPROM_READ 0x20bUL + #define HWRM_MFG_SOC_IMAGE 0x20cUL + #define HWRM_MFG_SOC_QSTATUS 0x20dUL + #define HWRM_MFG_PARAM_CRITICAL_DATA_FINALIZE 0x20eUL + #define HWRM_MFG_PARAM_CRITICAL_DATA_READ 0x20fUL + #define HWRM_MFG_PARAM_CRITICAL_DATA_HEALTH 0x210UL + #define HWRM_MFG_PRVSN_EXPORT_CSR 0x211UL + #define HWRM_MFG_PRVSN_IMPORT_CERT 0x212UL + #define HWRM_MFG_PRVSN_GET_STATE 0x213UL + #define HWRM_MFG_GET_NVM_MEASUREMENT 0x214UL + #define HWRM_MFG_PSOC_QSTATUS 0x215UL + #define HWRM_MFG_SELFTEST_QLIST 0x216UL + #define HWRM_MFG_SELFTEST_EXEC 0x217UL + #define HWRM_STAT_GENERIC_QSTATS 0x218UL + #define HWRM_MFG_PRVSN_EXPORT_CERT 0x219UL + #define HWRM_STAT_DB_ERROR_QSTATS 0x21aUL + #define HWRM_MFG_TESTS 0x21bUL + #define HWRM_MFG_WRITE_CERT_NVM 0x21cUL + #define HWRM_PORT_POE_CFG 0x230UL + #define HWRM_PORT_POE_QCFG 0x231UL + #define HWRM_UDCC_QCAPS 0x258UL + #define HWRM_UDCC_CFG 0x259UL + #define HWRM_UDCC_QCFG 0x25aUL + #define HWRM_UDCC_SESSION_CFG 0x25bUL + #define HWRM_UDCC_SESSION_QCFG 0x25cUL + #define HWRM_UDCC_SESSION_QUERY 0x25dUL + #define HWRM_UDCC_COMP_CFG 0x25eUL + #define HWRM_UDCC_COMP_QCFG 0x25fUL + #define HWRM_UDCC_COMP_QUERY 0x260UL + #define HWRM_QUEUE_PFCWD_TIMEOUT_QCAPS 0x261UL + #define HWRM_QUEUE_PFCWD_TIMEOUT_CFG 0x262UL + #define HWRM_QUEUE_PFCWD_TIMEOUT_QCFG 0x263UL + #define HWRM_TF 0x2bcUL + #define HWRM_TF_VERSION_GET 0x2bdUL + #define HWRM_TF_SESSION_OPEN 0x2c6UL + #define HWRM_TF_SESSION_REGISTER 0x2c8UL + #define HWRM_TF_SESSION_UNREGISTER 0x2c9UL + #define HWRM_TF_SESSION_CLOSE 0x2caUL + #define HWRM_TF_SESSION_QCFG 0x2cbUL + #define HWRM_TF_SESSION_RESC_QCAPS 0x2ccUL + #define HWRM_TF_SESSION_RESC_ALLOC 0x2cdUL + #define HWRM_TF_SESSION_RESC_FREE 0x2ceUL + #define HWRM_TF_SESSION_RESC_FLUSH 0x2cfUL + #define HWRM_TF_SESSION_RESC_INFO 0x2d0UL + #define HWRM_TF_SESSION_HOTUP_STATE_SET 0x2d1UL + #define HWRM_TF_SESSION_HOTUP_STATE_GET 0x2d2UL + #define HWRM_TF_TBL_TYPE_GET 0x2daUL + #define HWRM_TF_TBL_TYPE_SET 0x2dbUL + #define HWRM_TF_TBL_TYPE_BULK_GET 0x2dcUL + #define HWRM_TF_EM_INSERT 0x2eaUL + #define HWRM_TF_EM_DELETE 0x2ebUL + #define HWRM_TF_EM_HASH_INSERT 0x2ecUL + #define HWRM_TF_EM_MOVE 0x2edUL + #define HWRM_TF_TCAM_SET 0x2f8UL + #define HWRM_TF_TCAM_GET 0x2f9UL + #define HWRM_TF_TCAM_MOVE 0x2faUL + #define HWRM_TF_TCAM_FREE 0x2fbUL + #define HWRM_TF_GLOBAL_CFG_SET 0x2fcUL + #define HWRM_TF_GLOBAL_CFG_GET 0x2fdUL + #define HWRM_TF_IF_TBL_SET 0x2feUL + #define HWRM_TF_IF_TBL_GET 0x2ffUL + #define HWRM_TF_RESC_USAGE_SET 0x300UL + #define HWRM_TF_RESC_USAGE_QUERY 0x301UL + #define HWRM_TF_TBL_TYPE_ALLOC 0x302UL + #define HWRM_TF_TBL_TYPE_FREE 0x303UL + #define HWRM_TFC_TBL_SCOPE_QCAPS 0x380UL + #define HWRM_TFC_TBL_SCOPE_ID_ALLOC 0x381UL + #define HWRM_TFC_TBL_SCOPE_CONFIG 0x382UL + #define HWRM_TFC_TBL_SCOPE_DECONFIG 0x383UL + #define HWRM_TFC_TBL_SCOPE_FID_ADD 0x384UL + #define HWRM_TFC_TBL_SCOPE_FID_REM 0x385UL + #define HWRM_TFC_TBL_SCOPE_POOL_ALLOC 0x386UL + #define HWRM_TFC_TBL_SCOPE_POOL_FREE 0x387UL + #define HWRM_TFC_SESSION_ID_ALLOC 0x388UL + #define HWRM_TFC_SESSION_FID_ADD 0x389UL + #define HWRM_TFC_SESSION_FID_REM 0x38aUL + #define HWRM_TFC_IDENT_ALLOC 0x38bUL + #define HWRM_TFC_IDENT_FREE 0x38cUL + #define HWRM_TFC_IDX_TBL_ALLOC 0x38dUL + #define HWRM_TFC_IDX_TBL_ALLOC_SET 0x38eUL + #define HWRM_TFC_IDX_TBL_SET 0x38fUL + #define HWRM_TFC_IDX_TBL_GET 0x390UL + #define HWRM_TFC_IDX_TBL_FREE 0x391UL + #define HWRM_TFC_GLOBAL_ID_ALLOC 0x392UL + #define HWRM_TFC_TCAM_SET 0x393UL + #define HWRM_TFC_TCAM_GET 0x394UL + #define HWRM_TFC_TCAM_ALLOC 0x395UL + #define HWRM_TFC_TCAM_ALLOC_SET 0x396UL + #define HWRM_TFC_TCAM_FREE 0x397UL + #define HWRM_TFC_IF_TBL_SET 0x398UL + #define HWRM_TFC_IF_TBL_GET 0x399UL + #define HWRM_TFC_TBL_SCOPE_CONFIG_GET 0x39aUL + #define HWRM_TFC_RESC_USAGE_QUERY 0x39bUL + #define HWRM_TFC_GLOBAL_ID_FREE 0x39cUL + #define HWRM_TFC_TCAM_PRI_UPDATE 0x39dUL + #define HWRM_TFC_HOT_UPGRADE_PROCESS 0x3a0UL + #define HWRM_SV 0x400UL + #define HWRM_DBG_SERDES_TEST 0xff0eUL + #define HWRM_DBG_LOG_BUFFER_FLUSH 0xff0fUL + #define HWRM_DBG_READ_DIRECT 0xff10UL + #define HWRM_DBG_READ_INDIRECT 0xff11UL + #define HWRM_DBG_WRITE_DIRECT 0xff12UL + #define HWRM_DBG_WRITE_INDIRECT 0xff13UL + #define HWRM_DBG_DUMP 0xff14UL + #define HWRM_DBG_ERASE_NVM 0xff15UL + #define HWRM_DBG_CFG 0xff16UL + #define HWRM_DBG_COREDUMP_LIST 0xff17UL + #define HWRM_DBG_COREDUMP_INITIATE 0xff18UL + #define HWRM_DBG_COREDUMP_RETRIEVE 0xff19UL + #define HWRM_DBG_FW_CLI 0xff1aUL + #define HWRM_DBG_I2C_CMD 0xff1bUL + #define HWRM_DBG_RING_INFO_GET 0xff1cUL + #define HWRM_DBG_CRASHDUMP_HEADER 0xff1dUL + #define HWRM_DBG_CRASHDUMP_ERASE 0xff1eUL + #define HWRM_DBG_DRV_TRACE 0xff1fUL + #define HWRM_DBG_QCAPS 0xff20UL + #define HWRM_DBG_QCFG 0xff21UL + #define HWRM_DBG_CRASHDUMP_MEDIUM_CFG 0xff22UL + #define HWRM_DBG_USEQ_ALLOC 0xff23UL + #define HWRM_DBG_USEQ_FREE 0xff24UL + #define HWRM_DBG_USEQ_FLUSH 0xff25UL + #define HWRM_DBG_USEQ_QCAPS 0xff26UL + #define HWRM_DBG_USEQ_CW_CFG 0xff27UL + #define HWRM_DBG_USEQ_SCHED_CFG 0xff28UL + #define HWRM_DBG_USEQ_RUN 0xff29UL + #define HWRM_DBG_USEQ_DELIVERY_REQ 0xff2aUL + #define HWRM_DBG_USEQ_RESP_HDR 0xff2bUL + #define HWRM_DBG_COREDUMP_CAPTURE 0xff2cUL + #define HWRM_DBG_PTRACE 0xff2dUL + #define HWRM_DBG_SIM_CABLE_STATE 0xff2eUL + #define HWRM_NVM_GET_VPD_FIELD_INFO 0xffeaUL + #define HWRM_NVM_SET_VPD_FIELD_INFO 0xffebUL + #define HWRM_NVM_DEFRAG 0xffecUL + #define HWRM_NVM_REQ_ARBITRATION 0xffedUL + #define HWRM_NVM_FACTORY_DEFAULTS 0xffeeUL + #define HWRM_NVM_VALIDATE_OPTION 0xffefUL + #define HWRM_NVM_FLUSH 0xfff0UL + #define HWRM_NVM_GET_VARIABLE 0xfff1UL + #define HWRM_NVM_SET_VARIABLE 0xfff2UL + #define HWRM_NVM_INSTALL_UPDATE 0xfff3UL + #define HWRM_NVM_MODIFY 0xfff4UL + #define HWRM_NVM_VERIFY_UPDATE 0xfff5UL + #define HWRM_NVM_GET_DEV_INFO 0xfff6UL + #define HWRM_NVM_ERASE_DIR_ENTRY 0xfff7UL + #define HWRM_NVM_MOD_DIR_ENTRY 0xfff8UL + #define HWRM_NVM_FIND_DIR_ENTRY 0xfff9UL + #define HWRM_NVM_GET_DIR_ENTRIES 0xfffaUL + #define HWRM_NVM_GET_DIR_INFO 0xfffbUL + #define HWRM_NVM_RAW_DUMP 0xfffcUL + #define HWRM_NVM_READ 0xfffdUL + #define HWRM_NVM_WRITE 0xfffeUL + #define HWRM_NVM_RAW_WRITE_BLK 0xffffUL + #define HWRM_LAST HWRM_NVM_RAW_WRITE_BLK + __le16 unused_0[3]; +}; + +/* ret_codes (size:64b/8B) */ +struct ret_codes { + __le16 error_code; + #define HWRM_ERR_CODE_SUCCESS 0x0UL + #define HWRM_ERR_CODE_FAIL 0x1UL + #define HWRM_ERR_CODE_INVALID_PARAMS 0x2UL + #define HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED 0x3UL + #define HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR 0x4UL + #define HWRM_ERR_CODE_INVALID_FLAGS 0x5UL + #define HWRM_ERR_CODE_INVALID_ENABLES 0x6UL + #define HWRM_ERR_CODE_UNSUPPORTED_TLV 0x7UL + #define HWRM_ERR_CODE_NO_BUFFER 0x8UL + #define HWRM_ERR_CODE_UNSUPPORTED_OPTION_ERR 0x9UL + #define HWRM_ERR_CODE_HOT_RESET_PROGRESS 0xaUL + #define HWRM_ERR_CODE_HOT_RESET_FAIL 0xbUL + #define HWRM_ERR_CODE_NO_FLOW_COUNTER_DURING_ALLOC 0xcUL + #define HWRM_ERR_CODE_KEY_HASH_COLLISION 0xdUL + #define HWRM_ERR_CODE_KEY_ALREADY_EXISTS 0xeUL + #define HWRM_ERR_CODE_HWRM_ERROR 0xfUL + #define HWRM_ERR_CODE_BUSY 0x10UL + #define HWRM_ERR_CODE_RESOURCE_LOCKED 0x11UL + #define HWRM_ERR_CODE_PF_UNAVAILABLE 0x12UL + #define HWRM_ERR_CODE_ENTITY_NOT_PRESENT 0x13UL + #define HWRM_ERR_CODE_SECURE_SOC_ERROR 0x14UL + #define HWRM_ERR_CODE_TLV_ENCAPSULATED_RESPONSE 0x8000UL + #define HWRM_ERR_CODE_UNKNOWN_ERR 0xfffeUL + #define HWRM_ERR_CODE_CMD_NOT_SUPPORTED 0xffffUL + #define HWRM_ERR_CODE_LAST HWRM_ERR_CODE_CMD_NOT_SUPPORTED + __le16 unused_0[3]; +}; + +/* hwrm_err_output (size:128b/16B) */ +struct hwrm_err_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 opaque_0; + __le16 opaque_1; + u8 cmd_err; + u8 valid; +}; +#define HWRM_NA_SIGNATURE ((__le32)(-1)) +#define HWRM_MAX_REQ_LEN 128 +#define HWRM_MAX_RESP_LEN 704 +#define HW_HASH_INDEX_SIZE 0x80 +#define HW_HASH_KEY_SIZE 40 +#define HWRM_RESP_VALID_KEY 1 +#define HWRM_TARGET_ID_BONO 0xFFF8 +#define HWRM_TARGET_ID_KONG 0xFFF9 +#define HWRM_TARGET_ID_APE 0xFFFA +#define HWRM_TARGET_ID_TOOLS 0xFFFD +#define HWRM_VERSION_MAJOR 1 +#define HWRM_VERSION_MINOR 10 +#define HWRM_VERSION_UPDATE 3 +#define HWRM_VERSION_RSVD 97 +#define HWRM_VERSION_STR "1.10.3.97" + +/* hwrm_ver_get_input (size:192b/24B) */ +struct hwrm_ver_get_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 hwrm_intf_maj; + u8 hwrm_intf_min; + u8 hwrm_intf_upd; + u8 unused_0[5]; +}; + +/* hwrm_ver_get_output (size:1408b/176B) */ +struct hwrm_ver_get_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 hwrm_intf_maj_8b; + u8 hwrm_intf_min_8b; + u8 hwrm_intf_upd_8b; + u8 hwrm_intf_rsvd_8b; + u8 hwrm_fw_maj_8b; + u8 hwrm_fw_min_8b; + u8 hwrm_fw_bld_8b; + u8 hwrm_fw_rsvd_8b; + u8 mgmt_fw_maj_8b; + u8 mgmt_fw_min_8b; + u8 mgmt_fw_bld_8b; + u8 mgmt_fw_rsvd_8b; + u8 netctrl_fw_maj_8b; + u8 netctrl_fw_min_8b; + u8 netctrl_fw_bld_8b; + u8 netctrl_fw_rsvd_8b; + __le32 dev_caps_cfg; + #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_FW_UPD_SUPPORTED 0x1UL + #define VER_GET_RESP_DEV_CAPS_CFG_FW_DCBX_AGENT_SUPPORTED 0x2UL + #define VER_GET_RESP_DEV_CAPS_CFG_SHORT_CMD_SUPPORTED 0x4UL + #define VER_GET_RESP_DEV_CAPS_CFG_SHORT_CMD_REQUIRED 0x8UL + #define VER_GET_RESP_DEV_CAPS_CFG_KONG_MB_CHNL_SUPPORTED 0x10UL + #define VER_GET_RESP_DEV_CAPS_CFG_FLOW_HANDLE_64BIT_SUPPORTED 0x20UL + #define VER_GET_RESP_DEV_CAPS_CFG_L2_FILTER_TYPES_ROCE_OR_L2_SUPPORTED 0x40UL + #define VER_GET_RESP_DEV_CAPS_CFG_VIRTIO_VSWITCH_OFFLOAD_SUPPORTED 0x80UL + #define VER_GET_RESP_DEV_CAPS_CFG_TRUSTED_VF_SUPPORTED 0x100UL + #define VER_GET_RESP_DEV_CAPS_CFG_FLOW_AGING_SUPPORTED 0x200UL + #define VER_GET_RESP_DEV_CAPS_CFG_ADV_FLOW_COUNTERS_SUPPORTED 0x400UL + #define VER_GET_RESP_DEV_CAPS_CFG_CFA_EEM_SUPPORTED 0x800UL + #define VER_GET_RESP_DEV_CAPS_CFG_CFA_ADV_FLOW_MGNT_SUPPORTED 0x1000UL + #define VER_GET_RESP_DEV_CAPS_CFG_CFA_TFLIB_SUPPORTED 0x2000UL + #define VER_GET_RESP_DEV_CAPS_CFG_CFA_TRUFLOW_SUPPORTED 0x4000UL + #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_BOOT_CAPABLE 0x8000UL + #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_SOC_CAPABLE 0x10000UL + u8 roce_fw_maj_8b; + u8 roce_fw_min_8b; + u8 roce_fw_bld_8b; + u8 roce_fw_rsvd_8b; + char hwrm_fw_name[16]; + char mgmt_fw_name[16]; + char netctrl_fw_name[16]; + char active_pkg_name[16]; + char roce_fw_name[16]; + __le16 chip_num; + u8 chip_rev; + u8 chip_metal; + u8 chip_bond_id; + u8 chip_platform_type; + #define VER_GET_RESP_CHIP_PLATFORM_TYPE_ASIC 0x0UL + #define VER_GET_RESP_CHIP_PLATFORM_TYPE_FPGA 0x1UL + #define VER_GET_RESP_CHIP_PLATFORM_TYPE_PALLADIUM 0x2UL + #define VER_GET_RESP_CHIP_PLATFORM_TYPE_LAST VER_GET_RESP_CHIP_PLATFORM_TYPE_PALLADIUM + __le16 max_req_win_len; + __le16 max_resp_len; + __le16 def_req_timeout; + u8 flags; + #define VER_GET_RESP_FLAGS_DEV_NOT_RDY 0x1UL + #define VER_GET_RESP_FLAGS_EXT_VER_AVAIL 0x2UL + #define VER_GET_RESP_FLAGS_DEV_NOT_RDY_BACKING_STORE 0x4UL + u8 unused_0[2]; + u8 always_1; + __le16 hwrm_intf_major; + __le16 hwrm_intf_minor; + __le16 hwrm_intf_build; + __le16 hwrm_intf_patch; + __le16 hwrm_fw_major; + __le16 hwrm_fw_minor; + __le16 hwrm_fw_build; + __le16 hwrm_fw_patch; + __le16 mgmt_fw_major; + __le16 mgmt_fw_minor; + __le16 mgmt_fw_build; + __le16 mgmt_fw_patch; + __le16 netctrl_fw_major; + __le16 netctrl_fw_minor; + __le16 netctrl_fw_build; + __le16 netctrl_fw_patch; + __le16 roce_fw_major; + __le16 roce_fw_minor; + __le16 roce_fw_build; + __le16 roce_fw_patch; + __le16 max_ext_req_len; + __le16 max_req_timeout; + u8 unused_1[3]; + u8 valid; +}; + +/* eject_cmpl (size:128b/16B) */ +struct eject_cmpl { + __le16 type; + #define EJECT_CMPL_TYPE_MASK 0x3fUL + #define EJECT_CMPL_TYPE_SFT 0 + #define EJECT_CMPL_TYPE_STAT_EJECT 0x1aUL + #define EJECT_CMPL_TYPE_LAST EJECT_CMPL_TYPE_STAT_EJECT + #define EJECT_CMPL_FLAGS_MASK 0xffc0UL + #define EJECT_CMPL_FLAGS_SFT 6 + #define EJECT_CMPL_FLAGS_ERROR 0x40UL + __le16 len; + __le32 opaque; + __le16 v; + #define EJECT_CMPL_V 0x1UL + #define EJECT_CMPL_ERRORS_MASK 0xfffeUL + #define EJECT_CMPL_ERRORS_SFT 1 + #define EJECT_CMPL_ERRORS_BUFFER_ERROR_MASK 0xeUL + #define EJECT_CMPL_ERRORS_BUFFER_ERROR_SFT 1 + #define EJECT_CMPL_ERRORS_BUFFER_ERROR_NO_BUFFER (0x0UL << 1) + #define EJECT_CMPL_ERRORS_BUFFER_ERROR_DID_NOT_FIT (0x1UL << 1) + #define EJECT_CMPL_ERRORS_BUFFER_ERROR_BAD_FORMAT (0x3UL << 1) + #define EJECT_CMPL_ERRORS_BUFFER_ERROR_FLUSH (0x5UL << 1) + #define EJECT_CMPL_ERRORS_BUFFER_ERROR_LAST EJECT_CMPL_ERRORS_BUFFER_ERROR_FLUSH + __le16 reserved16; + __le32 unused_2; +}; + +/* hwrm_cmpl (size:128b/16B) */ +struct hwrm_cmpl { + __le16 type; + #define CMPL_TYPE_MASK 0x3fUL + #define CMPL_TYPE_SFT 0 + #define CMPL_TYPE_HWRM_DONE 0x20UL + #define CMPL_TYPE_LAST CMPL_TYPE_HWRM_DONE + __le16 sequence_id; + __le32 unused_1; + __le32 v; + #define CMPL_V 0x1UL + __le32 unused_3; +}; + +/* hwrm_fwd_req_cmpl (size:128b/16B) */ +struct hwrm_fwd_req_cmpl { + __le16 req_len_type; + #define FWD_REQ_CMPL_TYPE_MASK 0x3fUL + #define FWD_REQ_CMPL_TYPE_SFT 0 + #define FWD_REQ_CMPL_TYPE_HWRM_FWD_REQ 0x22UL + #define FWD_REQ_CMPL_TYPE_LAST FWD_REQ_CMPL_TYPE_HWRM_FWD_REQ + #define FWD_REQ_CMPL_REQ_LEN_MASK 0xffc0UL + #define FWD_REQ_CMPL_REQ_LEN_SFT 6 + __le16 source_id; + __le32 unused0; + __le32 req_buf_addr_v[2]; + #define FWD_REQ_CMPL_V 0x1UL + #define FWD_REQ_CMPL_REQ_BUF_ADDR_MASK 0xfffffffeUL + #define FWD_REQ_CMPL_REQ_BUF_ADDR_SFT 1 +}; + +/* hwrm_fwd_resp_cmpl (size:128b/16B) */ +struct hwrm_fwd_resp_cmpl { + __le16 type; + #define FWD_RESP_CMPL_TYPE_MASK 0x3fUL + #define FWD_RESP_CMPL_TYPE_SFT 0 + #define FWD_RESP_CMPL_TYPE_HWRM_FWD_RESP 0x24UL + #define FWD_RESP_CMPL_TYPE_LAST FWD_RESP_CMPL_TYPE_HWRM_FWD_RESP + __le16 source_id; + __le16 resp_len; + __le16 unused_1; + __le32 resp_buf_addr_v[2]; + #define FWD_RESP_CMPL_V 0x1UL + #define FWD_RESP_CMPL_RESP_BUF_ADDR_MASK 0xfffffffeUL + #define FWD_RESP_CMPL_RESP_BUF_ADDR_SFT 1 +}; + +/* hwrm_async_event_cmpl (size:128b/16B) */ +struct hwrm_async_event_cmpl { + __le16 type; + #define ASYNC_EVENT_CMPL_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_TYPE_LAST ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE 0x0UL + #define ASYNC_EVENT_CMPL_EVENT_ID_LINK_MTU_CHANGE 0x1UL + #define ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE 0x2UL + #define ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE 0x3UL + #define ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED 0x4UL + #define ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED 0x5UL + #define ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE 0x6UL + #define ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE 0x7UL + #define ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY 0x8UL + #define ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY 0x9UL + #define ASYNC_EVENT_CMPL_EVENT_ID_RING_MONITOR_MSG 0xaUL + #define ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD 0x10UL + #define ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD 0x11UL + #define ASYNC_EVENT_CMPL_EVENT_ID_FUNC_FLR_PROC_CMPLT 0x12UL + #define ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD 0x20UL + #define ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_LOAD 0x21UL + #define ASYNC_EVENT_CMPL_EVENT_ID_VF_FLR 0x30UL + #define ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE 0x31UL + #define ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE 0x32UL + #define ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE 0x33UL + #define ASYNC_EVENT_CMPL_EVENT_ID_LLFC_PFC_CHANGE 0x34UL + #define ASYNC_EVENT_CMPL_EVENT_ID_DEFAULT_VNIC_CHANGE 0x35UL + #define ASYNC_EVENT_CMPL_EVENT_ID_HW_FLOW_AGED 0x36UL + #define ASYNC_EVENT_CMPL_EVENT_ID_DEBUG_NOTIFICATION 0x37UL + #define ASYNC_EVENT_CMPL_EVENT_ID_EEM_CACHE_FLUSH_REQ 0x38UL + #define ASYNC_EVENT_CMPL_EVENT_ID_EEM_CACHE_FLUSH_DONE 0x39UL + #define ASYNC_EVENT_CMPL_EVENT_ID_TCP_FLAG_ACTION_CHANGE 0x3aUL + #define ASYNC_EVENT_CMPL_EVENT_ID_EEM_FLOW_ACTIVE 0x3bUL + #define ASYNC_EVENT_CMPL_EVENT_ID_EEM_CFG_CHANGE 0x3cUL + #define ASYNC_EVENT_CMPL_EVENT_ID_TFLIB_DEFAULT_VNIC_CHANGE 0x3dUL + #define ASYNC_EVENT_CMPL_EVENT_ID_TFLIB_LINK_STATUS_CHANGE 0x3eUL + #define ASYNC_EVENT_CMPL_EVENT_ID_QUIESCE_DONE 0x3fUL + #define ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE 0x40UL + #define ASYNC_EVENT_CMPL_EVENT_ID_PFC_WATCHDOG_CFG_CHANGE 0x41UL + #define ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST 0x42UL + #define ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE 0x43UL + #define ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP 0x44UL + #define ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT 0x45UL + #define ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_THRESHOLD 0x46UL + #define ASYNC_EVENT_CMPL_EVENT_ID_RSS_CHANGE 0x47UL + #define ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_NQ_UPDATE 0x48UL + #define ASYNC_EVENT_CMPL_EVENT_ID_HW_DOORBELL_RECOVERY_READ_ERROR 0x49UL + #define ASYNC_EVENT_CMPL_EVENT_ID_CTX_ERROR 0x4aUL + #define ASYNC_EVENT_CMPL_EVENT_ID_UDCC_SESSION_CHANGE 0x4bUL + #define ASYNC_EVENT_CMPL_EVENT_ID_DBG_BUF_PRODUCER 0x4cUL + #define ASYNC_EVENT_CMPL_EVENT_ID_PEER_MMAP_CHANGE 0x4dUL + #define ASYNC_EVENT_CMPL_EVENT_ID_REPRESENTOR_PAIR_CHANGE 0x4eUL + #define ASYNC_EVENT_CMPL_EVENT_ID_VF_STAT_CHANGE 0x4fUL + #define ASYNC_EVENT_CMPL_EVENT_ID_HOST_COREDUMP 0x50UL + #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x51UL + #define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG 0xfeUL + #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL + #define ASYNC_EVENT_CMPL_EVENT_ID_LAST ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_V 0x1UL + #define ASYNC_EVENT_CMPL_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; +}; + +/* hwrm_async_event_cmpl_link_status_change (size:128b/16B) */ +struct hwrm_async_event_cmpl_link_status_change { + __le16 type; + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_LAST ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_ID_LINK_STATUS_CHANGE 0x0UL + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_ID_LAST ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_ID_LINK_STATUS_CHANGE + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_V 0x1UL + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_LINK_CHANGE 0x1UL + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_LINK_CHANGE_DOWN 0x0UL + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_LINK_CHANGE_UP 0x1UL + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_LINK_CHANGE_LAST ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_LINK_CHANGE_UP + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_PORT_MASK 0xeUL + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_PORT_SFT 1 + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_PORT_ID_MASK 0xffff0UL + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_PORT_ID_SFT 4 + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_PF_ID_MASK 0xff00000UL + #define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_PF_ID_SFT 20 +}; + +/* hwrm_async_event_cmpl_port_conn_not_allowed (size:128b/16B) */ +struct hwrm_async_event_cmpl_port_conn_not_allowed { + __le16 type; + #define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_LAST ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_ID_PORT_CONN_NOT_ALLOWED 0x4UL + #define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_ID_LAST ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_ID_PORT_CONN_NOT_ALLOWED + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_V 0x1UL + #define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK 0xffffUL + #define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_SFT 0 + #define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_MASK 0xff0000UL + #define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_SFT 16 + #define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_NONE (0x0UL << 16) + #define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_DISABLETX (0x1UL << 16) + #define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_WARNINGMSG (0x2UL << 16) + #define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_PWRDOWN (0x3UL << 16) + #define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_LAST ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_PWRDOWN +}; + +/* hwrm_async_event_cmpl_link_speed_cfg_change (size:128b/16B) */ +struct hwrm_async_event_cmpl_link_speed_cfg_change { + __le16 type; + #define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_LAST ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_ID_LINK_SPEED_CFG_CHANGE 0x6UL + #define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_ID_LAST ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_ID_LINK_SPEED_CFG_CHANGE + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_V 0x1UL + #define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_DATA1_PORT_ID_MASK 0xffffUL + #define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_DATA1_PORT_ID_SFT 0 + #define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_DATA1_SUPPORTED_LINK_SPEEDS_CHANGE 0x10000UL + #define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_DATA1_ILLEGAL_LINK_SPEED_CFG 0x20000UL +}; + +/* hwrm_async_event_cmpl_reset_notify (size:128b/16B) */ +struct hwrm_async_event_cmpl_reset_notify { + __le16 type; + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_TYPE_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_RESET_NOTIFY 0x8UL + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_RESET_NOTIFY + __le32 event_data2; + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA2_FW_STATUS_CODE_MASK 0xffffUL + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA2_FW_STATUS_CODE_SFT 0 + u8 opaque_v; + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_V 0x1UL + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_MASK 0xffUL + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_SFT 0 + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_DRIVER_STOP_TX_QUEUE 0x1UL + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_DRIVER_IFDOWN 0x2UL + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_DRIVER_IFDOWN + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MASK 0xff00UL + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_SFT 8 + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MANAGEMENT_RESET_REQUEST (0x1UL << 8) + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL (0x2UL << 8) + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_NON_FATAL (0x3UL << 8) + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FAST_RESET (0x4UL << 8) + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_ACTIVATION (0x5UL << 8) + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_ACTIVATION + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_MASK 0xffff0000UL + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_SFT 16 +}; + +/* hwrm_async_event_cmpl_error_recovery (size:128b/16B) */ +struct hwrm_async_event_cmpl_error_recovery { + __le16 type; + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_RECOVERY_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_ID_ERROR_RECOVERY 0x9UL + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_ID_LAST ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_ID_ERROR_RECOVERY + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_V 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_MASTER_FUNC 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_RECOVERY_ENABLED 0x2UL +}; + +/* hwrm_async_event_cmpl_ring_monitor_msg (size:128b/16B) */ +struct hwrm_async_event_cmpl_ring_monitor_msg { + __le16 type; + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_TYPE_LAST ASYNC_EVENT_CMPL_RING_MONITOR_MSG_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_ID_RING_MONITOR_MSG 0xaUL + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_ID_LAST ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_ID_RING_MONITOR_MSG + __le32 event_data2; + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_MASK 0xffUL + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_TX 0x0UL + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_RX 0x1UL + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_CMPL 0x2UL + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_LAST ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_CMPL + u8 opaque_v; + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_V 0x1UL + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_RING_MONITOR_MSG_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; +}; + +/* hwrm_async_event_cmpl_vf_cfg_change (size:128b/16B) */ +struct hwrm_async_event_cmpl_vf_cfg_change { + __le16 type; + #define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_LAST ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_ID_VF_CFG_CHANGE 0x33UL + #define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_ID_LAST ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_ID_VF_CFG_CHANGE + __le32 event_data2; + #define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA2_VF_ID_MASK 0xffffUL + #define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA2_VF_ID_SFT 0 + u8 opaque_v; + #define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_V 0x1UL + #define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_MTU_CHANGE 0x1UL + #define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_MRU_CHANGE 0x2UL + #define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_DFLT_MAC_ADDR_CHANGE 0x4UL + #define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_DFLT_VLAN_CHANGE 0x8UL + #define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_TRUSTED_VF_CFG_CHANGE 0x10UL + #define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_TF_OWNERSHIP_RELEASE 0x20UL +}; + +/* hwrm_async_event_cmpl_default_vnic_change (size:128b/16B) */ +struct hwrm_async_event_cmpl_default_vnic_change { + __le16 type; + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_TYPE_LAST ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_TYPE_HWRM_ASYNC_EVENT + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_UNUSED1_MASK 0xffc0UL + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_UNUSED1_SFT 6 + __le16 event_id; + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_ID_ALLOC_FREE_NOTIFICATION 0x35UL + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_ID_LAST ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_ID_ALLOC_FREE_NOTIFICATION + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_V 0x1UL + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_DEF_VNIC_STATE_MASK 0x3UL + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_DEF_VNIC_STATE_SFT 0 + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_DEF_VNIC_STATE_DEF_VNIC_ALLOC 0x1UL + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_DEF_VNIC_STATE_DEF_VNIC_FREE 0x2UL + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_DEF_VNIC_STATE_LAST ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_DEF_VNIC_STATE_DEF_VNIC_FREE + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_PF_ID_MASK 0x3fcUL + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_PF_ID_SFT 2 + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_VF_ID_MASK 0x3fffc00UL + #define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_VF_ID_SFT 10 +}; + +/* hwrm_async_event_cmpl_hw_flow_aged (size:128b/16B) */ +struct hwrm_async_event_cmpl_hw_flow_aged { + __le16 type; + #define ASYNC_EVENT_CMPL_HW_FLOW_AGED_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_HW_FLOW_AGED_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_HW_FLOW_AGED_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_HW_FLOW_AGED_TYPE_LAST ASYNC_EVENT_CMPL_HW_FLOW_AGED_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_ID_HW_FLOW_AGED 0x36UL + #define ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_ID_LAST ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_ID_HW_FLOW_AGED + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_HW_FLOW_AGED_V 0x1UL + #define ASYNC_EVENT_CMPL_HW_FLOW_AGED_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_HW_FLOW_AGED_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_DATA1_FLOW_ID_MASK 0x7fffffffUL + #define ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_DATA1_FLOW_ID_SFT 0 + #define ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_DATA1_FLOW_DIRECTION 0x80000000UL + #define ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_DATA1_FLOW_DIRECTION_RX (0x0UL << 31) + #define ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_DATA1_FLOW_DIRECTION_TX (0x1UL << 31) + #define ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_DATA1_FLOW_DIRECTION_LAST ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_DATA1_FLOW_DIRECTION_TX +}; + +/* hwrm_async_event_cmpl_eem_cache_flush_req (size:128b/16B) */ +struct hwrm_async_event_cmpl_eem_cache_flush_req { + __le16 type; + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_TYPE_LAST ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_EVENT_ID_EEM_CACHE_FLUSH_REQ 0x38UL + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_EVENT_ID_LAST ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_EVENT_ID_EEM_CACHE_FLUSH_REQ + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_V 0x1UL + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; +}; + +/* hwrm_async_event_cmpl_eem_cache_flush_done (size:128b/16B) */ +struct hwrm_async_event_cmpl_eem_cache_flush_done { + __le16 type; + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_TYPE_LAST ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_EVENT_ID_EEM_CACHE_FLUSH_DONE 0x39UL + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_EVENT_ID_LAST ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_EVENT_ID_EEM_CACHE_FLUSH_DONE + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_V 0x1UL + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_EVENT_DATA1_FID_MASK 0xffffUL + #define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_EVENT_DATA1_FID_SFT 0 +}; + +/* hwrm_async_event_cmpl_deferred_response (size:128b/16B) */ +struct hwrm_async_event_cmpl_deferred_response { + __le16 type; + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_TYPE_LAST ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_EVENT_ID_DEFERRED_RESPONSE 0x40UL + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_EVENT_ID_LAST ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_EVENT_ID_DEFERRED_RESPONSE + __le32 event_data2; + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_EVENT_DATA2_SEQ_ID_MASK 0xffffUL + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_EVENT_DATA2_SEQ_ID_SFT 0 + u8 opaque_v; + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_V 0x1UL + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_DEFERRED_RESPONSE_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; +}; + +/* hwrm_async_event_cmpl_echo_request (size:128b/16B) */ +struct hwrm_async_event_cmpl_echo_request { + __le16 type; + #define ASYNC_EVENT_CMPL_ECHO_REQUEST_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_ECHO_REQUEST_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ECHO_REQUEST_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_ECHO_REQUEST_TYPE_LAST ASYNC_EVENT_CMPL_ECHO_REQUEST_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_ECHO_REQUEST_EVENT_ID_ECHO_REQUEST 0x42UL + #define ASYNC_EVENT_CMPL_ECHO_REQUEST_EVENT_ID_LAST ASYNC_EVENT_CMPL_ECHO_REQUEST_EVENT_ID_ECHO_REQUEST + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_ECHO_REQUEST_V 0x1UL + #define ASYNC_EVENT_CMPL_ECHO_REQUEST_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_ECHO_REQUEST_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; +}; + +/* hwrm_async_event_cmpl_phc_update (size:128b/16B) */ +struct hwrm_async_event_cmpl_phc_update { + __le16 type; + #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_LAST ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_PHC_UPDATE 0x43UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_LAST ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_PHC_UPDATE + __le32 event_data2; + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_MASTER_FID_MASK 0xffffUL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_MASTER_FID_SFT 0 + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_SEC_FID_MASK 0xffff0000UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_SEC_FID_SFT 16 + u8 opaque_v; + #define ASYNC_EVENT_CMPL_PHC_UPDATE_V 0x1UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_MASK 0xfUL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_SFT 0 + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_MASTER 0x1UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_SECONDARY 0x2UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_FAILOVER 0x3UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE 0x4UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_LAST ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_MASK 0xffff0UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_SFT 4 +}; + +/* hwrm_async_event_cmpl_pps_timestamp (size:128b/16B) */ +struct hwrm_async_event_cmpl_pps_timestamp { + __le16 type; + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_TYPE_LAST ASYNC_EVENT_CMPL_PPS_TIMESTAMP_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_ID_PPS_TIMESTAMP 0x44UL + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_ID_LAST ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_ID_PPS_TIMESTAMP + __le32 event_data2; + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE 0x1UL + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE_INTERNAL 0x0UL + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE_EXTERNAL 0x1UL + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE_LAST ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE_EXTERNAL + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PIN_NUMBER_MASK 0xeUL + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PIN_NUMBER_SFT 1 + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PPS_TIMESTAMP_UPPER_MASK 0xffff0UL + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PPS_TIMESTAMP_UPPER_SFT 4 + u8 opaque_v; + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_V 0x1UL + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA1_PPS_TIMESTAMP_LOWER_MASK 0xffffffffUL + #define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA1_PPS_TIMESTAMP_LOWER_SFT 0 +}; + +/* hwrm_async_event_cmpl_error_report (size:128b/16B) */ +struct hwrm_async_event_cmpl_error_report { + __le16 type; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_EVENT_ID_ERROR_REPORT 0x45UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_EVENT_ID_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_EVENT_ID_ERROR_REPORT + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_V 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_EVENT_DATA1_ERROR_TYPE_SFT 0 +}; + +/* hwrm_async_event_cmpl_dbg_buf_producer (size:128b/16B) */ +struct hwrm_async_event_cmpl_dbg_buf_producer { + __le16 type; + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_TYPE_LAST ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_ID_DBG_BUF_PRODUCER 0x4cUL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_ID_LAST ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_ID_DBG_BUF_PRODUCER + __le32 event_data2; + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA2_CURR_OFF_MASK 0xffffffffUL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA2_CURR_OFF_SFT 0 + u8 opaque_v; + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_V 0x1UL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_MASK 0xffffUL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_SRT_TRACE 0x0UL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_SRT2_TRACE 0x1UL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_CRT_TRACE 0x2UL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_CRT2_TRACE 0x3UL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_RIGP0_TRACE 0x4UL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_L2_HWRM_TRACE 0x5UL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_ROCE_HWRM_TRACE 0x6UL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_CA0_TRACE 0x7UL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_CA1_TRACE 0x8UL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_CA2_TRACE 0x9UL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_RIGP1_TRACE 0xaUL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_AFM_KONG_HWRM_TRACE 0xbUL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_LAST ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_AFM_KONG_HWRM_TRACE +}; + +/* hwrm_async_event_cmpl_hwrm_error (size:128b/16B) */ +struct hwrm_async_event_cmpl_hwrm_error { + __le16 type; + #define ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_ID_HWRM_ERROR 0xffUL + #define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_ID_LAST ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_ID_HWRM_ERROR + __le32 event_data2; + #define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_MASK 0xffUL + #define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_SFT 0 + #define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_WARNING 0x0UL + #define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_NONFATAL 0x1UL + #define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL 0x2UL + #define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_LAST ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL + u8 opaque_v; + #define ASYNC_EVENT_CMPL_HWRM_ERROR_V 0x1UL + #define ASYNC_EVENT_CMPL_HWRM_ERROR_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_HWRM_ERROR_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA1_TIMESTAMP 0x1UL +}; + +/* hwrm_async_event_cmpl_error_report_base (size:128b/16B) */ +struct hwrm_async_event_cmpl_error_report_base { + __le16 type; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_ID_ERROR_REPORT 0x45UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_ID_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_ID_ERROR_REPORT + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_V 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_RESERVED 0x0UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_PAUSE_STORM 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL 0x2UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM 0x3UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_THERMAL_THRESHOLD 0x5UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED 0x6UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED +}; + +/* hwrm_async_event_cmpl_error_report_pause_storm (size:128b/16B) */ +struct hwrm_async_event_cmpl_error_report_pause_storm { + __le16 type; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_EVENT_ID_ERROR_REPORT 0x45UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_EVENT_ID_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_EVENT_ID_ERROR_REPORT + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_V 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_EVENT_DATA1_ERROR_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_EVENT_DATA1_ERROR_TYPE_PAUSE_STORM 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_EVENT_DATA1_ERROR_TYPE_PAUSE_STORM +}; + +/* hwrm_async_event_cmpl_error_report_invalid_signal (size:128b/16B) */ +struct hwrm_async_event_cmpl_error_report_invalid_signal { + __le16 type; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_ID_ERROR_REPORT 0x45UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_ID_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_ID_ERROR_REPORT + __le32 event_data2; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA2_PIN_ID_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA2_PIN_ID_SFT 0 + u8 opaque_v; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_V 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA1_ERROR_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL 0x2UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL +}; + +/* hwrm_async_event_cmpl_error_report_nvm (size:128b/16B) */ +struct hwrm_async_event_cmpl_error_report_nvm { + __le16 type; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_ID_ERROR_REPORT 0x45UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_ID_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_ID_ERROR_REPORT + __le32 event_data2; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA2_ERR_ADDR_MASK 0xffffffffUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA2_ERR_ADDR_SFT 0 + u8 opaque_v; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_V 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_ERROR_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_ERROR_TYPE_NVM_ERROR 0x3UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_ERROR_TYPE_NVM_ERROR + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_MASK 0xff00UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_SFT 8 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_WRITE (0x1UL << 8) + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_ERASE (0x2UL << 8) + #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_ERASE +}; + +/* hwrm_async_event_cmpl_error_report_doorbell_drop_threshold (size:128b/16B) */ +struct hwrm_async_event_cmpl_error_report_doorbell_drop_threshold { + __le16 type; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_ERROR_REPORT 0x45UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_ERROR_REPORT + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_V 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_EPOCH_MASK 0xffffff00UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_EPOCH_SFT 8 +}; + +/* hwrm_async_event_cmpl_error_report_thermal (size:128b/16B) */ +struct hwrm_async_event_cmpl_error_report_thermal { + __le16 type; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_ID_ERROR_REPORT 0x45UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_ID_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_ID_ERROR_REPORT + __le32 event_data2; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_CURRENT_TEMP_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_CURRENT_TEMP_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_THRESHOLD_TEMP_MASK 0xff00UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_THRESHOLD_TEMP_SFT 8 + u8 opaque_v; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_V 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_ERROR_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_ERROR_TYPE_THERMAL_EVENT 0x5UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_ERROR_TYPE_THERMAL_EVENT + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_MASK 0x700UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_SFT 8 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_WARN (0x0UL << 8) + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_CRITICAL (0x1UL << 8) + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_FATAL (0x2UL << 8) + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_SHUTDOWN (0x3UL << 8) + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_SHUTDOWN + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_TRANSITION_DIR 0x800UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_TRANSITION_DIR_DECREASING (0x0UL << 11) + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_TRANSITION_DIR_INCREASING (0x1UL << 11) + #define ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_TRANSITION_DIR_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_TRANSITION_DIR_INCREASING +}; + +/* hwrm_async_event_cmpl_error_report_dual_data_rate_not_supported (size:128b/16B) */ +struct hwrm_async_event_cmpl_error_report_dual_data_rate_not_supported { + __le16 type; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_EVENT_ID_ERROR_REPORT 0x45UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_EVENT_ID_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_EVENT_ID_ERROR_REPORT + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_V 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_EVENT_DATA1_ERROR_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED 0x6UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED +}; + +/* hwrm_func_reset_input (size:192b/24B) */ +struct hwrm_func_reset_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 enables; + #define FUNC_RESET_REQ_ENABLES_VF_ID_VALID 0x1UL + __le16 vf_id; + u8 func_reset_level; + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETALL 0x0UL + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETME 0x1UL + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETCHILDREN 0x2UL + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETVF 0x3UL + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_LAST FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETVF + u8 unused_0; +}; + +/* hwrm_func_reset_output (size:128b/16B) */ +struct hwrm_func_reset_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_getfid_input (size:192b/24B) */ +struct hwrm_func_getfid_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 enables; + #define FUNC_GETFID_REQ_ENABLES_PCI_ID 0x1UL + __le16 pci_id; + u8 unused_0[2]; +}; + +/* hwrm_func_getfid_output (size:128b/16B) */ +struct hwrm_func_getfid_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 fid; + u8 unused_0[5]; + u8 valid; +}; + +/* hwrm_func_vf_alloc_input (size:192b/24B) */ +struct hwrm_func_vf_alloc_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 enables; + #define FUNC_VF_ALLOC_REQ_ENABLES_FIRST_VF_ID 0x1UL + __le16 first_vf_id; + __le16 num_vfs; +}; + +/* hwrm_func_vf_alloc_output (size:128b/16B) */ +struct hwrm_func_vf_alloc_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 first_vf_id; + u8 unused_0[5]; + u8 valid; +}; + +/* hwrm_func_vf_free_input (size:192b/24B) */ +struct hwrm_func_vf_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 enables; + #define FUNC_VF_FREE_REQ_ENABLES_FIRST_VF_ID 0x1UL + __le16 first_vf_id; + __le16 num_vfs; +}; + +/* hwrm_func_vf_free_output (size:128b/16B) */ +struct hwrm_func_vf_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_vf_cfg_input (size:576b/72B) */ +struct hwrm_func_vf_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 enables; + #define FUNC_VF_CFG_REQ_ENABLES_MTU 0x1UL + #define FUNC_VF_CFG_REQ_ENABLES_GUEST_VLAN 0x2UL + #define FUNC_VF_CFG_REQ_ENABLES_ASYNC_EVENT_CR 0x4UL + #define FUNC_VF_CFG_REQ_ENABLES_DFLT_MAC_ADDR 0x8UL + #define FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS 0x10UL + #define FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS 0x20UL + #define FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS 0x40UL + #define FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS 0x80UL + #define FUNC_VF_CFG_REQ_ENABLES_NUM_L2_CTXS 0x100UL + #define FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS 0x200UL + #define FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS 0x400UL + #define FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS 0x800UL + #define FUNC_VF_CFG_REQ_ENABLES_NUM_KTLS_TX_KEY_CTXS 0x1000UL + #define FUNC_VF_CFG_REQ_ENABLES_NUM_KTLS_RX_KEY_CTXS 0x2000UL + #define FUNC_VF_CFG_REQ_ENABLES_NUM_QUIC_TX_KEY_CTXS 0x4000UL + #define FUNC_VF_CFG_REQ_ENABLES_NUM_QUIC_RX_KEY_CTXS 0x8000UL + __le16 mtu; + __le16 guest_vlan; + __le16 async_event_cr; + u8 dflt_mac_addr[6]; + __le32 flags; + #define FUNC_VF_CFG_REQ_FLAGS_TX_ASSETS_TEST 0x1UL + #define FUNC_VF_CFG_REQ_FLAGS_RX_ASSETS_TEST 0x2UL + #define FUNC_VF_CFG_REQ_FLAGS_CMPL_ASSETS_TEST 0x4UL + #define FUNC_VF_CFG_REQ_FLAGS_RSSCOS_CTX_ASSETS_TEST 0x8UL + #define FUNC_VF_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST 0x10UL + #define FUNC_VF_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST 0x20UL + #define FUNC_VF_CFG_REQ_FLAGS_VNIC_ASSETS_TEST 0x40UL + #define FUNC_VF_CFG_REQ_FLAGS_L2_CTX_ASSETS_TEST 0x80UL + #define FUNC_VF_CFG_REQ_FLAGS_PPP_PUSH_MODE_ENABLE 0x100UL + #define FUNC_VF_CFG_REQ_FLAGS_PPP_PUSH_MODE_DISABLE 0x200UL + __le16 num_rsscos_ctxs; + __le16 num_cmpl_rings; + __le16 num_tx_rings; + __le16 num_rx_rings; + __le16 num_l2_ctxs; + __le16 num_vnics; + __le16 num_stat_ctxs; + __le16 num_hw_ring_grps; + __le32 num_ktls_tx_key_ctxs; + __le32 num_ktls_rx_key_ctxs; + __le16 num_msix; + u8 unused[2]; + __le32 num_quic_tx_key_ctxs; + __le32 num_quic_rx_key_ctxs; +}; + +/* hwrm_func_vf_cfg_output (size:128b/16B) */ +struct hwrm_func_vf_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_qcaps_input (size:192b/24B) */ +struct hwrm_func_qcaps_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 fid; + u8 unused_0[6]; +}; + +/* hwrm_func_qcaps_output (size:1152b/144B) */ +struct hwrm_func_qcaps_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 fid; + __le16 port_id; + __le32 flags; + #define FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED 0x1UL + #define FUNC_QCAPS_RESP_FLAGS_GLOBAL_MSIX_AUTOMASKING 0x2UL + #define FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED 0x4UL + #define FUNC_QCAPS_RESP_FLAGS_ROCE_V1_SUPPORTED 0x8UL + #define FUNC_QCAPS_RESP_FLAGS_ROCE_V2_SUPPORTED 0x10UL + #define FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED 0x20UL + #define FUNC_QCAPS_RESP_FLAGS_WOL_BMP_SUPPORTED 0x40UL + #define FUNC_QCAPS_RESP_FLAGS_TX_RING_RL_SUPPORTED 0x80UL + #define FUNC_QCAPS_RESP_FLAGS_TX_BW_CFG_SUPPORTED 0x100UL + #define FUNC_QCAPS_RESP_FLAGS_VF_TX_RING_RL_SUPPORTED 0x200UL + #define FUNC_QCAPS_RESP_FLAGS_VF_BW_CFG_SUPPORTED 0x400UL + #define FUNC_QCAPS_RESP_FLAGS_STD_TX_RING_MODE_SUPPORTED 0x800UL + #define FUNC_QCAPS_RESP_FLAGS_GENEVE_TUN_FLAGS_SUPPORTED 0x1000UL + #define FUNC_QCAPS_RESP_FLAGS_NVGRE_TUN_FLAGS_SUPPORTED 0x2000UL + #define FUNC_QCAPS_RESP_FLAGS_GRE_TUN_FLAGS_SUPPORTED 0x4000UL + #define FUNC_QCAPS_RESP_FLAGS_MPLS_TUN_FLAGS_SUPPORTED 0x8000UL + #define FUNC_QCAPS_RESP_FLAGS_PCIE_STATS_SUPPORTED 0x10000UL + #define FUNC_QCAPS_RESP_FLAGS_ADOPTED_PF_SUPPORTED 0x20000UL + #define FUNC_QCAPS_RESP_FLAGS_ADMIN_PF_SUPPORTED 0x40000UL + #define FUNC_QCAPS_RESP_FLAGS_LINK_ADMIN_STATUS_SUPPORTED 0x80000UL + #define FUNC_QCAPS_RESP_FLAGS_WCB_PUSH_MODE 0x100000UL + #define FUNC_QCAPS_RESP_FLAGS_DYNAMIC_TX_RING_ALLOC 0x200000UL + #define FUNC_QCAPS_RESP_FLAGS_HOT_RESET_CAPABLE 0x400000UL + #define FUNC_QCAPS_RESP_FLAGS_ERROR_RECOVERY_CAPABLE 0x800000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_STATS_SUPPORTED 0x1000000UL + #define FUNC_QCAPS_RESP_FLAGS_ERR_RECOVER_RELOAD 0x2000000UL + #define FUNC_QCAPS_RESP_FLAGS_NOTIFY_VF_DEF_VNIC_CHNG_SUPPORTED 0x4000000UL + #define FUNC_QCAPS_RESP_FLAGS_VLAN_ACCELERATION_TX_DISABLED 0x8000000UL + #define FUNC_QCAPS_RESP_FLAGS_COREDUMP_CMD_SUPPORTED 0x10000000UL + #define FUNC_QCAPS_RESP_FLAGS_CRASHDUMP_CMD_SUPPORTED 0x20000000UL + #define FUNC_QCAPS_RESP_FLAGS_PFC_WD_STATS_SUPPORTED 0x40000000UL + #define FUNC_QCAPS_RESP_FLAGS_DBG_QCAPS_CMD_SUPPORTED 0x80000000UL + u8 mac_address[6]; + __le16 max_rsscos_ctx; + __le16 max_cmpl_rings; + __le16 max_tx_rings; + __le16 max_rx_rings; + __le16 max_l2_ctxs; + __le16 max_vnics; + __le16 first_vf_id; + __le16 max_vfs; + __le16 max_stat_ctx; + __le32 max_encap_records; + __le32 max_decap_records; + __le32 max_tx_em_flows; + __le32 max_tx_wm_flows; + __le32 max_rx_em_flows; + __le32 max_rx_wm_flows; + __le32 max_mcast_filters; + __le32 max_flow_id; + __le32 max_hw_ring_grps; + __le16 max_sp_tx_rings; + __le16 max_msix_vfs; + __le32 flags_ext; + #define FUNC_QCAPS_RESP_FLAGS_EXT_ECN_MARK_SUPPORTED 0x1UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_ECN_STATS_SUPPORTED 0x2UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_EXT_HW_STATS_SUPPORTED 0x4UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_HOT_RESET_IF_SUPPORT 0x8UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_PROXY_MODE_SUPPORT 0x10UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_TX_PROXY_SRC_INTF_OVERRIDE_SUPPORT 0x20UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_SCHQ_SUPPORTED 0x40UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_PPP_PUSH_MODE_SUPPORTED 0x80UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_EVB_MODE_CFG_NOT_SUPPORTED 0x100UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_SOC_SPD_SUPPORTED 0x200UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED 0x400UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_FAST_RESET_CAPABLE 0x800UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_TX_METADATA_CFG_CAPABLE 0x1000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_NVM_OPTION_ACTION_SUPPORTED 0x2000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_BD_METADATA_SUPPORTED 0x4000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_ECHO_REQUEST_SUPPORTED 0x8000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_NPAR_1_2_SUPPORTED 0x10000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_PTP_PTM_SUPPORTED 0x20000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_PTP_PPS_SUPPORTED 0x40000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_VF_CFG_ASYNC_FOR_PF_SUPPORTED 0x80000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_PARTITION_BW_SUPPORTED 0x100000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_DFLT_VLAN_TPID_PCP_SUPPORTED 0x200000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_KTLS_SUPPORTED 0x400000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_EP_RATE_CONTROL 0x800000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_MIN_BW_SUPPORTED 0x1000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_TX_COAL_CMPL_CAP 0x2000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_SUPPORTED 0x4000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_REQUIRED 0x8000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_PTP_64BIT_RTC_SUPPORTED 0x10000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_DBR_PACING_SUPPORTED 0x20000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_HW_DBR_DROP_RECOV_SUPPORTED 0x40000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_DISABLE_CQ_OVERFLOW_DETECTION_SUPPORTED 0x80000000UL + u8 max_schqs; + u8 mpc_chnls_cap; + #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TCE 0x1UL + #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_RCE 0x2UL + #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TE_CFA 0x4UL + #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_RE_CFA 0x8UL + #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_PRIMATE 0x10UL + __le16 max_key_ctxs_alloc; + __le32 flags_ext2; + #define FUNC_QCAPS_RESP_FLAGS_EXT2_RX_ALL_PKTS_TIMESTAMPS_SUPPORTED 0x1UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_QUIC_SUPPORTED 0x2UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_KDNET_SUPPORTED 0x4UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED 0x8UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_SW_DBR_DROP_RECOVERY_SUPPORTED 0x10UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_GENERIC_STATS_SUPPORTED 0x20UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_UDP_GSO_SUPPORTED 0x40UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_SYNCE_SUPPORTED 0x80UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_V0_SUPPORTED 0x100UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_TX_PKT_TS_CMPL_SUPPORTED 0x200UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_HW_LAG_SUPPORTED 0x400UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_ON_CHIP_CTX_SUPPORTED 0x800UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_STEERING_TAG_SUPPORTED 0x1000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_ENHANCED_VF_SCALE_SUPPORTED 0x2000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_KEY_XID_PARTITION_SUPPORTED 0x4000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_CONCURRENT_KTLS_QUIC_SUPPORTED 0x8000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_SCHQ_CROSS_TC_CAP_SUPPORTED 0x10000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_SCHQ_PER_TC_CAP_SUPPORTED 0x20000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_SCHQ_PER_TC_RESERVATION_SUPPORTED 0x40000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_DB_ERROR_STATS_SUPPORTED 0x80000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_ROCE_VF_RESOURCE_MGMT_SUPPORTED 0x100000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_UDCC_SUPPORTED 0x200000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_TIMED_TX_SO_TXTIME_SUPPORTED 0x400000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_SW_MAX_RESOURCE_LIMITS_SUPPORTED 0x800000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_TF_INGRESS_NIC_FLOW_SUPPORTED 0x1000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_LPBK_STATS_SUPPORTED 0x2000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_TF_EGRESS_NIC_FLOW_SUPPORTED 0x4000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_MULTI_LOSSLESS_QUEUES_SUPPORTED 0x8000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_PEER_MMAP_SUPPORTED 0x10000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_TIMED_TX_PACING_SUPPORTED 0x20000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_VF_STAT_EJECTION_SUPPORTED 0x40000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_HOST_COREDUMP_SUPPORTED 0x80000000UL + __le16 tunnel_disable_flag; + #define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_VXLAN 0x1UL + #define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_NGE 0x2UL + #define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_NVGRE 0x4UL + #define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_L2GRE 0x8UL + #define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_GRE 0x10UL + #define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_IPINIP 0x20UL + #define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_MPLS 0x40UL + #define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_PPPOE 0x80UL + __le16 xid_partition_cap; + #define FUNC_QCAPS_RESP_XID_PARTITION_CAP_TX_CK 0x1UL + #define FUNC_QCAPS_RESP_XID_PARTITION_CAP_RX_CK 0x2UL + u8 device_serial_number[8]; + __le16 ctxs_per_partition; + __le16 max_tso_segs; + __le32 roce_vf_max_av; + __le32 roce_vf_max_cq; + __le32 roce_vf_max_mrw; + __le32 roce_vf_max_qp; + __le32 roce_vf_max_srq; + __le32 roce_vf_max_gid; + __le32 flags_ext3; + #define FUNC_QCAPS_RESP_FLAGS_EXT3_RM_RSV_WHILE_ALLOC_CAP 0x1UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_REQUIRE_L2_FILTER 0x2UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_MAX_ROCE_VFS_SUPPORTED 0x4UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_RX_RATE_PROFILE_SEL_SUPPORTED 0x8UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_BIDI_OPT_SUPPORTED 0x10UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_MIRROR_ON_ROCE_SUPPORTED 0x20UL + __le16 max_roce_vfs; + __le16 max_crypto_rx_flow_filters; + u8 unused_3[3]; + u8 valid; +}; + +/* hwrm_func_qcfg_input (size:192b/24B) */ +struct hwrm_func_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 fid; + u8 unused_0[6]; +}; + +/* hwrm_func_qcfg_output (size:1344b/168B) */ +struct hwrm_func_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 fid; + __le16 port_id; + __le16 vlan; + __le16 flags; + #define FUNC_QCFG_RESP_FLAGS_OOB_WOL_MAGICPKT_ENABLED 0x1UL + #define FUNC_QCFG_RESP_FLAGS_OOB_WOL_BMP_ENABLED 0x2UL + #define FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED 0x4UL + #define FUNC_QCFG_RESP_FLAGS_STD_TX_RING_MODE_ENABLED 0x8UL + #define FUNC_QCFG_RESP_FLAGS_FW_LLDP_AGENT_ENABLED 0x10UL + #define FUNC_QCFG_RESP_FLAGS_MULTI_HOST 0x20UL + #define FUNC_QCFG_RESP_FLAGS_TRUSTED_VF 0x40UL + #define FUNC_QCFG_RESP_FLAGS_SECURE_MODE_ENABLED 0x80UL + #define FUNC_QCFG_RESP_FLAGS_PREBOOT_LEGACY_L2_RINGS 0x100UL + #define FUNC_QCFG_RESP_FLAGS_HOT_RESET_ALLOWED 0x200UL + #define FUNC_QCFG_RESP_FLAGS_PPP_PUSH_MODE_ENABLED 0x400UL + #define FUNC_QCFG_RESP_FLAGS_RING_MONITOR_ENABLED 0x800UL + #define FUNC_QCFG_RESP_FLAGS_FAST_RESET_ALLOWED 0x1000UL + #define FUNC_QCFG_RESP_FLAGS_MULTI_ROOT 0x2000UL + #define FUNC_QCFG_RESP_FLAGS_ENABLE_RDMA_SRIOV 0x4000UL + #define FUNC_QCFG_RESP_FLAGS_ROCE_VNIC_ID_VALID 0x8000UL + u8 mac_address[6]; + __le16 pci_id; + __le16 alloc_rsscos_ctx; + __le16 alloc_cmpl_rings; + __le16 alloc_tx_rings; + __le16 alloc_rx_rings; + __le16 alloc_l2_ctx; + __le16 alloc_vnics; + __le16 admin_mtu; + __le16 mru; + __le16 stat_ctx_id; + u8 port_partition_type; + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_SPF 0x0UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_MPFS 0x1UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0 0x2UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5 0x3UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR2_0 0x4UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_2 0x5UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_UNKNOWN 0xffUL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_LAST FUNC_QCFG_RESP_PORT_PARTITION_TYPE_UNKNOWN + u8 port_pf_cnt; + #define FUNC_QCFG_RESP_PORT_PF_CNT_UNAVAIL 0x0UL + #define FUNC_QCFG_RESP_PORT_PF_CNT_LAST FUNC_QCFG_RESP_PORT_PF_CNT_UNAVAIL + __le16 dflt_vnic_id; + __le16 max_mtu_configured; + __le32 min_bw; + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_SFT 0 + #define FUNC_QCFG_RESP_MIN_BW_SCALE 0x10000000UL + #define FUNC_QCFG_RESP_MIN_BW_SCALE_BITS (0x0UL << 28) + #define FUNC_QCFG_RESP_MIN_BW_SCALE_BYTES (0x1UL << 28) + #define FUNC_QCFG_RESP_MIN_BW_SCALE_LAST FUNC_QCFG_RESP_MIN_BW_SCALE_BYTES + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_MEGA (0x0UL << 29) + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_KILO (0x2UL << 29) + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_BASE (0x4UL << 29) + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_GIGA (0x6UL << 29) + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_LAST FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_INVALID + __le32 max_bw; + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_SFT 0 + #define FUNC_QCFG_RESP_MAX_BW_SCALE 0x10000000UL + #define FUNC_QCFG_RESP_MAX_BW_SCALE_BITS (0x0UL << 28) + #define FUNC_QCFG_RESP_MAX_BW_SCALE_BYTES (0x1UL << 28) + #define FUNC_QCFG_RESP_MAX_BW_SCALE_LAST FUNC_QCFG_RESP_MAX_BW_SCALE_BYTES + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_MEGA (0x0UL << 29) + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_KILO (0x2UL << 29) + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_BASE (0x4UL << 29) + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_GIGA (0x6UL << 29) + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_LAST FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_INVALID + u8 evb_mode; + #define FUNC_QCFG_RESP_EVB_MODE_NO_EVB 0x0UL + #define FUNC_QCFG_RESP_EVB_MODE_VEB 0x1UL + #define FUNC_QCFG_RESP_EVB_MODE_VEPA 0x2UL + #define FUNC_QCFG_RESP_EVB_MODE_LAST FUNC_QCFG_RESP_EVB_MODE_VEPA + u8 options; + #define FUNC_QCFG_RESP_OPTIONS_CACHE_LINESIZE_MASK 0x3UL + #define FUNC_QCFG_RESP_OPTIONS_CACHE_LINESIZE_SFT 0 + #define FUNC_QCFG_RESP_OPTIONS_CACHE_LINESIZE_SIZE_64 0x0UL + #define FUNC_QCFG_RESP_OPTIONS_CACHE_LINESIZE_SIZE_128 0x1UL + #define FUNC_QCFG_RESP_OPTIONS_CACHE_LINESIZE_LAST FUNC_QCFG_RESP_OPTIONS_CACHE_LINESIZE_SIZE_128 + #define FUNC_QCFG_RESP_OPTIONS_LINK_ADMIN_STATE_MASK 0xcUL + #define FUNC_QCFG_RESP_OPTIONS_LINK_ADMIN_STATE_SFT 2 + #define FUNC_QCFG_RESP_OPTIONS_LINK_ADMIN_STATE_FORCED_DOWN (0x0UL << 2) + #define FUNC_QCFG_RESP_OPTIONS_LINK_ADMIN_STATE_FORCED_UP (0x1UL << 2) + #define FUNC_QCFG_RESP_OPTIONS_LINK_ADMIN_STATE_AUTO (0x2UL << 2) + #define FUNC_QCFG_RESP_OPTIONS_LINK_ADMIN_STATE_LAST FUNC_QCFG_RESP_OPTIONS_LINK_ADMIN_STATE_AUTO + #define FUNC_QCFG_RESP_OPTIONS_RSVD_MASK 0xf0UL + #define FUNC_QCFG_RESP_OPTIONS_RSVD_SFT 4 + __le16 alloc_vfs; + __le32 alloc_mcast_filters; + __le32 alloc_hw_ring_grps; + __le16 alloc_sp_tx_rings; + __le16 alloc_stat_ctx; + __le16 alloc_msix; + __le16 registered_vfs; + __le16 l2_doorbell_bar_size_kb; + u8 active_endpoints; + u8 always_1; + __le32 reset_addr_poll; + __le16 legacy_l2_db_size_kb; + __le16 svif_info; + #define FUNC_QCFG_RESP_SVIF_INFO_SVIF_MASK 0x7fffUL + #define FUNC_QCFG_RESP_SVIF_INFO_SVIF_SFT 0 + #define FUNC_QCFG_RESP_SVIF_INFO_SVIF_VALID 0x8000UL + u8 mpc_chnls; + #define FUNC_QCFG_RESP_MPC_CHNLS_TCE_ENABLED 0x1UL + #define FUNC_QCFG_RESP_MPC_CHNLS_RCE_ENABLED 0x2UL + #define FUNC_QCFG_RESP_MPC_CHNLS_TE_CFA_ENABLED 0x4UL + #define FUNC_QCFG_RESP_MPC_CHNLS_RE_CFA_ENABLED 0x8UL + #define FUNC_QCFG_RESP_MPC_CHNLS_PRIMATE_ENABLED 0x10UL + u8 db_page_size; + #define FUNC_QCFG_RESP_DB_PAGE_SIZE_4KB 0x0UL + #define FUNC_QCFG_RESP_DB_PAGE_SIZE_8KB 0x1UL + #define FUNC_QCFG_RESP_DB_PAGE_SIZE_16KB 0x2UL + #define FUNC_QCFG_RESP_DB_PAGE_SIZE_32KB 0x3UL + #define FUNC_QCFG_RESP_DB_PAGE_SIZE_64KB 0x4UL + #define FUNC_QCFG_RESP_DB_PAGE_SIZE_128KB 0x5UL + #define FUNC_QCFG_RESP_DB_PAGE_SIZE_256KB 0x6UL + #define FUNC_QCFG_RESP_DB_PAGE_SIZE_512KB 0x7UL + #define FUNC_QCFG_RESP_DB_PAGE_SIZE_1MB 0x8UL + #define FUNC_QCFG_RESP_DB_PAGE_SIZE_2MB 0x9UL + #define FUNC_QCFG_RESP_DB_PAGE_SIZE_4MB 0xaUL + #define FUNC_QCFG_RESP_DB_PAGE_SIZE_LAST FUNC_QCFG_RESP_DB_PAGE_SIZE_4MB + __le16 roce_vnic_id; + __le32 partition_min_bw; + #define FUNC_QCFG_RESP_PARTITION_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_QCFG_RESP_PARTITION_MIN_BW_BW_VALUE_SFT 0 + #define FUNC_QCFG_RESP_PARTITION_MIN_BW_SCALE 0x10000000UL + #define FUNC_QCFG_RESP_PARTITION_MIN_BW_SCALE_BITS (0x0UL << 28) + #define FUNC_QCFG_RESP_PARTITION_MIN_BW_SCALE_BYTES (0x1UL << 28) + #define FUNC_QCFG_RESP_PARTITION_MIN_BW_SCALE_LAST FUNC_QCFG_RESP_PARTITION_MIN_BW_SCALE_BYTES + #define FUNC_QCFG_RESP_PARTITION_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_QCFG_RESP_PARTITION_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_QCFG_RESP_PARTITION_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_QCFG_RESP_PARTITION_MIN_BW_BW_VALUE_UNIT_LAST FUNC_QCFG_RESP_PARTITION_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 + __le32 partition_max_bw; + #define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_SFT 0 + #define FUNC_QCFG_RESP_PARTITION_MAX_BW_SCALE 0x10000000UL + #define FUNC_QCFG_RESP_PARTITION_MAX_BW_SCALE_BITS (0x0UL << 28) + #define FUNC_QCFG_RESP_PARTITION_MAX_BW_SCALE_BYTES (0x1UL << 28) + #define FUNC_QCFG_RESP_PARTITION_MAX_BW_SCALE_LAST FUNC_QCFG_RESP_PARTITION_MAX_BW_SCALE_BYTES + #define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_LAST FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 + __le16 host_mtu; + __le16 flags2; + #define FUNC_QCFG_RESP_FLAGS2_SRIOV_DSCP_INSERT_ENABLED 0x1UL + __le16 stag_vid; + u8 port_kdnet_mode; + #define FUNC_QCFG_RESP_PORT_KDNET_MODE_DISABLED 0x0UL + #define FUNC_QCFG_RESP_PORT_KDNET_MODE_ENABLED 0x1UL + #define FUNC_QCFG_RESP_PORT_KDNET_MODE_LAST FUNC_QCFG_RESP_PORT_KDNET_MODE_ENABLED + u8 kdnet_pcie_function; + __le16 port_kdnet_fid; + u8 unused_5; + u8 roce_bidi_opt_mode; + #define FUNC_QCFG_RESP_ROCE_BIDI_OPT_MODE_DISABLED 0x1UL + #define FUNC_QCFG_RESP_ROCE_BIDI_OPT_MODE_DEDICATED 0x2UL + #define FUNC_QCFG_RESP_ROCE_BIDI_OPT_MODE_SHARED 0x4UL + __le32 num_ktls_tx_key_ctxs; + __le32 num_ktls_rx_key_ctxs; + u8 lag_id; + u8 parif; + u8 fw_lag_id; + u8 unused_6; + __le32 num_quic_tx_key_ctxs; + __le32 num_quic_rx_key_ctxs; + __le32 roce_max_av_per_vf; + __le32 roce_max_cq_per_vf; + __le32 roce_max_mrw_per_vf; + __le32 roce_max_qp_per_vf; + __le32 roce_max_srq_per_vf; + __le32 roce_max_gid_per_vf; + __le16 xid_partition_cfg; + #define FUNC_QCFG_RESP_XID_PARTITION_CFG_TX_CK 0x1UL + #define FUNC_QCFG_RESP_XID_PARTITION_CFG_RX_CK 0x2UL + __le16 mirror_vnic_id; + u8 unused_7[7]; + u8 valid; +}; + +/* hwrm_func_cfg_input (size:1280b/160B) */ +struct hwrm_func_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 fid; + __le16 num_msix; + __le32 flags; + #define FUNC_CFG_REQ_FLAGS_SRC_MAC_ADDR_CHECK_DISABLE 0x1UL + #define FUNC_CFG_REQ_FLAGS_SRC_MAC_ADDR_CHECK_ENABLE 0x2UL + #define FUNC_CFG_REQ_FLAGS_RSVD_MASK 0x1fcUL + #define FUNC_CFG_REQ_FLAGS_RSVD_SFT 2 + #define FUNC_CFG_REQ_FLAGS_STD_TX_RING_MODE_ENABLE 0x200UL + #define FUNC_CFG_REQ_FLAGS_STD_TX_RING_MODE_DISABLE 0x400UL + #define FUNC_CFG_REQ_FLAGS_VIRT_MAC_PERSIST 0x800UL + #define FUNC_CFG_REQ_FLAGS_NO_AUTOCLEAR_STATISTIC 0x1000UL + #define FUNC_CFG_REQ_FLAGS_TX_ASSETS_TEST 0x2000UL + #define FUNC_CFG_REQ_FLAGS_RX_ASSETS_TEST 0x4000UL + #define FUNC_CFG_REQ_FLAGS_CMPL_ASSETS_TEST 0x8000UL + #define FUNC_CFG_REQ_FLAGS_RSSCOS_CTX_ASSETS_TEST 0x10000UL + #define FUNC_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST 0x20000UL + #define FUNC_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST 0x40000UL + #define FUNC_CFG_REQ_FLAGS_VNIC_ASSETS_TEST 0x80000UL + #define FUNC_CFG_REQ_FLAGS_L2_CTX_ASSETS_TEST 0x100000UL + #define FUNC_CFG_REQ_FLAGS_TRUSTED_VF_ENABLE 0x200000UL + #define FUNC_CFG_REQ_FLAGS_DYNAMIC_TX_RING_ALLOC 0x400000UL + #define FUNC_CFG_REQ_FLAGS_NQ_ASSETS_TEST 0x800000UL + #define FUNC_CFG_REQ_FLAGS_TRUSTED_VF_DISABLE 0x1000000UL + #define FUNC_CFG_REQ_FLAGS_PREBOOT_LEGACY_L2_RINGS 0x2000000UL + #define FUNC_CFG_REQ_FLAGS_HOT_RESET_IF_EN_DIS 0x4000000UL + #define FUNC_CFG_REQ_FLAGS_PPP_PUSH_MODE_ENABLE 0x8000000UL + #define FUNC_CFG_REQ_FLAGS_PPP_PUSH_MODE_DISABLE 0x10000000UL + #define FUNC_CFG_REQ_FLAGS_BD_METADATA_ENABLE 0x20000000UL + #define FUNC_CFG_REQ_FLAGS_BD_METADATA_DISABLE 0x40000000UL + __le32 enables; + #define FUNC_CFG_REQ_ENABLES_ADMIN_MTU 0x1UL + #define FUNC_CFG_REQ_ENABLES_MRU 0x2UL + #define FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS 0x4UL + #define FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS 0x8UL + #define FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS 0x10UL + #define FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS 0x20UL + #define FUNC_CFG_REQ_ENABLES_NUM_L2_CTXS 0x40UL + #define FUNC_CFG_REQ_ENABLES_NUM_VNICS 0x80UL + #define FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS 0x100UL + #define FUNC_CFG_REQ_ENABLES_DFLT_MAC_ADDR 0x200UL + #define FUNC_CFG_REQ_ENABLES_DFLT_VLAN 0x400UL + #define FUNC_CFG_REQ_ENABLES_DFLT_IP_ADDR 0x800UL + #define FUNC_CFG_REQ_ENABLES_MIN_BW 0x1000UL + #define FUNC_CFG_REQ_ENABLES_MAX_BW 0x2000UL + #define FUNC_CFG_REQ_ENABLES_ASYNC_EVENT_CR 0x4000UL + #define FUNC_CFG_REQ_ENABLES_VLAN_ANTISPOOF_MODE 0x8000UL + #define FUNC_CFG_REQ_ENABLES_ALLOWED_VLAN_PRIS 0x10000UL + #define FUNC_CFG_REQ_ENABLES_EVB_MODE 0x20000UL + #define FUNC_CFG_REQ_ENABLES_NUM_MCAST_FILTERS 0x40000UL + #define FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS 0x80000UL + #define FUNC_CFG_REQ_ENABLES_CACHE_LINESIZE 0x100000UL + #define FUNC_CFG_REQ_ENABLES_NUM_MSIX 0x200000UL + #define FUNC_CFG_REQ_ENABLES_ADMIN_LINK_STATE 0x400000UL + #define FUNC_CFG_REQ_ENABLES_HOT_RESET_IF_SUPPORT 0x800000UL + #define FUNC_CFG_REQ_ENABLES_SCHQ_ID 0x1000000UL + #define FUNC_CFG_REQ_ENABLES_MPC_CHNLS 0x2000000UL + #define FUNC_CFG_REQ_ENABLES_PARTITION_MIN_BW 0x4000000UL + #define FUNC_CFG_REQ_ENABLES_PARTITION_MAX_BW 0x8000000UL + #define FUNC_CFG_REQ_ENABLES_TPID 0x10000000UL + #define FUNC_CFG_REQ_ENABLES_HOST_MTU 0x20000000UL + #define FUNC_CFG_REQ_ENABLES_KTLS_TX_KEY_CTXS 0x40000000UL + #define FUNC_CFG_REQ_ENABLES_KTLS_RX_KEY_CTXS 0x80000000UL + __le16 admin_mtu; + __le16 mru; + __le16 num_rsscos_ctxs; + __le16 num_cmpl_rings; + __le16 num_tx_rings; + __le16 num_rx_rings; + __le16 num_l2_ctxs; + __le16 num_vnics; + __le16 num_stat_ctxs; + __le16 num_hw_ring_grps; + u8 dflt_mac_addr[6]; + __le16 dflt_vlan; + __be32 dflt_ip_addr[4]; + __le32 min_bw; + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_SFT 0 + #define FUNC_CFG_REQ_MIN_BW_SCALE 0x10000000UL + #define FUNC_CFG_REQ_MIN_BW_SCALE_BITS (0x0UL << 28) + #define FUNC_CFG_REQ_MIN_BW_SCALE_BYTES (0x1UL << 28) + #define FUNC_CFG_REQ_MIN_BW_SCALE_LAST FUNC_CFG_REQ_MIN_BW_SCALE_BYTES + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_MEGA (0x0UL << 29) + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_KILO (0x2UL << 29) + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_BASE (0x4UL << 29) + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_GIGA (0x6UL << 29) + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_LAST FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_INVALID + __le32 max_bw; + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_SFT 0 + #define FUNC_CFG_REQ_MAX_BW_SCALE 0x10000000UL + #define FUNC_CFG_REQ_MAX_BW_SCALE_BITS (0x0UL << 28) + #define FUNC_CFG_REQ_MAX_BW_SCALE_BYTES (0x1UL << 28) + #define FUNC_CFG_REQ_MAX_BW_SCALE_LAST FUNC_CFG_REQ_MAX_BW_SCALE_BYTES + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_MEGA (0x0UL << 29) + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_KILO (0x2UL << 29) + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_BASE (0x4UL << 29) + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_GIGA (0x6UL << 29) + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_LAST FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_INVALID + __le16 async_event_cr; + u8 vlan_antispoof_mode; + #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_NOCHECK 0x0UL + #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_VALIDATE_VLAN 0x1UL + #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_IF_VLANDNE 0x2UL + #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_OR_OVERRIDE_VLAN 0x3UL + #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_LAST FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_OR_OVERRIDE_VLAN + u8 allowed_vlan_pris; + u8 evb_mode; + #define FUNC_CFG_REQ_EVB_MODE_NO_EVB 0x0UL + #define FUNC_CFG_REQ_EVB_MODE_VEB 0x1UL + #define FUNC_CFG_REQ_EVB_MODE_VEPA 0x2UL + #define FUNC_CFG_REQ_EVB_MODE_LAST FUNC_CFG_REQ_EVB_MODE_VEPA + u8 options; + #define FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_MASK 0x3UL + #define FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SFT 0 + #define FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SIZE_64 0x0UL + #define FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SIZE_128 0x1UL + #define FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_LAST FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SIZE_128 + #define FUNC_CFG_REQ_OPTIONS_LINK_ADMIN_STATE_MASK 0xcUL + #define FUNC_CFG_REQ_OPTIONS_LINK_ADMIN_STATE_SFT 2 + #define FUNC_CFG_REQ_OPTIONS_LINK_ADMIN_STATE_FORCED_DOWN (0x0UL << 2) + #define FUNC_CFG_REQ_OPTIONS_LINK_ADMIN_STATE_FORCED_UP (0x1UL << 2) + #define FUNC_CFG_REQ_OPTIONS_LINK_ADMIN_STATE_AUTO (0x2UL << 2) + #define FUNC_CFG_REQ_OPTIONS_LINK_ADMIN_STATE_LAST FUNC_CFG_REQ_OPTIONS_LINK_ADMIN_STATE_AUTO + #define FUNC_CFG_REQ_OPTIONS_RSVD_MASK 0xf0UL + #define FUNC_CFG_REQ_OPTIONS_RSVD_SFT 4 + __le16 num_mcast_filters; + __le16 schq_id; + __le16 mpc_chnls; + #define FUNC_CFG_REQ_MPC_CHNLS_TCE_ENABLE 0x1UL + #define FUNC_CFG_REQ_MPC_CHNLS_TCE_DISABLE 0x2UL + #define FUNC_CFG_REQ_MPC_CHNLS_RCE_ENABLE 0x4UL + #define FUNC_CFG_REQ_MPC_CHNLS_RCE_DISABLE 0x8UL + #define FUNC_CFG_REQ_MPC_CHNLS_TE_CFA_ENABLE 0x10UL + #define FUNC_CFG_REQ_MPC_CHNLS_TE_CFA_DISABLE 0x20UL + #define FUNC_CFG_REQ_MPC_CHNLS_RE_CFA_ENABLE 0x40UL + #define FUNC_CFG_REQ_MPC_CHNLS_RE_CFA_DISABLE 0x80UL + #define FUNC_CFG_REQ_MPC_CHNLS_PRIMATE_ENABLE 0x100UL + #define FUNC_CFG_REQ_MPC_CHNLS_PRIMATE_DISABLE 0x200UL + __le32 partition_min_bw; + #define FUNC_CFG_REQ_PARTITION_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_CFG_REQ_PARTITION_MIN_BW_BW_VALUE_SFT 0 + #define FUNC_CFG_REQ_PARTITION_MIN_BW_SCALE 0x10000000UL + #define FUNC_CFG_REQ_PARTITION_MIN_BW_SCALE_BITS (0x0UL << 28) + #define FUNC_CFG_REQ_PARTITION_MIN_BW_SCALE_BYTES (0x1UL << 28) + #define FUNC_CFG_REQ_PARTITION_MIN_BW_SCALE_LAST FUNC_CFG_REQ_PARTITION_MIN_BW_SCALE_BYTES + #define FUNC_CFG_REQ_PARTITION_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_CFG_REQ_PARTITION_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_CFG_REQ_PARTITION_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_CFG_REQ_PARTITION_MIN_BW_BW_VALUE_UNIT_LAST FUNC_CFG_REQ_PARTITION_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 + __le32 partition_max_bw; + #define FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_SFT 0 + #define FUNC_CFG_REQ_PARTITION_MAX_BW_SCALE 0x10000000UL + #define FUNC_CFG_REQ_PARTITION_MAX_BW_SCALE_BITS (0x0UL << 28) + #define FUNC_CFG_REQ_PARTITION_MAX_BW_SCALE_BYTES (0x1UL << 28) + #define FUNC_CFG_REQ_PARTITION_MAX_BW_SCALE_LAST FUNC_CFG_REQ_PARTITION_MAX_BW_SCALE_BYTES + #define FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_UNIT_LAST FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 + __be16 tpid; + __le16 host_mtu; + __le32 flags2; + #define FUNC_CFG_REQ_FLAGS2_KTLS_KEY_CTX_ASSETS_TEST 0x1UL + #define FUNC_CFG_REQ_FLAGS2_QUIC_KEY_CTX_ASSETS_TEST 0x2UL + __le32 enables2; + #define FUNC_CFG_REQ_ENABLES2_KDNET 0x1UL + #define FUNC_CFG_REQ_ENABLES2_DB_PAGE_SIZE 0x2UL + #define FUNC_CFG_REQ_ENABLES2_QUIC_TX_KEY_CTXS 0x4UL + #define FUNC_CFG_REQ_ENABLES2_QUIC_RX_KEY_CTXS 0x8UL + #define FUNC_CFG_REQ_ENABLES2_ROCE_MAX_AV_PER_VF 0x10UL + #define FUNC_CFG_REQ_ENABLES2_ROCE_MAX_CQ_PER_VF 0x20UL + #define FUNC_CFG_REQ_ENABLES2_ROCE_MAX_MRW_PER_VF 0x40UL + #define FUNC_CFG_REQ_ENABLES2_ROCE_MAX_QP_PER_VF 0x80UL + #define FUNC_CFG_REQ_ENABLES2_ROCE_MAX_SRQ_PER_VF 0x100UL + #define FUNC_CFG_REQ_ENABLES2_ROCE_MAX_GID_PER_VF 0x200UL + #define FUNC_CFG_REQ_ENABLES2_XID_PARTITION_CFG 0x400UL + #define FUNC_CFG_REQ_ENABLES2_PHYSICAL_SLOT_NUMBER 0x800UL + u8 port_kdnet_mode; + #define FUNC_CFG_REQ_PORT_KDNET_MODE_DISABLED 0x0UL + #define FUNC_CFG_REQ_PORT_KDNET_MODE_ENABLED 0x1UL + #define FUNC_CFG_REQ_PORT_KDNET_MODE_LAST FUNC_CFG_REQ_PORT_KDNET_MODE_ENABLED + u8 db_page_size; + #define FUNC_CFG_REQ_DB_PAGE_SIZE_4KB 0x0UL + #define FUNC_CFG_REQ_DB_PAGE_SIZE_8KB 0x1UL + #define FUNC_CFG_REQ_DB_PAGE_SIZE_16KB 0x2UL + #define FUNC_CFG_REQ_DB_PAGE_SIZE_32KB 0x3UL + #define FUNC_CFG_REQ_DB_PAGE_SIZE_64KB 0x4UL + #define FUNC_CFG_REQ_DB_PAGE_SIZE_128KB 0x5UL + #define FUNC_CFG_REQ_DB_PAGE_SIZE_256KB 0x6UL + #define FUNC_CFG_REQ_DB_PAGE_SIZE_512KB 0x7UL + #define FUNC_CFG_REQ_DB_PAGE_SIZE_1MB 0x8UL + #define FUNC_CFG_REQ_DB_PAGE_SIZE_2MB 0x9UL + #define FUNC_CFG_REQ_DB_PAGE_SIZE_4MB 0xaUL + #define FUNC_CFG_REQ_DB_PAGE_SIZE_LAST FUNC_CFG_REQ_DB_PAGE_SIZE_4MB + __le16 physical_slot_number; + __le32 num_ktls_tx_key_ctxs; + __le32 num_ktls_rx_key_ctxs; + __le32 num_quic_tx_key_ctxs; + __le32 num_quic_rx_key_ctxs; + __le32 roce_max_av_per_vf; + __le32 roce_max_cq_per_vf; + __le32 roce_max_mrw_per_vf; + __le32 roce_max_qp_per_vf; + __le32 roce_max_srq_per_vf; + __le32 roce_max_gid_per_vf; + __le16 xid_partition_cfg; + #define FUNC_CFG_REQ_XID_PARTITION_CFG_TX_CK 0x1UL + #define FUNC_CFG_REQ_XID_PARTITION_CFG_RX_CK 0x2UL + __le16 unused_2; +}; + +/* hwrm_func_cfg_output (size:128b/16B) */ +struct hwrm_func_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_cfg_cmd_err (size:64b/8B) */ +struct hwrm_func_cfg_cmd_err { + u8 code; + #define FUNC_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL + #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_BW_RANGE 0x1UL + #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_MORE_THAN_MAX 0x2UL + #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_BW_UNSUPPORTED 0x3UL + #define FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_PERCENT 0x4UL + #define FUNC_CFG_CMD_ERR_CODE_LAST FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_PERCENT + u8 unused_0[7]; +}; + +/* hwrm_func_qstats_input (size:192b/24B) */ +struct hwrm_func_qstats_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 fid; + u8 flags; + #define FUNC_QSTATS_REQ_FLAGS_ROCE_ONLY 0x1UL + #define FUNC_QSTATS_REQ_FLAGS_COUNTER_MASK 0x2UL + #define FUNC_QSTATS_REQ_FLAGS_L2_ONLY 0x4UL + u8 unused_0[5]; +}; + +/* hwrm_func_qstats_output (size:1408b/176B) */ +struct hwrm_func_qstats_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le64 tx_ucast_pkts; + __le64 tx_mcast_pkts; + __le64 tx_bcast_pkts; + __le64 tx_discard_pkts; + __le64 tx_drop_pkts; + __le64 tx_ucast_bytes; + __le64 tx_mcast_bytes; + __le64 tx_bcast_bytes; + __le64 rx_ucast_pkts; + __le64 rx_mcast_pkts; + __le64 rx_bcast_pkts; + __le64 rx_discard_pkts; + __le64 rx_drop_pkts; + __le64 rx_ucast_bytes; + __le64 rx_mcast_bytes; + __le64 rx_bcast_bytes; + __le64 rx_agg_pkts; + __le64 rx_agg_bytes; + __le64 rx_agg_events; + __le64 rx_agg_aborts; + u8 clear_seq; + u8 unused_0[6]; + u8 valid; +}; + +/* hwrm_func_qstats_ext_input (size:256b/32B) */ +struct hwrm_func_qstats_ext_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 fid; + u8 flags; + #define FUNC_QSTATS_EXT_REQ_FLAGS_ROCE_ONLY 0x1UL + #define FUNC_QSTATS_EXT_REQ_FLAGS_COUNTER_MASK 0x2UL + u8 unused_0[1]; + __le32 enables; + #define FUNC_QSTATS_EXT_REQ_ENABLES_SCHQ_ID 0x1UL + __le16 schq_id; + __le16 traffic_class; + u8 unused_1[4]; +}; + +/* hwrm_func_qstats_ext_output (size:1536b/192B) */ +struct hwrm_func_qstats_ext_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le64 rx_ucast_pkts; + __le64 rx_mcast_pkts; + __le64 rx_bcast_pkts; + __le64 rx_discard_pkts; + __le64 rx_error_pkts; + __le64 rx_ucast_bytes; + __le64 rx_mcast_bytes; + __le64 rx_bcast_bytes; + __le64 tx_ucast_pkts; + __le64 tx_mcast_pkts; + __le64 tx_bcast_pkts; + __le64 tx_error_pkts; + __le64 tx_discard_pkts; + __le64 tx_ucast_bytes; + __le64 tx_mcast_bytes; + __le64 tx_bcast_bytes; + __le64 rx_tpa_eligible_pkt; + __le64 rx_tpa_eligible_bytes; + __le64 rx_tpa_pkt; + __le64 rx_tpa_bytes; + __le64 rx_tpa_errors; + __le64 rx_tpa_events; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_clr_stats_input (size:192b/24B) */ +struct hwrm_func_clr_stats_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 fid; + u8 unused_0[6]; +}; + +/* hwrm_func_clr_stats_output (size:128b/16B) */ +struct hwrm_func_clr_stats_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_vf_resc_free_input (size:192b/24B) */ +struct hwrm_func_vf_resc_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 vf_id; + u8 unused_0[6]; +}; + +/* hwrm_func_vf_resc_free_output (size:128b/16B) */ +struct hwrm_func_vf_resc_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_drv_rgtr_input (size:896b/112B) */ +struct hwrm_func_drv_rgtr_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define FUNC_DRV_RGTR_REQ_FLAGS_FWD_ALL_MODE 0x1UL + #define FUNC_DRV_RGTR_REQ_FLAGS_FWD_NONE_MODE 0x2UL + #define FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE 0x4UL + #define FUNC_DRV_RGTR_REQ_FLAGS_FLOW_HANDLE_64BIT_MODE 0x8UL + #define FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT 0x10UL + #define FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT 0x20UL + #define FUNC_DRV_RGTR_REQ_FLAGS_MASTER_SUPPORT 0x40UL + #define FUNC_DRV_RGTR_REQ_FLAGS_FAST_RESET_SUPPORT 0x80UL + #define FUNC_DRV_RGTR_REQ_FLAGS_RSS_STRICT_HASH_TYPE_SUPPORT 0x100UL + #define FUNC_DRV_RGTR_REQ_FLAGS_NPAR_1_2_SUPPORT 0x200UL + #define FUNC_DRV_RGTR_REQ_FLAGS_ASYM_QUEUE_CFG_SUPPORT 0x400UL + #define FUNC_DRV_RGTR_REQ_FLAGS_TF_INGRESS_NIC_FLOW_MODE 0x800UL + #define FUNC_DRV_RGTR_REQ_FLAGS_TF_EGRESS_NIC_FLOW_MODE 0x1000UL + __le32 enables; + #define FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE 0x1UL + #define FUNC_DRV_RGTR_REQ_ENABLES_VER 0x2UL + #define FUNC_DRV_RGTR_REQ_ENABLES_TIMESTAMP 0x4UL + #define FUNC_DRV_RGTR_REQ_ENABLES_VF_REQ_FWD 0x8UL + #define FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD 0x10UL + __le16 os_type; + #define FUNC_DRV_RGTR_REQ_OS_TYPE_UNKNOWN 0x0UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_OTHER 0x1UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_MSDOS 0xeUL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_WINDOWS 0x12UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_SOLARIS 0x1dUL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX 0x24UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_FREEBSD 0x2aUL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_ESXI 0x68UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN864 0x73UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN2012R2 0x74UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_UEFI 0x8000UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_LAST FUNC_DRV_RGTR_REQ_OS_TYPE_UEFI + u8 ver_maj_8b; + u8 ver_min_8b; + u8 ver_upd_8b; + u8 unused_0[3]; + __le32 timestamp; + u8 unused_1[4]; + __le32 vf_req_fwd[8]; + __le32 async_event_fwd[8]; + __le16 ver_maj; + __le16 ver_min; + __le16 ver_upd; + __le16 ver_patch; +}; + +/* hwrm_func_drv_rgtr_output (size:128b/16B) */ +struct hwrm_func_drv_rgtr_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 flags; + #define FUNC_DRV_RGTR_RESP_FLAGS_IF_CHANGE_SUPPORTED 0x1UL + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_func_drv_unrgtr_input (size:192b/24B) */ +struct hwrm_func_drv_unrgtr_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define FUNC_DRV_UNRGTR_REQ_FLAGS_PREPARE_FOR_SHUTDOWN 0x1UL + u8 unused_0[4]; +}; + +/* hwrm_func_drv_unrgtr_output (size:128b/16B) */ +struct hwrm_func_drv_unrgtr_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_buf_rgtr_input (size:1024b/128B) */ +struct hwrm_func_buf_rgtr_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 enables; + #define FUNC_BUF_RGTR_REQ_ENABLES_VF_ID 0x1UL + #define FUNC_BUF_RGTR_REQ_ENABLES_ERR_BUF_ADDR 0x2UL + __le16 vf_id; + __le16 req_buf_num_pages; + __le16 req_buf_page_size; + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_16B 0x4UL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4K 0xcUL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_8K 0xdUL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_64K 0x10UL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_2M 0x15UL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4M 0x16UL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_1G 0x1eUL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_LAST FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_1G + __le16 req_buf_len; + __le16 resp_buf_len; + u8 unused_0[2]; + __le64 req_buf_page_addr0; + __le64 req_buf_page_addr1; + __le64 req_buf_page_addr2; + __le64 req_buf_page_addr3; + __le64 req_buf_page_addr4; + __le64 req_buf_page_addr5; + __le64 req_buf_page_addr6; + __le64 req_buf_page_addr7; + __le64 req_buf_page_addr8; + __le64 req_buf_page_addr9; + __le64 error_buf_addr; + __le64 resp_buf_addr; +}; + +/* hwrm_func_buf_rgtr_output (size:128b/16B) */ +struct hwrm_func_buf_rgtr_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_drv_qver_input (size:192b/24B) */ +struct hwrm_func_drv_qver_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 reserved; + __le16 fid; + u8 driver_type; + #define FUNC_DRV_QVER_REQ_DRIVER_TYPE_L2 0x0UL + #define FUNC_DRV_QVER_REQ_DRIVER_TYPE_ROCE 0x1UL + #define FUNC_DRV_QVER_REQ_DRIVER_TYPE_LAST FUNC_DRV_QVER_REQ_DRIVER_TYPE_ROCE + u8 unused_0; +}; + +/* hwrm_func_drv_qver_output (size:256b/32B) */ +struct hwrm_func_drv_qver_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 os_type; + #define FUNC_DRV_QVER_RESP_OS_TYPE_UNKNOWN 0x0UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_OTHER 0x1UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_MSDOS 0xeUL + #define FUNC_DRV_QVER_RESP_OS_TYPE_WINDOWS 0x12UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_SOLARIS 0x1dUL + #define FUNC_DRV_QVER_RESP_OS_TYPE_LINUX 0x24UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_FREEBSD 0x2aUL + #define FUNC_DRV_QVER_RESP_OS_TYPE_ESXI 0x68UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_WIN864 0x73UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_WIN2012R2 0x74UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_UEFI 0x8000UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_LAST FUNC_DRV_QVER_RESP_OS_TYPE_UEFI + u8 ver_maj_8b; + u8 ver_min_8b; + u8 ver_upd_8b; + u8 unused_0[3]; + __le16 ver_maj; + __le16 ver_min; + __le16 ver_upd; + __le16 ver_patch; + u8 unused_1[7]; + u8 valid; +}; + +/* hwrm_func_resource_qcaps_input (size:192b/24B) */ +struct hwrm_func_resource_qcaps_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 fid; + u8 unused_0[6]; +}; + +/* hwrm_func_resource_qcaps_output (size:704b/88B) */ +struct hwrm_func_resource_qcaps_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 max_vfs; + __le16 max_msix; + __le16 vf_reservation_strategy; + #define FUNC_RESOURCE_QCAPS_RESP_VF_RESERVATION_STRATEGY_MAXIMAL 0x0UL + #define FUNC_RESOURCE_QCAPS_RESP_VF_RESERVATION_STRATEGY_MINIMAL 0x1UL + #define FUNC_RESOURCE_QCAPS_RESP_VF_RESERVATION_STRATEGY_MINIMAL_STATIC 0x2UL + #define FUNC_RESOURCE_QCAPS_RESP_VF_RESERVATION_STRATEGY_LAST FUNC_RESOURCE_QCAPS_RESP_VF_RESERVATION_STRATEGY_MINIMAL_STATIC + __le16 min_rsscos_ctx; + __le16 max_rsscos_ctx; + __le16 min_cmpl_rings; + __le16 max_cmpl_rings; + __le16 min_tx_rings; + __le16 max_tx_rings; + __le16 min_rx_rings; + __le16 max_rx_rings; + __le16 min_l2_ctxs; + __le16 max_l2_ctxs; + __le16 min_vnics; + __le16 max_vnics; + __le16 min_stat_ctx; + __le16 max_stat_ctx; + __le16 min_hw_ring_grps; + __le16 max_hw_ring_grps; + __le16 max_tx_scheduler_inputs; + __le16 flags; + #define FUNC_RESOURCE_QCAPS_RESP_FLAGS_MIN_GUARANTEED 0x1UL + __le16 min_msix; + __le32 min_ktls_tx_key_ctxs; + __le32 max_ktls_tx_key_ctxs; + __le32 min_ktls_rx_key_ctxs; + __le32 max_ktls_rx_key_ctxs; + __le32 min_quic_tx_key_ctxs; + __le32 max_quic_tx_key_ctxs; + __le32 min_quic_rx_key_ctxs; + __le32 max_quic_rx_key_ctxs; + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_func_vf_resource_cfg_input (size:704b/88B) */ +struct hwrm_func_vf_resource_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 vf_id; + __le16 max_msix; + __le16 min_rsscos_ctx; + __le16 max_rsscos_ctx; + __le16 min_cmpl_rings; + __le16 max_cmpl_rings; + __le16 min_tx_rings; + __le16 max_tx_rings; + __le16 min_rx_rings; + __le16 max_rx_rings; + __le16 min_l2_ctxs; + __le16 max_l2_ctxs; + __le16 min_vnics; + __le16 max_vnics; + __le16 min_stat_ctx; + __le16 max_stat_ctx; + __le16 min_hw_ring_grps; + __le16 max_hw_ring_grps; + __le16 flags; + #define FUNC_VF_RESOURCE_CFG_REQ_FLAGS_MIN_GUARANTEED 0x1UL + __le16 min_msix; + __le32 min_ktls_tx_key_ctxs; + __le32 max_ktls_tx_key_ctxs; + __le32 min_ktls_rx_key_ctxs; + __le32 max_ktls_rx_key_ctxs; + __le32 min_quic_tx_key_ctxs; + __le32 max_quic_tx_key_ctxs; + __le32 min_quic_rx_key_ctxs; + __le32 max_quic_rx_key_ctxs; +}; + +/* hwrm_func_vf_resource_cfg_output (size:384b/48B) */ +struct hwrm_func_vf_resource_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 reserved_rsscos_ctx; + __le16 reserved_cmpl_rings; + __le16 reserved_tx_rings; + __le16 reserved_rx_rings; + __le16 reserved_l2_ctxs; + __le16 reserved_vnics; + __le16 reserved_stat_ctx; + __le16 reserved_hw_ring_grps; + __le32 reserved_ktls_tx_key_ctxs; + __le32 reserved_ktls_rx_key_ctxs; + __le32 reserved_quic_tx_key_ctxs; + __le32 reserved_quic_rx_key_ctxs; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_backing_store_qcaps_input (size:128b/16B) */ +struct hwrm_func_backing_store_qcaps_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; +}; + +/* hwrm_func_backing_store_qcaps_output (size:832b/104B) */ +struct hwrm_func_backing_store_qcaps_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 qp_max_entries; + __le16 qp_min_qp1_entries; + __le16 qp_max_l2_entries; + __le16 qp_entry_size; + __le16 srq_max_l2_entries; + __le32 srq_max_entries; + __le16 srq_entry_size; + __le16 cq_max_l2_entries; + __le32 cq_max_entries; + __le16 cq_entry_size; + __le16 vnic_max_vnic_entries; + __le16 vnic_max_ring_table_entries; + __le16 vnic_entry_size; + __le32 stat_max_entries; + __le16 stat_entry_size; + __le16 tqm_entry_size; + __le32 tqm_min_entries_per_ring; + __le32 tqm_max_entries_per_ring; + __le32 mrav_max_entries; + __le16 mrav_entry_size; + __le16 tim_entry_size; + __le32 tim_max_entries; + __le16 mrav_num_entries_units; + u8 tqm_entries_multiple; + u8 ctx_kind_initializer; + __le16 ctx_init_mask; + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_QP 0x1UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_SRQ 0x2UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_CQ 0x4UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_VNIC 0x8UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_STAT 0x10UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_MRAV 0x20UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_TKC 0x40UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_RKC 0x80UL + u8 qp_init_offset; + u8 srq_init_offset; + u8 cq_init_offset; + u8 vnic_init_offset; + u8 tqm_fp_rings_count; + u8 stat_init_offset; + u8 mrav_init_offset; + u8 tqm_fp_rings_count_ext; + u8 tkc_init_offset; + u8 rkc_init_offset; + __le16 tkc_entry_size; + __le16 rkc_entry_size; + __le32 tkc_max_entries; + __le32 rkc_max_entries; + __le16 fast_qpmd_qp_num_entries; + u8 rsvd1[5]; + u8 valid; +}; + +/* tqm_fp_ring_cfg (size:128b/16B) */ +struct tqm_fp_ring_cfg { + u8 tqm_ring_pg_size_tqm_ring_lvl; + #define TQM_FP_RING_CFG_TQM_RING_CFG_TQM_RING_LVL_MASK 0xfUL + #define TQM_FP_RING_CFG_TQM_RING_CFG_TQM_RING_LVL_SFT 0 + #define TQM_FP_RING_CFG_TQM_RING_CFG_TQM_RING_LVL_LVL_0 0x0UL + #define TQM_FP_RING_CFG_TQM_RING_CFG_TQM_RING_LVL_LVL_1 0x1UL + #define TQM_FP_RING_CFG_TQM_RING_CFG_TQM_RING_LVL_LVL_2 0x2UL + #define TQM_FP_RING_CFG_TQM_RING_CFG_TQM_RING_LVL_LAST TQM_FP_RING_CFG_TQM_RING_CFG_TQM_RING_LVL_LVL_2 + #define TQM_FP_RING_CFG_TQM_RING_CFG_TQM_RING_PG_SIZE_MASK 0xf0UL + #define TQM_FP_RING_CFG_TQM_RING_CFG_TQM_RING_PG_SIZE_SFT 4 + #define TQM_FP_RING_CFG_TQM_RING_CFG_TQM_RING_PG_SIZE_PG_4K (0x0UL << 4) + #define TQM_FP_RING_CFG_TQM_RING_CFG_TQM_RING_PG_SIZE_PG_8K (0x1UL << 4) + #define TQM_FP_RING_CFG_TQM_RING_CFG_TQM_RING_PG_SIZE_PG_64K (0x2UL << 4) + #define TQM_FP_RING_CFG_TQM_RING_CFG_TQM_RING_PG_SIZE_PG_2M (0x3UL << 4) + #define TQM_FP_RING_CFG_TQM_RING_CFG_TQM_RING_PG_SIZE_PG_8M (0x4UL << 4) + #define TQM_FP_RING_CFG_TQM_RING_CFG_TQM_RING_PG_SIZE_PG_1G (0x5UL << 4) + #define TQM_FP_RING_CFG_TQM_RING_CFG_TQM_RING_PG_SIZE_LAST TQM_FP_RING_CFG_TQM_RING_CFG_TQM_RING_PG_SIZE_PG_1G + u8 unused[3]; + __le32 tqm_ring_num_entries; + __le64 tqm_ring_page_dir; +}; + +/* hwrm_func_backing_store_cfg_input (size:2688b/336B) */ +struct hwrm_func_backing_store_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define FUNC_BACKING_STORE_CFG_REQ_FLAGS_PREBOOT_MODE 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_FLAGS_MRAV_RESERVATION_SPLIT 0x2UL + __le32 enables; + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_SRQ 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_CQ 0x4UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_VNIC 0x8UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_STAT 0x10UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP 0x20UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING0 0x40UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING1 0x80UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING2 0x100UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING3 0x200UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING4 0x400UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING5 0x800UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING6 0x1000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING7 0x2000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV 0x4000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM 0x8000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING8 0x10000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING9 0x20000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING10 0x40000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TKC 0x80000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_RKC 0x100000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP_FAST_QPMD 0x200000UL + u8 qpc_pg_size_qpc_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_QPC_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_QPC_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_QPC_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_QPC_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_QPC_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_QPC_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_QPC_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_PG_1G + u8 srq_pg_size_srq_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_SRQ_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_SRQ_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_SRQ_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_SRQ_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_SRQ_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_SRQ_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_SRQ_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_SRQ_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_SRQ_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_SRQ_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_SRQ_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_SRQ_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_SRQ_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_SRQ_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_SRQ_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_SRQ_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_SRQ_PG_SIZE_PG_1G + u8 cq_pg_size_cq_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_CQ_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_CQ_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_CQ_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_CQ_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_CQ_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_CQ_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_CQ_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_CQ_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_CQ_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_CQ_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_CQ_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_CQ_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_CQ_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_CQ_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_CQ_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_CQ_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_CQ_PG_SIZE_PG_1G + u8 vnic_pg_size_vnic_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_VNIC_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_VNIC_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_VNIC_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_VNIC_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_VNIC_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_VNIC_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_VNIC_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_VNIC_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_VNIC_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_VNIC_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_VNIC_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_VNIC_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_VNIC_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_VNIC_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_VNIC_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_VNIC_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_VNIC_PG_SIZE_PG_1G + u8 stat_pg_size_stat_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_STAT_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_STAT_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_STAT_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_STAT_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_STAT_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_STAT_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_STAT_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_STAT_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_STAT_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_STAT_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_STAT_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_STAT_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_STAT_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_STAT_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_STAT_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_STAT_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_STAT_PG_SIZE_PG_1G + u8 tqm_sp_pg_size_tqm_sp_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_TQM_SP_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_SP_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_SP_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_SP_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_SP_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_SP_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_TQM_SP_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_SP_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_SP_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_SP_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_SP_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_SP_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_SP_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_SP_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_SP_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_SP_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_TQM_SP_PG_SIZE_PG_1G + u8 tqm_ring0_pg_size_tqm_ring0_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING0_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING0_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING0_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING0_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING0_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING0_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_TQM_RING0_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING0_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING0_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING0_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING0_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING0_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING0_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING0_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING0_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING0_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_TQM_RING0_PG_SIZE_PG_1G + u8 tqm_ring1_pg_size_tqm_ring1_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING1_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING1_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING1_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING1_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING1_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING1_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_TQM_RING1_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING1_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING1_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING1_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING1_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING1_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING1_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING1_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING1_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING1_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_TQM_RING1_PG_SIZE_PG_1G + u8 tqm_ring2_pg_size_tqm_ring2_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING2_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING2_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING2_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING2_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING2_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING2_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_TQM_RING2_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING2_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING2_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING2_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING2_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING2_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING2_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING2_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING2_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING2_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_TQM_RING2_PG_SIZE_PG_1G + u8 tqm_ring3_pg_size_tqm_ring3_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING3_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING3_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING3_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING3_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING3_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING3_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_TQM_RING3_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING3_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING3_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING3_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING3_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING3_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING3_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING3_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING3_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING3_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_TQM_RING3_PG_SIZE_PG_1G + u8 tqm_ring4_pg_size_tqm_ring4_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING4_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING4_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING4_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING4_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING4_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING4_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_TQM_RING4_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING4_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING4_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING4_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING4_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING4_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING4_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING4_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING4_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING4_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_TQM_RING4_PG_SIZE_PG_1G + u8 tqm_ring5_pg_size_tqm_ring5_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING5_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING5_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING5_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING5_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING5_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING5_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_TQM_RING5_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING5_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING5_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING5_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING5_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING5_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING5_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING5_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING5_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING5_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_TQM_RING5_PG_SIZE_PG_1G + u8 tqm_ring6_pg_size_tqm_ring6_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING6_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING6_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING6_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING6_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING6_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING6_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_TQM_RING6_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING6_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING6_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING6_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING6_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING6_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING6_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING6_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING6_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING6_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_TQM_RING6_PG_SIZE_PG_1G + u8 tqm_ring7_pg_size_tqm_ring7_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING7_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING7_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING7_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING7_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING7_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING7_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_TQM_RING7_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING7_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING7_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING7_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING7_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING7_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING7_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING7_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING7_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TQM_RING7_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_TQM_RING7_PG_SIZE_PG_1G + u8 mrav_pg_size_mrav_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_MRAV_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_MRAV_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_MRAV_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_MRAV_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_MRAV_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_MRAV_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_MRAV_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_MRAV_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_MRAV_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_MRAV_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_MRAV_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_MRAV_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_MRAV_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_MRAV_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_MRAV_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_MRAV_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_MRAV_PG_SIZE_PG_1G + u8 tim_pg_size_tim_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_TIM_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_TIM_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_TIM_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_TIM_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_TIM_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_TIM_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_TIM_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_TIM_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_TIM_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_TIM_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TIM_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TIM_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TIM_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TIM_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TIM_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TIM_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_TIM_PG_SIZE_PG_1G + __le64 qpc_page_dir; + __le64 srq_page_dir; + __le64 cq_page_dir; + __le64 vnic_page_dir; + __le64 stat_page_dir; + __le64 tqm_sp_page_dir; + __le64 tqm_ring0_page_dir; + __le64 tqm_ring1_page_dir; + __le64 tqm_ring2_page_dir; + __le64 tqm_ring3_page_dir; + __le64 tqm_ring4_page_dir; + __le64 tqm_ring5_page_dir; + __le64 tqm_ring6_page_dir; + __le64 tqm_ring7_page_dir; + __le64 mrav_page_dir; + __le64 tim_page_dir; + __le32 qp_num_entries; + __le32 srq_num_entries; + __le32 cq_num_entries; + __le32 stat_num_entries; + __le32 tqm_sp_num_entries; + __le32 tqm_ring0_num_entries; + __le32 tqm_ring1_num_entries; + __le32 tqm_ring2_num_entries; + __le32 tqm_ring3_num_entries; + __le32 tqm_ring4_num_entries; + __le32 tqm_ring5_num_entries; + __le32 tqm_ring6_num_entries; + __le32 tqm_ring7_num_entries; + __le32 mrav_num_entries; + __le32 tim_num_entries; + __le16 qp_num_qp1_entries; + __le16 qp_num_l2_entries; + __le16 qp_entry_size; + __le16 srq_num_l2_entries; + __le16 srq_entry_size; + __le16 cq_num_l2_entries; + __le16 cq_entry_size; + __le16 vnic_num_vnic_entries; + __le16 vnic_num_ring_table_entries; + __le16 vnic_entry_size; + __le16 stat_entry_size; + __le16 tqm_entry_size; + __le16 mrav_entry_size; + __le16 tim_entry_size; + u8 tqm_ring8_pg_size_tqm_ring_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_1G + u8 ring8_unused[3]; + __le32 tqm_ring8_num_entries; + __le64 tqm_ring8_page_dir; + u8 tqm_ring9_pg_size_tqm_ring_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_1G + u8 ring9_unused[3]; + __le32 tqm_ring9_num_entries; + __le64 tqm_ring9_page_dir; + u8 tqm_ring10_pg_size_tqm_ring_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_1G + u8 ring10_unused[3]; + __le32 tqm_ring10_num_entries; + __le64 tqm_ring10_page_dir; + __le32 tkc_num_entries; + __le32 rkc_num_entries; + __le64 tkc_page_dir; + __le64 rkc_page_dir; + __le16 tkc_entry_size; + __le16 rkc_entry_size; + u8 tkc_pg_size_tkc_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_TKC_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_TKC_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_TKC_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_TKC_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_TKC_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_TKC_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_TKC_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_PG_1G + u8 rkc_pg_size_rkc_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_RKC_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_RKC_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_RKC_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_RKC_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_RKC_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_RKC_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_RKC_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_PG_1G + __le16 qp_num_fast_qpmd_entries; +}; + +/* hwrm_func_backing_store_cfg_output (size:128b/16B) */ +struct hwrm_func_backing_store_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_error_recovery_qcfg_input (size:192b/24B) */ +struct hwrm_error_recovery_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 unused_0[8]; +}; + +/* hwrm_error_recovery_qcfg_output (size:1664b/208B) */ +struct hwrm_error_recovery_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 flags; + #define ERROR_RECOVERY_QCFG_RESP_FLAGS_HOST 0x1UL + #define ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU 0x2UL + __le32 driver_polling_freq; + __le32 master_func_wait_period; + __le32 normal_func_wait_period; + __le32 master_func_wait_period_after_reset; + __le32 max_bailout_time_after_reset; + __le32 fw_health_status_reg; + #define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_MASK 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_SFT 0 + #define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_PCIE_CFG 0x0UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_GRC 0x1UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_BAR0 0x2UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_BAR1 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_LAST ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_BAR1 + #define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_MASK 0xfffffffcUL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SFT 2 + __le32 fw_heartbeat_reg; + #define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_MASK 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_SFT 0 + #define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_PCIE_CFG 0x0UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_GRC 0x1UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_BAR0 0x2UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_BAR1 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_LAST ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_BAR1 + #define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_MASK 0xfffffffcUL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SFT 2 + __le32 fw_reset_cnt_reg; + #define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_MASK 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_SFT 0 + #define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_PCIE_CFG 0x0UL + #define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_GRC 0x1UL + #define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_BAR0 0x2UL + #define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_BAR1 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_LAST ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_BAR1 + #define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_MASK 0xfffffffcUL + #define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SFT 2 + __le32 reset_inprogress_reg; + #define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_MASK 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_SFT 0 + #define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_PCIE_CFG 0x0UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_GRC 0x1UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_BAR0 0x2UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_BAR1 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_LAST ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_BAR1 + #define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_MASK 0xfffffffcUL + #define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SFT 2 + __le32 reset_inprogress_reg_mask; + u8 unused_0[3]; + u8 reg_array_cnt; + __le32 reset_reg[16]; + #define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_MASK 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_SFT 0 + #define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_PCIE_CFG 0x0UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_GRC 0x1UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_BAR0 0x2UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_BAR1 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_LAST ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_BAR1 + #define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_MASK 0xfffffffcUL + #define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SFT 2 + __le32 reset_reg_val[16]; + u8 delay_after_reset[16]; + __le32 err_recovery_cnt_reg; + #define ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_SPACE_MASK 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_SPACE_SFT 0 + #define ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_SPACE_PCIE_CFG 0x0UL + #define ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_SPACE_GRC 0x1UL + #define ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_SPACE_BAR0 0x2UL + #define ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_SPACE_BAR1 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_SPACE_LAST ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_SPACE_BAR1 + #define ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_MASK 0xfffffffcUL + #define ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_SFT 2 + u8 unused_1[3]; + u8 valid; +}; + +/* hwrm_func_echo_response_input (size:192b/24B) */ +struct hwrm_func_echo_response_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 event_data1; + __le32 event_data2; +}; + +/* hwrm_func_echo_response_output (size:128b/16B) */ +struct hwrm_func_echo_response_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_ptp_pin_qcfg_input (size:192b/24B) */ +struct hwrm_func_ptp_pin_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 unused_0[8]; +}; + +/* hwrm_func_ptp_pin_qcfg_output (size:128b/16B) */ +struct hwrm_func_ptp_pin_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 num_pins; + u8 state; + #define FUNC_PTP_PIN_QCFG_RESP_STATE_PIN0_ENABLED 0x1UL + #define FUNC_PTP_PIN_QCFG_RESP_STATE_PIN1_ENABLED 0x2UL + #define FUNC_PTP_PIN_QCFG_RESP_STATE_PIN2_ENABLED 0x4UL + #define FUNC_PTP_PIN_QCFG_RESP_STATE_PIN3_ENABLED 0x8UL + u8 pin0_usage; + #define FUNC_PTP_PIN_QCFG_RESP_PIN0_USAGE_NONE 0x0UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN0_USAGE_PPS_IN 0x1UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN0_USAGE_PPS_OUT 0x2UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN0_USAGE_SYNC_IN 0x3UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN0_USAGE_SYNC_OUT 0x4UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN0_USAGE_LAST FUNC_PTP_PIN_QCFG_RESP_PIN0_USAGE_SYNC_OUT + u8 pin1_usage; + #define FUNC_PTP_PIN_QCFG_RESP_PIN1_USAGE_NONE 0x0UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN1_USAGE_PPS_IN 0x1UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN1_USAGE_PPS_OUT 0x2UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN1_USAGE_SYNC_IN 0x3UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN1_USAGE_SYNC_OUT 0x4UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN1_USAGE_LAST FUNC_PTP_PIN_QCFG_RESP_PIN1_USAGE_SYNC_OUT + u8 pin2_usage; + #define FUNC_PTP_PIN_QCFG_RESP_PIN2_USAGE_NONE 0x0UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN2_USAGE_PPS_IN 0x1UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN2_USAGE_PPS_OUT 0x2UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN2_USAGE_SYNC_IN 0x3UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN2_USAGE_SYNC_OUT 0x4UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN2_USAGE_SYNCE_PRIMARY_CLOCK_OUT 0x5UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN2_USAGE_SYNCE_SECONDARY_CLOCK_OUT 0x6UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN2_USAGE_LAST FUNC_PTP_PIN_QCFG_RESP_PIN2_USAGE_SYNCE_SECONDARY_CLOCK_OUT + u8 pin3_usage; + #define FUNC_PTP_PIN_QCFG_RESP_PIN3_USAGE_NONE 0x0UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN3_USAGE_PPS_IN 0x1UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN3_USAGE_PPS_OUT 0x2UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN3_USAGE_SYNC_IN 0x3UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN3_USAGE_SYNC_OUT 0x4UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN3_USAGE_SYNCE_PRIMARY_CLOCK_OUT 0x5UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN3_USAGE_SYNCE_SECONDARY_CLOCK_OUT 0x6UL + #define FUNC_PTP_PIN_QCFG_RESP_PIN3_USAGE_LAST FUNC_PTP_PIN_QCFG_RESP_PIN3_USAGE_SYNCE_SECONDARY_CLOCK_OUT + u8 unused_0; + u8 valid; +}; + +/* hwrm_func_ptp_pin_cfg_input (size:256b/32B) */ +struct hwrm_func_ptp_pin_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 enables; + #define FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN0_STATE 0x1UL + #define FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN0_USAGE 0x2UL + #define FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN1_STATE 0x4UL + #define FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN1_USAGE 0x8UL + #define FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN2_STATE 0x10UL + #define FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN2_USAGE 0x20UL + #define FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN3_STATE 0x40UL + #define FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN3_USAGE 0x80UL + u8 pin0_state; + #define FUNC_PTP_PIN_CFG_REQ_PIN0_STATE_DISABLED 0x0UL + #define FUNC_PTP_PIN_CFG_REQ_PIN0_STATE_ENABLED 0x1UL + #define FUNC_PTP_PIN_CFG_REQ_PIN0_STATE_LAST FUNC_PTP_PIN_CFG_REQ_PIN0_STATE_ENABLED + u8 pin0_usage; + #define FUNC_PTP_PIN_CFG_REQ_PIN0_USAGE_NONE 0x0UL + #define FUNC_PTP_PIN_CFG_REQ_PIN0_USAGE_PPS_IN 0x1UL + #define FUNC_PTP_PIN_CFG_REQ_PIN0_USAGE_PPS_OUT 0x2UL + #define FUNC_PTP_PIN_CFG_REQ_PIN0_USAGE_SYNC_IN 0x3UL + #define FUNC_PTP_PIN_CFG_REQ_PIN0_USAGE_SYNC_OUT 0x4UL + #define FUNC_PTP_PIN_CFG_REQ_PIN0_USAGE_LAST FUNC_PTP_PIN_CFG_REQ_PIN0_USAGE_SYNC_OUT + u8 pin1_state; + #define FUNC_PTP_PIN_CFG_REQ_PIN1_STATE_DISABLED 0x0UL + #define FUNC_PTP_PIN_CFG_REQ_PIN1_STATE_ENABLED 0x1UL + #define FUNC_PTP_PIN_CFG_REQ_PIN1_STATE_LAST FUNC_PTP_PIN_CFG_REQ_PIN1_STATE_ENABLED + u8 pin1_usage; + #define FUNC_PTP_PIN_CFG_REQ_PIN1_USAGE_NONE 0x0UL + #define FUNC_PTP_PIN_CFG_REQ_PIN1_USAGE_PPS_IN 0x1UL + #define FUNC_PTP_PIN_CFG_REQ_PIN1_USAGE_PPS_OUT 0x2UL + #define FUNC_PTP_PIN_CFG_REQ_PIN1_USAGE_SYNC_IN 0x3UL + #define FUNC_PTP_PIN_CFG_REQ_PIN1_USAGE_SYNC_OUT 0x4UL + #define FUNC_PTP_PIN_CFG_REQ_PIN1_USAGE_LAST FUNC_PTP_PIN_CFG_REQ_PIN1_USAGE_SYNC_OUT + u8 pin2_state; + #define FUNC_PTP_PIN_CFG_REQ_PIN2_STATE_DISABLED 0x0UL + #define FUNC_PTP_PIN_CFG_REQ_PIN2_STATE_ENABLED 0x1UL + #define FUNC_PTP_PIN_CFG_REQ_PIN2_STATE_LAST FUNC_PTP_PIN_CFG_REQ_PIN2_STATE_ENABLED + u8 pin2_usage; + #define FUNC_PTP_PIN_CFG_REQ_PIN2_USAGE_NONE 0x0UL + #define FUNC_PTP_PIN_CFG_REQ_PIN2_USAGE_PPS_IN 0x1UL + #define FUNC_PTP_PIN_CFG_REQ_PIN2_USAGE_PPS_OUT 0x2UL + #define FUNC_PTP_PIN_CFG_REQ_PIN2_USAGE_SYNC_IN 0x3UL + #define FUNC_PTP_PIN_CFG_REQ_PIN2_USAGE_SYNC_OUT 0x4UL + #define FUNC_PTP_PIN_CFG_REQ_PIN2_USAGE_SYNCE_PRIMARY_CLOCK_OUT 0x5UL + #define FUNC_PTP_PIN_CFG_REQ_PIN2_USAGE_SYNCE_SECONDARY_CLOCK_OUT 0x6UL + #define FUNC_PTP_PIN_CFG_REQ_PIN2_USAGE_LAST FUNC_PTP_PIN_CFG_REQ_PIN2_USAGE_SYNCE_SECONDARY_CLOCK_OUT + u8 pin3_state; + #define FUNC_PTP_PIN_CFG_REQ_PIN3_STATE_DISABLED 0x0UL + #define FUNC_PTP_PIN_CFG_REQ_PIN3_STATE_ENABLED 0x1UL + #define FUNC_PTP_PIN_CFG_REQ_PIN3_STATE_LAST FUNC_PTP_PIN_CFG_REQ_PIN3_STATE_ENABLED + u8 pin3_usage; + #define FUNC_PTP_PIN_CFG_REQ_PIN3_USAGE_NONE 0x0UL + #define FUNC_PTP_PIN_CFG_REQ_PIN3_USAGE_PPS_IN 0x1UL + #define FUNC_PTP_PIN_CFG_REQ_PIN3_USAGE_PPS_OUT 0x2UL + #define FUNC_PTP_PIN_CFG_REQ_PIN3_USAGE_SYNC_IN 0x3UL + #define FUNC_PTP_PIN_CFG_REQ_PIN3_USAGE_SYNC_OUT 0x4UL + #define FUNC_PTP_PIN_CFG_REQ_PIN3_USAGE_SYNCE_PRIMARY_CLOCK_OUT 0x5UL + #define FUNC_PTP_PIN_CFG_REQ_PIN3_USAGE_SYNCE_SECONDARY_CLOCK_OUT 0x6UL + #define FUNC_PTP_PIN_CFG_REQ_PIN3_USAGE_LAST FUNC_PTP_PIN_CFG_REQ_PIN3_USAGE_SYNCE_SECONDARY_CLOCK_OUT + u8 unused_0[4]; +}; + +/* hwrm_func_ptp_pin_cfg_output (size:128b/16B) */ +struct hwrm_func_ptp_pin_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_ptp_cfg_input (size:384b/48B) */ +struct hwrm_func_ptp_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 enables; + #define FUNC_PTP_CFG_REQ_ENABLES_PTP_PPS_EVENT 0x1UL + #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_DLL_SOURCE 0x2UL + #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_DLL_PHASE 0x4UL + #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PERIOD 0x8UL + #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_UP 0x10UL + #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PHASE 0x20UL + #define FUNC_PTP_CFG_REQ_ENABLES_PTP_SET_TIME 0x40UL + u8 ptp_pps_event; + #define FUNC_PTP_CFG_REQ_PTP_PPS_EVENT_INTERNAL 0x1UL + #define FUNC_PTP_CFG_REQ_PTP_PPS_EVENT_EXTERNAL 0x2UL + u8 ptp_freq_adj_dll_source; + #define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_NONE 0x0UL + #define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_TSIO_0 0x1UL + #define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_TSIO_1 0x2UL + #define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_TSIO_2 0x3UL + #define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_TSIO_3 0x4UL + #define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_PORT_0 0x5UL + #define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_PORT_1 0x6UL + #define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_PORT_2 0x7UL + #define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_PORT_3 0x8UL + #define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_INVALID 0xffUL + #define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_LAST FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_INVALID + u8 ptp_freq_adj_dll_phase; + #define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_PHASE_NONE 0x0UL + #define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_PHASE_4K 0x1UL + #define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_PHASE_8K 0x2UL + #define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_PHASE_10M 0x3UL + #define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_PHASE_25M 0x4UL + #define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_PHASE_LAST FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_PHASE_25M + u8 unused_0[3]; + __le32 ptp_freq_adj_ext_period; + __le32 ptp_freq_adj_ext_up; + __le32 ptp_freq_adj_ext_phase_lower; + __le32 ptp_freq_adj_ext_phase_upper; + __le64 ptp_set_time; +}; + +/* hwrm_func_ptp_cfg_output (size:128b/16B) */ +struct hwrm_func_ptp_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_ptp_ts_query_input (size:192b/24B) */ +struct hwrm_func_ptp_ts_query_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define FUNC_PTP_TS_QUERY_REQ_FLAGS_PPS_TIME 0x1UL + #define FUNC_PTP_TS_QUERY_REQ_FLAGS_PTM_TIME 0x2UL + u8 unused_0[4]; +}; + +/* hwrm_func_ptp_ts_query_output (size:320b/40B) */ +struct hwrm_func_ptp_ts_query_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le64 pps_event_ts; + __le64 ptm_local_ts; + __le64 ptm_system_ts; + __le32 ptm_link_delay; + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_func_ptp_ext_cfg_input (size:256b/32B) */ +struct hwrm_func_ptp_ext_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 enables; + #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_MASTER_FID 0x1UL + #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_SEC_FID 0x2UL + #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_SEC_MODE 0x4UL + #define FUNC_PTP_EXT_CFG_REQ_ENABLES_FAILOVER_TIMER 0x8UL + __le16 phc_master_fid; + __le16 phc_sec_fid; + u8 phc_sec_mode; + #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_SWITCH 0x0UL + #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_ALL 0x1UL + #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_PF_ONLY 0x2UL + #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_LAST FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_PF_ONLY + u8 unused_0; + __le32 failover_timer; + u8 unused_1[4]; +}; + +/* hwrm_func_ptp_ext_cfg_output (size:128b/16B) */ +struct hwrm_func_ptp_ext_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_ptp_ext_qcfg_input (size:192b/24B) */ +struct hwrm_func_ptp_ext_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 unused_0[8]; +}; + +/* hwrm_func_ptp_ext_qcfg_output (size:256b/32B) */ +struct hwrm_func_ptp_ext_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 phc_master_fid; + __le16 phc_sec_fid; + __le16 phc_active_fid0; + __le16 phc_active_fid1; + __le32 last_failover_event; + __le16 from_fid; + __le16 to_fid; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_backing_store_cfg_v2_input (size:512b/64B) */ +struct hwrm_func_backing_store_cfg_v2_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 type; + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TX_CK 0x13UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RX_CK 0x14UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TBL_SCOPE 0x1cUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_XID_PARTITION 0x1dUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRT_TRACE 0x1eUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRT2_TRACE 0x1fUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CRT_TRACE 0x20UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CRT2_TRACE 0x21UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RIGP0_TRACE 0x22UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_L2_HWRM_TRACE 0x23UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_ROCE_HWRM_TRACE 0x24UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TTX_PACING_TQM_RING 0x25UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CA0_TRACE 0x26UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CA1_TRACE 0x27UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CA2_TRACE 0x28UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RIGP1_TRACE 0x29UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID + __le16 instance; + __le32 flags; + #define FUNC_BACKING_STORE_CFG_V2_REQ_FLAGS_PREBOOT_MODE 0x1UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_FLAGS_BS_CFG_ALL_DONE 0x2UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_FLAGS_BS_EXTEND 0x4UL + __le64 page_dir; + __le32 num_entries; + __le16 entry_size; + u8 page_size_pbl_level; + #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_SFT 0 + #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LAST FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_2 + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_1G + u8 subtype_valid_cnt; + __le32 split_entry_0; + __le32 split_entry_1; + __le32 split_entry_2; + __le32 split_entry_3; + __le32 enables; + #define FUNC_BACKING_STORE_CFG_V2_REQ_ENABLES_NEXT_BS_OFFSET 0x1UL + __le32 next_bs_offset; +}; + +/* hwrm_func_backing_store_cfg_v2_output (size:128b/16B) */ +struct hwrm_func_backing_store_cfg_v2_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 rsvd0[7]; + u8 valid; +}; + +/* hwrm_func_backing_store_qcfg_v2_input (size:192b/24B) */ +struct hwrm_func_backing_store_qcfg_v2_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 type; + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TX_CK 0x13UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RX_CK 0x14UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TBL_SCOPE 0x1cUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_XID_PARTITION_TABLE 0x1dUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SRT_TRACE 0x1eUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SRT2_TRACE 0x1fUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CRT_TRACE 0x20UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CRT2_TRACE 0x21UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RIGP0_TRACE 0x22UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_L2_HWRM_TRACE 0x23UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_ROCE_HWRM_TRACE 0x24UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TTX_PACING_TQM_RING 0x25UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CA0_TRACE 0x26UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CA1_TRACE 0x27UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CA2_TRACE 0x28UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RIGP1_TRACE 0x29UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID + __le16 instance; + u8 rsvd[4]; +}; + +/* hwrm_func_backing_store_qcfg_v2_output (size:448b/56B) */ +struct hwrm_func_backing_store_qcfg_v2_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 type; + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TX_CK 0x13UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RX_CK 0x14UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TBL_SCOPE 0x1cUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_XID_PARTITION 0x1dUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRT_TRACE 0x1eUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRT2_TRACE 0x1fUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CRT_TRACE 0x20UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CRT2_TRACE 0x21UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RIGP0_TRACE 0x22UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_L2_HWRM_TRACE 0x23UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_ROCE_HWRM_TRACE 0x24UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TTX_PACING_TQM_RING 0x25UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CA0_TRACE 0x26UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CA1_TRACE 0x27UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CA2_TRACE 0x28UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RIGP1_TRACE 0x29UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID + __le16 instance; + __le32 flags; + __le64 page_dir; + __le32 num_entries; + u8 page_size_pbl_level; + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_MASK 0xfUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_SFT 0 + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_2 + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_SFT 4 + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_1G + u8 subtype_valid_cnt; + u8 rsvd[2]; + __le32 split_entry_0; + __le32 split_entry_1; + __le32 split_entry_2; + __le32 split_entry_3; + u8 rsvd2[7]; + u8 valid; +}; + +/* qpc_split_entries (size:128b/16B) */ +struct qpc_split_entries { + __le32 qp_num_l2_entries; + __le32 qp_num_qp1_entries; + __le32 qp_num_fast_qpmd_entries; + __le32 rsvd; +}; + +/* srq_split_entries (size:128b/16B) */ +struct srq_split_entries { + __le32 srq_num_l2_entries; + __le32 rsvd; + __le32 rsvd2[2]; +}; + +/* cq_split_entries (size:128b/16B) */ +struct cq_split_entries { + __le32 cq_num_l2_entries; + __le32 rsvd; + __le32 rsvd2[2]; +}; + +/* vnic_split_entries (size:128b/16B) */ +struct vnic_split_entries { + __le32 vnic_num_vnic_entries; + __le32 rsvd; + __le32 rsvd2[2]; +}; + +/* mrav_split_entries (size:128b/16B) */ +struct mrav_split_entries { + __le32 mrav_num_av_entries; + __le32 rsvd; + __le32 rsvd2[2]; +}; + +/* ts_split_entries (size:128b/16B) */ +struct ts_split_entries { + __le32 region_num_entries; + u8 tsid; + u8 lkup_static_bkt_cnt_exp[2]; + u8 locked; + __le32 rsvd2[2]; +}; + +/* ck_split_entries (size:128b/16B) */ +struct ck_split_entries { + __le32 num_quic_entries; + __le32 rsvd; + __le32 rsvd2[2]; +}; + +/* hwrm_func_backing_store_qcaps_v2_input (size:192b/24B) */ +struct hwrm_func_backing_store_qcaps_v2_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 type; + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TX_CK 0x13UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RX_CK 0x14UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TBL_SCOPE 0x1cUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_XID_PARTITION 0x1dUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRT_TRACE 0x1eUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRT2_TRACE 0x1fUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CRT_TRACE 0x20UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CRT2_TRACE 0x21UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RIGP0_TRACE 0x22UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_L2_HWRM_TRACE 0x23UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_ROCE_HWRM_TRACE 0x24UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TTX_PACING_TQM_RING 0x25UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CA0_TRACE 0x26UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CA1_TRACE 0x27UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CA2_TRACE 0x28UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RIGP1_TRACE 0x29UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID + u8 rsvd[6]; +}; + +/* hwrm_func_backing_store_qcaps_v2_output (size:448b/56B) */ +struct hwrm_func_backing_store_qcaps_v2_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 type; + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TX_CK 0x13UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RX_CK 0x14UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SQ_DB_SHADOW 0x16UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RQ_DB_SHADOW 0x17UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ_DB_SHADOW 0x18UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ_DB_SHADOW 0x19UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TBL_SCOPE 0x1cUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_XID_PARTITION 0x1dUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRT_TRACE 0x1eUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRT2_TRACE 0x1fUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CRT_TRACE 0x20UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CRT2_TRACE 0x21UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RIGP0_TRACE 0x22UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_L2_HWRM_TRACE 0x23UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_ROCE_HWRM_TRACE 0x24UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TTX_PACING_TQM_RING 0x25UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CA0_TRACE 0x26UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CA1_TRACE 0x27UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CA2_TRACE 0x28UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RIGP1_TRACE 0x29UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID + __le16 entry_size; + __le32 flags; + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_ENABLE_CTX_KIND_INIT 0x1UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_TYPE_VALID 0x2UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_DRIVER_MANAGED_MEMORY 0x4UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_ROCE_QP_PSEUDO_STATIC_ALLOC 0x8UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_FW_DBG_TRACE 0x10UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_FW_BIN_DBG_TRACE 0x20UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_NEXT_BS_OFFSET 0x40UL + __le32 instance_bit_map; + u8 ctx_init_value; + u8 ctx_init_offset; + u8 entry_multiple; + u8 rsvd; + __le32 max_num_entries; + __le32 min_num_entries; + __le16 next_valid_type; + u8 subtype_valid_cnt; + u8 exact_cnt_bit_map; + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_EXACT_CNT_BIT_MAP_SPLIT_ENTRY_0_EXACT 0x1UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_EXACT_CNT_BIT_MAP_SPLIT_ENTRY_1_EXACT 0x2UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_EXACT_CNT_BIT_MAP_SPLIT_ENTRY_2_EXACT 0x4UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_EXACT_CNT_BIT_MAP_SPLIT_ENTRY_3_EXACT 0x8UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_EXACT_CNT_BIT_MAP_UNUSED_MASK 0xf0UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_EXACT_CNT_BIT_MAP_UNUSED_SFT 4 + __le32 split_entry_0; + __le32 split_entry_1; + __le32 split_entry_2; + __le32 split_entry_3; + __le16 max_instance_count; + u8 rsvd3; + u8 valid; +}; + +/* hwrm_func_dbr_pacing_qcfg_input (size:128b/16B) */ +struct hwrm_func_dbr_pacing_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; +}; + +/* hwrm_func_dbr_pacing_qcfg_output (size:512b/64B) */ +struct hwrm_func_dbr_pacing_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 flags; + #define FUNC_DBR_PACING_QCFG_RESP_FLAGS_DBR_NQ_EVENT_ENABLED 0x1UL + u8 unused_0[7]; + __le32 dbr_stat_db_fifo_reg; + #define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK 0x3UL + #define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_SFT 0 + #define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_PCIE_CFG 0x0UL + #define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_GRC 0x1UL + #define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_BAR0 0x2UL + #define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_BAR1 0x3UL + #define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_LAST FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_BAR1 + #define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_MASK 0xfffffffcUL + #define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SFT 2 + __le32 dbr_stat_db_fifo_reg_watermark_mask; + u8 dbr_stat_db_fifo_reg_watermark_shift; + u8 unused_1[3]; + __le32 dbr_stat_db_fifo_reg_fifo_room_mask; + u8 dbr_stat_db_fifo_reg_fifo_room_shift; + u8 unused_2[3]; + __le32 dbr_throttling_aeq_arm_reg; + #define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_MASK 0x3UL + #define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_SFT 0 + #define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_PCIE_CFG 0x0UL + #define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_GRC 0x1UL + #define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_BAR0 0x2UL + #define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_BAR1 0x3UL + #define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_LAST FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_BAR1 + #define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_MASK 0xfffffffcUL + #define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SFT 2 + u8 dbr_throttling_aeq_arm_reg_val; + u8 unused_3[3]; + __le32 dbr_stat_db_max_fifo_depth; + __le32 primary_nq_id; + __le32 pacing_threshold; + u8 unused_4[7]; + u8 valid; +}; + +/* hwrm_func_drv_if_change_input (size:192b/24B) */ +struct hwrm_func_drv_if_change_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define FUNC_DRV_IF_CHANGE_REQ_FLAGS_UP 0x1UL + __le32 unused; +}; + +/* hwrm_func_drv_if_change_output (size:128b/16B) */ +struct hwrm_func_drv_if_change_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 flags; + #define FUNC_DRV_IF_CHANGE_RESP_FLAGS_RESC_CHANGE 0x1UL + #define FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE 0x2UL + #define FUNC_DRV_IF_CHANGE_RESP_FLAGS_CAPS_CHANGE 0x4UL + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_port_phy_cfg_input (size:512b/64B) */ +struct hwrm_port_phy_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define PORT_PHY_CFG_REQ_FLAGS_RESET_PHY 0x1UL + #define PORT_PHY_CFG_REQ_FLAGS_DEPRECATED 0x2UL + #define PORT_PHY_CFG_REQ_FLAGS_FORCE 0x4UL + #define PORT_PHY_CFG_REQ_FLAGS_RESTART_AUTONEG 0x8UL + #define PORT_PHY_CFG_REQ_FLAGS_EEE_ENABLE 0x10UL + #define PORT_PHY_CFG_REQ_FLAGS_EEE_DISABLE 0x20UL + #define PORT_PHY_CFG_REQ_FLAGS_EEE_TX_LPI_ENABLE 0x40UL + #define PORT_PHY_CFG_REQ_FLAGS_EEE_TX_LPI_DISABLE 0x80UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_AUTONEG_ENABLE 0x100UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_AUTONEG_DISABLE 0x200UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_ENABLE 0x400UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_DISABLE 0x800UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE91_ENABLE 0x1000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE91_DISABLE 0x2000UL + #define PORT_PHY_CFG_REQ_FLAGS_FORCE_LINK_DWN 0x4000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS544_1XN_ENABLE 0x8000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS544_1XN_DISABLE 0x10000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS544_IEEE_ENABLE 0x20000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS544_IEEE_DISABLE 0x40000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_1XN_ENABLE 0x80000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_1XN_DISABLE 0x100000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_IEEE_ENABLE 0x200000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_IEEE_DISABLE 0x400000UL + __le32 enables; + #define PORT_PHY_CFG_REQ_ENABLES_AUTO_MODE 0x1UL + #define PORT_PHY_CFG_REQ_ENABLES_AUTO_DUPLEX 0x2UL + #define PORT_PHY_CFG_REQ_ENABLES_AUTO_PAUSE 0x4UL + #define PORT_PHY_CFG_REQ_ENABLES_AUTO_LINK_SPEED 0x8UL + #define PORT_PHY_CFG_REQ_ENABLES_AUTO_LINK_SPEED_MASK 0x10UL + #define PORT_PHY_CFG_REQ_ENABLES_WIRESPEED 0x20UL + #define PORT_PHY_CFG_REQ_ENABLES_LPBK 0x40UL + #define PORT_PHY_CFG_REQ_ENABLES_PREEMPHASIS 0x80UL + #define PORT_PHY_CFG_REQ_ENABLES_FORCE_PAUSE 0x100UL + #define PORT_PHY_CFG_REQ_ENABLES_EEE_LINK_SPEED_MASK 0x200UL + #define PORT_PHY_CFG_REQ_ENABLES_TX_LPI_TIMER 0x400UL + #define PORT_PHY_CFG_REQ_ENABLES_FORCE_PAM4_LINK_SPEED 0x800UL + #define PORT_PHY_CFG_REQ_ENABLES_AUTO_PAM4_LINK_SPEED_MASK 0x1000UL + #define PORT_PHY_CFG_REQ_ENABLES_FORCE_LINK_SPEEDS2 0x2000UL + #define PORT_PHY_CFG_REQ_ENABLES_AUTO_LINK_SPEEDS2_MASK 0x4000UL + __le16 port_id; + __le16 force_link_speed; + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10MB 0xffffUL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_LAST PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10MB + u8 auto_mode; + #define PORT_PHY_CFG_REQ_AUTO_MODE_NONE 0x0UL + #define PORT_PHY_CFG_REQ_AUTO_MODE_ALL_SPEEDS 0x1UL + #define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_SPEED 0x2UL + #define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_OR_BELOW 0x3UL + #define PORT_PHY_CFG_REQ_AUTO_MODE_SPEED_MASK 0x4UL + #define PORT_PHY_CFG_REQ_AUTO_MODE_LAST PORT_PHY_CFG_REQ_AUTO_MODE_SPEED_MASK + u8 auto_duplex; + #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_HALF 0x0UL + #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_FULL 0x1UL + #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_BOTH 0x2UL + #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_LAST PORT_PHY_CFG_REQ_AUTO_DUPLEX_BOTH + u8 auto_pause; + #define PORT_PHY_CFG_REQ_AUTO_PAUSE_TX 0x1UL + #define PORT_PHY_CFG_REQ_AUTO_PAUSE_RX 0x2UL + #define PORT_PHY_CFG_REQ_AUTO_PAUSE_AUTONEG_PAUSE 0x4UL + u8 mgmt_flag; + #define PORT_PHY_CFG_REQ_MGMT_FLAG_LINK_RELEASE 0x1UL + #define PORT_PHY_CFG_REQ_MGMT_FLAG_MGMT_VALID 0x80UL + __le16 auto_link_speed; + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10MB 0xffffUL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_LAST PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10MB + __le16 auto_link_speed_mask; + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_100MBHD 0x1UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_100MB 0x2UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_1GBHD 0x4UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_1GB 0x8UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_2GB 0x10UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_2_5GB 0x20UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_10GB 0x40UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_20GB 0x80UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_25GB 0x100UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_40GB 0x200UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_50GB 0x400UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_100GB 0x800UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_10MBHD 0x1000UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_10MB 0x2000UL + u8 wirespeed; + #define PORT_PHY_CFG_REQ_WIRESPEED_OFF 0x0UL + #define PORT_PHY_CFG_REQ_WIRESPEED_ON 0x1UL + #define PORT_PHY_CFG_REQ_WIRESPEED_LAST PORT_PHY_CFG_REQ_WIRESPEED_ON + u8 lpbk; + #define PORT_PHY_CFG_REQ_LPBK_NONE 0x0UL + #define PORT_PHY_CFG_REQ_LPBK_LOCAL 0x1UL + #define PORT_PHY_CFG_REQ_LPBK_REMOTE 0x2UL + #define PORT_PHY_CFG_REQ_LPBK_EXTERNAL 0x3UL + #define PORT_PHY_CFG_REQ_LPBK_LAST PORT_PHY_CFG_REQ_LPBK_EXTERNAL + u8 force_pause; + #define PORT_PHY_CFG_REQ_FORCE_PAUSE_TX 0x1UL + #define PORT_PHY_CFG_REQ_FORCE_PAUSE_RX 0x2UL + u8 unused_1; + __le32 preemphasis; + __le16 eee_link_speed_mask; + #define PORT_PHY_CFG_REQ_EEE_LINK_SPEED_MASK_RSVD1 0x1UL + #define PORT_PHY_CFG_REQ_EEE_LINK_SPEED_MASK_100MB 0x2UL + #define PORT_PHY_CFG_REQ_EEE_LINK_SPEED_MASK_RSVD2 0x4UL + #define PORT_PHY_CFG_REQ_EEE_LINK_SPEED_MASK_1GB 0x8UL + #define PORT_PHY_CFG_REQ_EEE_LINK_SPEED_MASK_RSVD3 0x10UL + #define PORT_PHY_CFG_REQ_EEE_LINK_SPEED_MASK_RSVD4 0x20UL + #define PORT_PHY_CFG_REQ_EEE_LINK_SPEED_MASK_10GB 0x40UL + __le16 force_pam4_link_speed; + #define PORT_PHY_CFG_REQ_FORCE_PAM4_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_CFG_REQ_FORCE_PAM4_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_CFG_REQ_FORCE_PAM4_LINK_SPEED_200GB 0x7d0UL + #define PORT_PHY_CFG_REQ_FORCE_PAM4_LINK_SPEED_LAST PORT_PHY_CFG_REQ_FORCE_PAM4_LINK_SPEED_200GB + __le32 tx_lpi_timer; + #define PORT_PHY_CFG_REQ_TX_LPI_TIMER_MASK 0xffffffUL + #define PORT_PHY_CFG_REQ_TX_LPI_TIMER_SFT 0 + __le16 auto_link_pam4_speed_mask; + #define PORT_PHY_CFG_REQ_AUTO_LINK_PAM4_SPEED_MASK_50G 0x1UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_PAM4_SPEED_MASK_100G 0x2UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_PAM4_SPEED_MASK_200G 0x4UL + __le16 force_link_speeds2; + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_1GB 0xaUL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_10GB 0x64UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_25GB 0xfaUL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_40GB 0x190UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_50GB 0x1f4UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_100GB 0x3e8UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_50GB_PAM4_56 0x1f5UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_100GB_PAM4_56 0x3e9UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_200GB_PAM4_56 0x7d1UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_400GB_PAM4_56 0xfa1UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_100GB_PAM4_112 0x3eaUL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_200GB_PAM4_112 0x7d2UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_400GB_PAM4_112 0xfa2UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_800GB_PAM4_112 0x1f42UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_LAST PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_800GB_PAM4_112 + __le16 auto_link_speeds2_mask; + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_1GB 0x1UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_10GB 0x2UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_25GB 0x4UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_40GB 0x8UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_50GB 0x10UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_100GB 0x20UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_50GB_PAM4_56 0x40UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_100GB_PAM4_56 0x80UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_200GB_PAM4_56 0x100UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_400GB_PAM4_56 0x200UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_100GB_PAM4_112 0x400UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_200GB_PAM4_112 0x800UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_400GB_PAM4_112 0x1000UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_800GB_PAM4_112 0x2000UL + u8 unused_2[6]; +}; + +/* hwrm_port_phy_cfg_output (size:128b/16B) */ +struct hwrm_port_phy_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_port_phy_cfg_cmd_err (size:64b/8B) */ +struct hwrm_port_phy_cfg_cmd_err { + u8 code; + #define PORT_PHY_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL + #define PORT_PHY_CFG_CMD_ERR_CODE_ILLEGAL_SPEED 0x1UL + #define PORT_PHY_CFG_CMD_ERR_CODE_RETRY 0x2UL + #define PORT_PHY_CFG_CMD_ERR_CODE_LAST PORT_PHY_CFG_CMD_ERR_CODE_RETRY + u8 unused_0[7]; +}; + +/* hwrm_port_phy_qcfg_input (size:192b/24B) */ +struct hwrm_port_phy_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 port_id; + u8 unused_0[6]; +}; + +/* hwrm_port_phy_qcfg_output (size:832b/104B) */ +struct hwrm_port_phy_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 link; + #define PORT_PHY_QCFG_RESP_LINK_NO_LINK 0x0UL + #define PORT_PHY_QCFG_RESP_LINK_SIGNAL 0x1UL + #define PORT_PHY_QCFG_RESP_LINK_LINK 0x2UL + #define PORT_PHY_QCFG_RESP_LINK_LAST PORT_PHY_QCFG_RESP_LINK_LINK + u8 active_fec_signal_mode; + #define PORT_PHY_QCFG_RESP_SIGNAL_MODE_MASK 0xfUL + #define PORT_PHY_QCFG_RESP_SIGNAL_MODE_SFT 0 + #define PORT_PHY_QCFG_RESP_SIGNAL_MODE_NRZ 0x0UL + #define PORT_PHY_QCFG_RESP_SIGNAL_MODE_PAM4 0x1UL + #define PORT_PHY_QCFG_RESP_SIGNAL_MODE_PAM4_112 0x2UL + #define PORT_PHY_QCFG_RESP_SIGNAL_MODE_LAST PORT_PHY_QCFG_RESP_SIGNAL_MODE_PAM4_112 + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_MASK 0xf0UL + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_SFT 4 + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_NONE_ACTIVE (0x0UL << 4) + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_CLAUSE74_ACTIVE (0x1UL << 4) + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_CLAUSE91_ACTIVE (0x2UL << 4) + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS544_1XN_ACTIVE (0x3UL << 4) + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS544_IEEE_ACTIVE (0x4UL << 4) + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS272_1XN_ACTIVE (0x5UL << 4) + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS272_IEEE_ACTIVE (0x6UL << 4) + #define PORT_PHY_QCFG_RESP_ACTIVE_FEC_LAST PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS272_IEEE_ACTIVE + __le16 link_speed; + #define PORT_PHY_QCFG_RESP_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_200GB 0x7d0UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_400GB 0xfa0UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_800GB 0x1f40UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_10MB 0xffffUL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_LAST PORT_PHY_QCFG_RESP_LINK_SPEED_10MB + u8 duplex_cfg; + #define PORT_PHY_QCFG_RESP_DUPLEX_CFG_HALF 0x0UL + #define PORT_PHY_QCFG_RESP_DUPLEX_CFG_FULL 0x1UL + #define PORT_PHY_QCFG_RESP_DUPLEX_CFG_LAST PORT_PHY_QCFG_RESP_DUPLEX_CFG_FULL + u8 pause; + #define PORT_PHY_QCFG_RESP_PAUSE_TX 0x1UL + #define PORT_PHY_QCFG_RESP_PAUSE_RX 0x2UL + __le16 support_speeds; + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_100MBHD 0x1UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_100MB 0x2UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_1GBHD 0x4UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_1GB 0x8UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_2GB 0x10UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_2_5GB 0x20UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_10GB 0x40UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_20GB 0x80UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_25GB 0x100UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_40GB 0x200UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_50GB 0x400UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_100GB 0x800UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_10MBHD 0x1000UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_10MB 0x2000UL + __le16 force_link_speed; + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10MB 0xffffUL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_LAST PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10MB + u8 auto_mode; + #define PORT_PHY_QCFG_RESP_AUTO_MODE_NONE 0x0UL + #define PORT_PHY_QCFG_RESP_AUTO_MODE_ALL_SPEEDS 0x1UL + #define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_SPEED 0x2UL + #define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_OR_BELOW 0x3UL + #define PORT_PHY_QCFG_RESP_AUTO_MODE_SPEED_MASK 0x4UL + #define PORT_PHY_QCFG_RESP_AUTO_MODE_LAST PORT_PHY_QCFG_RESP_AUTO_MODE_SPEED_MASK + u8 auto_pause; + #define PORT_PHY_QCFG_RESP_AUTO_PAUSE_TX 0x1UL + #define PORT_PHY_QCFG_RESP_AUTO_PAUSE_RX 0x2UL + #define PORT_PHY_QCFG_RESP_AUTO_PAUSE_AUTONEG_PAUSE 0x4UL + __le16 auto_link_speed; + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10MB 0xffffUL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_LAST PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10MB + __le16 auto_link_speed_mask; + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_100MBHD 0x1UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_100MB 0x2UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_1GBHD 0x4UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_1GB 0x8UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_2GB 0x10UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_2_5GB 0x20UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_10GB 0x40UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_20GB 0x80UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_25GB 0x100UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_40GB 0x200UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_50GB 0x400UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_100GB 0x800UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_10MBHD 0x1000UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_10MB 0x2000UL + u8 wirespeed; + #define PORT_PHY_QCFG_RESP_WIRESPEED_OFF 0x0UL + #define PORT_PHY_QCFG_RESP_WIRESPEED_ON 0x1UL + #define PORT_PHY_QCFG_RESP_WIRESPEED_LAST PORT_PHY_QCFG_RESP_WIRESPEED_ON + u8 lpbk; + #define PORT_PHY_QCFG_RESP_LPBK_NONE 0x0UL + #define PORT_PHY_QCFG_RESP_LPBK_LOCAL 0x1UL + #define PORT_PHY_QCFG_RESP_LPBK_REMOTE 0x2UL + #define PORT_PHY_QCFG_RESP_LPBK_EXTERNAL 0x3UL + #define PORT_PHY_QCFG_RESP_LPBK_LAST PORT_PHY_QCFG_RESP_LPBK_EXTERNAL + u8 force_pause; + #define PORT_PHY_QCFG_RESP_FORCE_PAUSE_TX 0x1UL + #define PORT_PHY_QCFG_RESP_FORCE_PAUSE_RX 0x2UL + u8 module_status; + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NONE 0x0UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_DISABLETX 0x1UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_WARNINGMSG 0x2UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_PWRDOWN 0x3UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTINSERTED 0x4UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_CURRENTFAULT 0x5UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_OVERHEATED 0x6UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTAPPLICABLE 0xffUL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_LAST PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTAPPLICABLE + __le32 preemphasis; + u8 phy_maj; + u8 phy_min; + u8 phy_bld; + u8 phy_type; + #define PORT_PHY_QCFG_RESP_PHY_TYPE_UNKNOWN 0x0UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASECR 0x1UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR4 0x2UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASELR 0x3UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASESR 0x4UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR2 0x5UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKX 0x6UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR 0x7UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASET 0x8UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASETE 0x9UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_SGMIIEXTPHY 0xaUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_25G_BASECR_CA_L 0xbUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_25G_BASECR_CA_S 0xcUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_25G_BASECR_CA_N 0xdUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_25G_BASESR 0xeUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASECR4 0xfUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASESR4 0x10UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASELR4 0x11UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASEER4 0x12UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASESR10 0x13UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_40G_BASECR4 0x14UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_40G_BASESR4 0x15UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_40G_BASELR4 0x16UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_40G_BASEER4 0x17UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_40G_ACTIVE_CABLE 0x18UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_1G_BASET 0x19UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_1G_BASESX 0x1aUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_1G_BASECX 0x1bUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASECR4 0x1cUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASESR4 0x1dUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASELR4 0x1eUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASEER4 0x1fUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_50G_BASECR 0x20UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_50G_BASESR 0x21UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_50G_BASELR 0x22UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_50G_BASEER 0x23UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASECR2 0x24UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASESR2 0x25UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASELR2 0x26UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASEER2 0x27UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASECR 0x28UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASESR 0x29UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASELR 0x2aUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASEER 0x2bUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASECR2 0x2cUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASESR2 0x2dUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASELR2 0x2eUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASEER2 0x2fUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASECR8 0x30UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASESR8 0x31UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASELR8 0x32UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASEER8 0x33UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASECR4 0x34UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASESR4 0x35UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASELR4 0x36UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASEER4 0x37UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASECR8 0x38UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASESR8 0x39UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASELR8 0x3aUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASEER8 0x3bUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASEFR8 0x3cUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASEDR8 0x3dUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_LAST PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASEDR8 + u8 media_type; + #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN 0x0UL + #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP 0x1UL + #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_DAC 0x2UL + #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_FIBRE 0x3UL + #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_BACKPLANE 0x4UL + #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_LAST PORT_PHY_QCFG_RESP_MEDIA_TYPE_BACKPLANE + u8 xcvr_pkg_type; + #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_INTERNAL 0x1UL + #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_EXTERNAL 0x2UL + #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_LAST PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_EXTERNAL + u8 eee_config_phy_addr; + #define PORT_PHY_QCFG_RESP_PHY_ADDR_MASK 0x1fUL + #define PORT_PHY_QCFG_RESP_PHY_ADDR_SFT 0 + #define PORT_PHY_QCFG_RESP_EEE_CONFIG_MASK 0xe0UL + #define PORT_PHY_QCFG_RESP_EEE_CONFIG_SFT 5 + #define PORT_PHY_QCFG_RESP_EEE_CONFIG_EEE_ENABLED 0x20UL + #define PORT_PHY_QCFG_RESP_EEE_CONFIG_EEE_ACTIVE 0x40UL + #define PORT_PHY_QCFG_RESP_EEE_CONFIG_EEE_TX_LPI 0x80UL + u8 parallel_detect; + #define PORT_PHY_QCFG_RESP_PARALLEL_DETECT 0x1UL + __le16 link_partner_adv_speeds; + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_100MBHD 0x1UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_100MB 0x2UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_1GBHD 0x4UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_1GB 0x8UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_2GB 0x10UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_2_5GB 0x20UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_10GB 0x40UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_20GB 0x80UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_25GB 0x100UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_40GB 0x200UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_50GB 0x400UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_100GB 0x800UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_10MBHD 0x1000UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_10MB 0x2000UL + u8 link_partner_adv_auto_mode; + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_NONE 0x0UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ALL_SPEEDS 0x1UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_SPEED 0x2UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_OR_BELOW 0x3UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_SPEED_MASK 0x4UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_LAST PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_SPEED_MASK + u8 link_partner_adv_pause; + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_PAUSE_TX 0x1UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_PAUSE_RX 0x2UL + __le16 adv_eee_link_speed_mask; + #define PORT_PHY_QCFG_RESP_ADV_EEE_LINK_SPEED_MASK_RSVD1 0x1UL + #define PORT_PHY_QCFG_RESP_ADV_EEE_LINK_SPEED_MASK_100MB 0x2UL + #define PORT_PHY_QCFG_RESP_ADV_EEE_LINK_SPEED_MASK_RSVD2 0x4UL + #define PORT_PHY_QCFG_RESP_ADV_EEE_LINK_SPEED_MASK_1GB 0x8UL + #define PORT_PHY_QCFG_RESP_ADV_EEE_LINK_SPEED_MASK_RSVD3 0x10UL + #define PORT_PHY_QCFG_RESP_ADV_EEE_LINK_SPEED_MASK_RSVD4 0x20UL + #define PORT_PHY_QCFG_RESP_ADV_EEE_LINK_SPEED_MASK_10GB 0x40UL + __le16 link_partner_adv_eee_link_speed_mask; + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_RSVD1 0x1UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_100MB 0x2UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_RSVD2 0x4UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_1GB 0x8UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_RSVD3 0x10UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_RSVD4 0x20UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_10GB 0x40UL + __le32 xcvr_identifier_type_tx_lpi_timer; + #define PORT_PHY_QCFG_RESP_TX_LPI_TIMER_MASK 0xffffffUL + #define PORT_PHY_QCFG_RESP_TX_LPI_TIMER_SFT 0 + #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_MASK 0xff000000UL + #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_SFT 24 + #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_UNKNOWN (0x0UL << 24) + #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_SFP (0x3UL << 24) + #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFP (0xcUL << 24) + #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFPPLUS (0xdUL << 24) + #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFP28 (0x11UL << 24) + #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFPDD (0x18UL << 24) + #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFP112 (0x1eUL << 24) + #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_SFPDD (0x1fUL << 24) + #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_CSFP (0x20UL << 24) + #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_LAST PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_CSFP + __le16 fec_cfg; + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_NONE_SUPPORTED 0x1UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_AUTONEG_SUPPORTED 0x2UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_AUTONEG_ENABLED 0x4UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE74_SUPPORTED 0x8UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE74_ENABLED 0x10UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_SUPPORTED 0x20UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_ENABLED 0x40UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS544_1XN_SUPPORTED 0x80UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS544_1XN_ENABLED 0x100UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS544_IEEE_SUPPORTED 0x200UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS544_IEEE_ENABLED 0x400UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS272_1XN_SUPPORTED 0x800UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS272_1XN_ENABLED 0x1000UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS272_IEEE_SUPPORTED 0x2000UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS272_IEEE_ENABLED 0x4000UL + u8 duplex_state; + #define PORT_PHY_QCFG_RESP_DUPLEX_STATE_HALF 0x0UL + #define PORT_PHY_QCFG_RESP_DUPLEX_STATE_FULL 0x1UL + #define PORT_PHY_QCFG_RESP_DUPLEX_STATE_LAST PORT_PHY_QCFG_RESP_DUPLEX_STATE_FULL + u8 option_flags; + #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_MEDIA_AUTO_DETECT 0x1UL + #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_SIGNAL_MODE_KNOWN 0x2UL + #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_SPEEDS2_SUPPORTED 0x4UL + char phy_vendor_name[16]; + char phy_vendor_partnumber[16]; + __le16 support_pam4_speeds; + #define PORT_PHY_QCFG_RESP_SUPPORT_PAM4_SPEEDS_50G 0x1UL + #define PORT_PHY_QCFG_RESP_SUPPORT_PAM4_SPEEDS_100G 0x2UL + #define PORT_PHY_QCFG_RESP_SUPPORT_PAM4_SPEEDS_200G 0x4UL + __le16 force_pam4_link_speed; + #define PORT_PHY_QCFG_RESP_FORCE_PAM4_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_QCFG_RESP_FORCE_PAM4_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_QCFG_RESP_FORCE_PAM4_LINK_SPEED_200GB 0x7d0UL + #define PORT_PHY_QCFG_RESP_FORCE_PAM4_LINK_SPEED_LAST PORT_PHY_QCFG_RESP_FORCE_PAM4_LINK_SPEED_200GB + __le16 auto_pam4_link_speed_mask; + #define PORT_PHY_QCFG_RESP_AUTO_PAM4_LINK_SPEED_MASK_50G 0x1UL + #define PORT_PHY_QCFG_RESP_AUTO_PAM4_LINK_SPEED_MASK_100G 0x2UL + #define PORT_PHY_QCFG_RESP_AUTO_PAM4_LINK_SPEED_MASK_200G 0x4UL + u8 link_partner_pam4_adv_speeds; + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_PAM4_ADV_SPEEDS_50GB 0x1UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_PAM4_ADV_SPEEDS_100GB 0x2UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_PAM4_ADV_SPEEDS_200GB 0x4UL + u8 link_down_reason; + #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_RF 0x1UL + #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_OTP_SPEED_VIOLATION 0x2UL + __le16 support_speeds2; + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_1GB 0x1UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_10GB 0x2UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_25GB 0x4UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_40GB 0x8UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_50GB 0x10UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_100GB 0x20UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_50GB_PAM4_56 0x40UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_100GB_PAM4_56 0x80UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_200GB_PAM4_56 0x100UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_400GB_PAM4_56 0x200UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_100GB_PAM4_112 0x400UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_200GB_PAM4_112 0x800UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_400GB_PAM4_112 0x1000UL + #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_800GB_PAM4_112 0x2000UL + __le16 force_link_speeds2; + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEEDS2_1GB 0xaUL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEEDS2_10GB 0x64UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEEDS2_25GB 0xfaUL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEEDS2_40GB 0x190UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEEDS2_50GB 0x1f4UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEEDS2_100GB 0x3e8UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEEDS2_50GB_PAM4_56 0x1f5UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEEDS2_100GB_PAM4_56 0x3e9UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEEDS2_200GB_PAM4_56 0x7d1UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEEDS2_400GB_PAM4_56 0xfa1UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEEDS2_100GB_PAM4_112 0x3eaUL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEEDS2_200GB_PAM4_112 0x7d2UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEEDS2_400GB_PAM4_112 0xfa2UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEEDS2_800GB_PAM4_112 0x1f42UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEEDS2_LAST PORT_PHY_QCFG_RESP_FORCE_LINK_SPEEDS2_800GB_PAM4_112 + __le16 auto_link_speeds2; + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEEDS2_1GB 0x1UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEEDS2_10GB 0x2UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEEDS2_25GB 0x4UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEEDS2_40GB 0x8UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEEDS2_50GB 0x10UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEEDS2_100GB 0x20UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEEDS2_50GB_PAM4_56 0x40UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEEDS2_100GB_PAM4_56 0x80UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEEDS2_200GB_PAM4_56 0x100UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEEDS2_400GB_PAM4_56 0x200UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEEDS2_100GB_PAM4_112 0x400UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEEDS2_200GB_PAM4_112 0x800UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEEDS2_400GB_PAM4_112 0x1000UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEEDS2_800GB_PAM4_112 0x2000UL + u8 active_lanes; + u8 valid; +}; + +/* hwrm_port_mac_cfg_input (size:448b/56B) */ +struct hwrm_port_mac_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define PORT_MAC_CFG_REQ_FLAGS_MATCH_LINK 0x1UL + #define PORT_MAC_CFG_REQ_FLAGS_VLAN_PRI2COS_ENABLE 0x2UL + #define PORT_MAC_CFG_REQ_FLAGS_TUNNEL_PRI2COS_ENABLE 0x4UL + #define PORT_MAC_CFG_REQ_FLAGS_IP_DSCP2COS_ENABLE 0x8UL + #define PORT_MAC_CFG_REQ_FLAGS_PTP_RX_TS_CAPTURE_ENABLE 0x10UL + #define PORT_MAC_CFG_REQ_FLAGS_PTP_RX_TS_CAPTURE_DISABLE 0x20UL + #define PORT_MAC_CFG_REQ_FLAGS_PTP_TX_TS_CAPTURE_ENABLE 0x40UL + #define PORT_MAC_CFG_REQ_FLAGS_PTP_TX_TS_CAPTURE_DISABLE 0x80UL + #define PORT_MAC_CFG_REQ_FLAGS_OOB_WOL_ENABLE 0x100UL + #define PORT_MAC_CFG_REQ_FLAGS_OOB_WOL_DISABLE 0x200UL + #define PORT_MAC_CFG_REQ_FLAGS_VLAN_PRI2COS_DISABLE 0x400UL + #define PORT_MAC_CFG_REQ_FLAGS_TUNNEL_PRI2COS_DISABLE 0x800UL + #define PORT_MAC_CFG_REQ_FLAGS_IP_DSCP2COS_DISABLE 0x1000UL + #define PORT_MAC_CFG_REQ_FLAGS_PTP_ONE_STEP_TX_TS 0x2000UL + #define PORT_MAC_CFG_REQ_FLAGS_ALL_RX_TS_CAPTURE_ENABLE 0x4000UL + #define PORT_MAC_CFG_REQ_FLAGS_ALL_RX_TS_CAPTURE_DISABLE 0x8000UL + __le32 enables; + #define PORT_MAC_CFG_REQ_ENABLES_IPG 0x1UL + #define PORT_MAC_CFG_REQ_ENABLES_LPBK 0x2UL + #define PORT_MAC_CFG_REQ_ENABLES_VLAN_PRI2COS_MAP_PRI 0x4UL + #define PORT_MAC_CFG_REQ_ENABLES_TUNNEL_PRI2COS_MAP_PRI 0x10UL + #define PORT_MAC_CFG_REQ_ENABLES_DSCP2COS_MAP_PRI 0x20UL + #define PORT_MAC_CFG_REQ_ENABLES_RX_TS_CAPTURE_PTP_MSG_TYPE 0x40UL + #define PORT_MAC_CFG_REQ_ENABLES_TX_TS_CAPTURE_PTP_MSG_TYPE 0x80UL + #define PORT_MAC_CFG_REQ_ENABLES_COS_FIELD_CFG 0x100UL + #define PORT_MAC_CFG_REQ_ENABLES_PTP_FREQ_ADJ_PPB 0x200UL + #define PORT_MAC_CFG_REQ_ENABLES_PTP_ADJ_PHASE 0x400UL + #define PORT_MAC_CFG_REQ_ENABLES_PTP_LOAD_CONTROL 0x800UL + __le16 port_id; + u8 ipg; + u8 lpbk; + #define PORT_MAC_CFG_REQ_LPBK_NONE 0x0UL + #define PORT_MAC_CFG_REQ_LPBK_LOCAL 0x1UL + #define PORT_MAC_CFG_REQ_LPBK_REMOTE 0x2UL + #define PORT_MAC_CFG_REQ_LPBK_LAST PORT_MAC_CFG_REQ_LPBK_REMOTE + u8 vlan_pri2cos_map_pri; + u8 reserved1; + u8 tunnel_pri2cos_map_pri; + u8 dscp2pri_map_pri; + __le16 rx_ts_capture_ptp_msg_type; + __le16 tx_ts_capture_ptp_msg_type; + u8 cos_field_cfg; + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_RSVD1 0x1UL + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_MASK 0x6UL + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_SFT 1 + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_INNERMOST (0x0UL << 1) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_OUTER (0x1UL << 1) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_OUTERMOST (0x2UL << 1) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED (0x3UL << 1) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_LAST PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_MASK 0x18UL + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_SFT 3 + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_INNERMOST (0x0UL << 3) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTER (0x1UL << 3) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTERMOST (0x2UL << 3) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED (0x3UL << 3) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_LAST PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_MASK 0xe0UL + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_SFT 5 + u8 unused_0[3]; + __le32 ptp_freq_adj_ppb; + u8 unused_1[3]; + u8 ptp_load_control; + #define PORT_MAC_CFG_REQ_PTP_LOAD_CONTROL_NONE 0x0UL + #define PORT_MAC_CFG_REQ_PTP_LOAD_CONTROL_IMMEDIATE 0x1UL + #define PORT_MAC_CFG_REQ_PTP_LOAD_CONTROL_PPS_EVENT 0x2UL + #define PORT_MAC_CFG_REQ_PTP_LOAD_CONTROL_LAST PORT_MAC_CFG_REQ_PTP_LOAD_CONTROL_PPS_EVENT + __le64 ptp_adj_phase; +}; + +/* hwrm_port_mac_cfg_output (size:128b/16B) */ +struct hwrm_port_mac_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 mru; + __le16 mtu; + u8 ipg; + u8 lpbk; + #define PORT_MAC_CFG_RESP_LPBK_NONE 0x0UL + #define PORT_MAC_CFG_RESP_LPBK_LOCAL 0x1UL + #define PORT_MAC_CFG_RESP_LPBK_REMOTE 0x2UL + #define PORT_MAC_CFG_RESP_LPBK_LAST PORT_MAC_CFG_RESP_LPBK_REMOTE + u8 unused_0; + u8 valid; +}; + +/* hwrm_port_mac_ptp_qcfg_input (size:192b/24B) */ +struct hwrm_port_mac_ptp_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 port_id; + u8 unused_0[6]; +}; + +/* hwrm_port_mac_ptp_qcfg_output (size:704b/88B) */ +struct hwrm_port_mac_ptp_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 flags; + #define PORT_MAC_PTP_QCFG_RESP_FLAGS_DIRECT_ACCESS 0x1UL + #define PORT_MAC_PTP_QCFG_RESP_FLAGS_ONE_STEP_TX_TS 0x4UL + #define PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS 0x8UL + #define PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK 0x10UL + #define PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED 0x20UL + #define PORT_MAC_PTP_QCFG_RESP_FLAGS_64B_PHC_TIME 0x40UL + u8 unused_0[3]; + __le32 rx_ts_reg_off_lower; + __le32 rx_ts_reg_off_upper; + __le32 rx_ts_reg_off_seq_id; + __le32 rx_ts_reg_off_src_id_0; + __le32 rx_ts_reg_off_src_id_1; + __le32 rx_ts_reg_off_src_id_2; + __le32 rx_ts_reg_off_domain_id; + __le32 rx_ts_reg_off_fifo; + __le32 rx_ts_reg_off_fifo_adv; + __le32 rx_ts_reg_off_granularity; + __le32 tx_ts_reg_off_lower; + __le32 tx_ts_reg_off_upper; + __le32 tx_ts_reg_off_seq_id; + __le32 tx_ts_reg_off_fifo; + __le32 tx_ts_reg_off_granularity; + __le32 ts_ref_clock_reg_lower; + __le32 ts_ref_clock_reg_upper; + u8 unused_1[7]; + u8 valid; +}; + +/* tx_port_stats (size:3264b/408B) */ +struct tx_port_stats { + __le64 tx_64b_frames; + __le64 tx_65b_127b_frames; + __le64 tx_128b_255b_frames; + __le64 tx_256b_511b_frames; + __le64 tx_512b_1023b_frames; + __le64 tx_1024b_1518b_frames; + __le64 tx_good_vlan_frames; + __le64 tx_1519b_2047b_frames; + __le64 tx_2048b_4095b_frames; + __le64 tx_4096b_9216b_frames; + __le64 tx_9217b_16383b_frames; + __le64 tx_good_frames; + __le64 tx_total_frames; + __le64 tx_ucast_frames; + __le64 tx_mcast_frames; + __le64 tx_bcast_frames; + __le64 tx_pause_frames; + __le64 tx_pfc_frames; + __le64 tx_jabber_frames; + __le64 tx_fcs_err_frames; + __le64 tx_control_frames; + __le64 tx_oversz_frames; + __le64 tx_single_dfrl_frames; + __le64 tx_multi_dfrl_frames; + __le64 tx_single_coll_frames; + __le64 tx_multi_coll_frames; + __le64 tx_late_coll_frames; + __le64 tx_excessive_coll_frames; + __le64 tx_frag_frames; + __le64 tx_err; + __le64 tx_tagged_frames; + __le64 tx_dbl_tagged_frames; + __le64 tx_runt_frames; + __le64 tx_fifo_underruns; + __le64 tx_pfc_ena_frames_pri0; + __le64 tx_pfc_ena_frames_pri1; + __le64 tx_pfc_ena_frames_pri2; + __le64 tx_pfc_ena_frames_pri3; + __le64 tx_pfc_ena_frames_pri4; + __le64 tx_pfc_ena_frames_pri5; + __le64 tx_pfc_ena_frames_pri6; + __le64 tx_pfc_ena_frames_pri7; + __le64 tx_eee_lpi_events; + __le64 tx_eee_lpi_duration; + __le64 tx_llfc_logical_msgs; + __le64 tx_hcfc_msgs; + __le64 tx_total_collisions; + __le64 tx_bytes; + __le64 tx_xthol_frames; + __le64 tx_stat_discard; + __le64 tx_stat_error; +}; + +/* rx_port_stats (size:4224b/528B) */ +struct rx_port_stats { + __le64 rx_64b_frames; + __le64 rx_65b_127b_frames; + __le64 rx_128b_255b_frames; + __le64 rx_256b_511b_frames; + __le64 rx_512b_1023b_frames; + __le64 rx_1024b_1518b_frames; + __le64 rx_good_vlan_frames; + __le64 rx_1519b_2047b_frames; + __le64 rx_2048b_4095b_frames; + __le64 rx_4096b_9216b_frames; + __le64 rx_9217b_16383b_frames; + __le64 rx_total_frames; + __le64 rx_ucast_frames; + __le64 rx_mcast_frames; + __le64 rx_bcast_frames; + __le64 rx_fcs_err_frames; + __le64 rx_ctrl_frames; + __le64 rx_pause_frames; + __le64 rx_pfc_frames; + __le64 rx_unsupported_opcode_frames; + __le64 rx_unsupported_da_pausepfc_frames; + __le64 rx_wrong_sa_frames; + __le64 rx_align_err_frames; + __le64 rx_oor_len_frames; + __le64 rx_code_err_frames; + __le64 rx_false_carrier_frames; + __le64 rx_ovrsz_frames; + __le64 rx_jbr_frames; + __le64 rx_mtu_err_frames; + __le64 rx_match_crc_frames; + __le64 rx_promiscuous_frames; + __le64 rx_tagged_frames; + __le64 rx_double_tagged_frames; + __le64 rx_trunc_frames; + __le64 rx_good_frames; + __le64 rx_pfc_xon2xoff_frames_pri0; + __le64 rx_pfc_xon2xoff_frames_pri1; + __le64 rx_pfc_xon2xoff_frames_pri2; + __le64 rx_pfc_xon2xoff_frames_pri3; + __le64 rx_pfc_xon2xoff_frames_pri4; + __le64 rx_pfc_xon2xoff_frames_pri5; + __le64 rx_pfc_xon2xoff_frames_pri6; + __le64 rx_pfc_xon2xoff_frames_pri7; + __le64 rx_pfc_ena_frames_pri0; + __le64 rx_pfc_ena_frames_pri1; + __le64 rx_pfc_ena_frames_pri2; + __le64 rx_pfc_ena_frames_pri3; + __le64 rx_pfc_ena_frames_pri4; + __le64 rx_pfc_ena_frames_pri5; + __le64 rx_pfc_ena_frames_pri6; + __le64 rx_pfc_ena_frames_pri7; + __le64 rx_sch_crc_err_frames; + __le64 rx_undrsz_frames; + __le64 rx_frag_frames; + __le64 rx_eee_lpi_events; + __le64 rx_eee_lpi_duration; + __le64 rx_llfc_physical_msgs; + __le64 rx_llfc_logical_msgs; + __le64 rx_llfc_msgs_with_crc_err; + __le64 rx_hcfc_msgs; + __le64 rx_hcfc_msgs_with_crc_err; + __le64 rx_bytes; + __le64 rx_runt_bytes; + __le64 rx_runt_frames; + __le64 rx_stat_discard; + __le64 rx_stat_err; +}; + +/* hwrm_port_qstats_input (size:320b/40B) */ +struct hwrm_port_qstats_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 port_id; + u8 flags; + #define PORT_QSTATS_REQ_FLAGS_COUNTER_MASK 0x1UL + u8 unused_0[5]; + __le64 tx_stat_host_addr; + __le64 rx_stat_host_addr; +}; + +/* hwrm_port_qstats_output (size:128b/16B) */ +struct hwrm_port_qstats_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 tx_stat_size; + __le16 rx_stat_size; + u8 flags; + #define PORT_QSTATS_RESP_FLAGS_CLEARED 0x1UL + u8 unused_0[2]; + u8 valid; +}; + +/* tx_port_stats_ext (size:2048b/256B) */ +struct tx_port_stats_ext { + __le64 tx_bytes_cos0; + __le64 tx_bytes_cos1; + __le64 tx_bytes_cos2; + __le64 tx_bytes_cos3; + __le64 tx_bytes_cos4; + __le64 tx_bytes_cos5; + __le64 tx_bytes_cos6; + __le64 tx_bytes_cos7; + __le64 tx_packets_cos0; + __le64 tx_packets_cos1; + __le64 tx_packets_cos2; + __le64 tx_packets_cos3; + __le64 tx_packets_cos4; + __le64 tx_packets_cos5; + __le64 tx_packets_cos6; + __le64 tx_packets_cos7; + __le64 pfc_pri0_tx_duration_us; + __le64 pfc_pri0_tx_transitions; + __le64 pfc_pri1_tx_duration_us; + __le64 pfc_pri1_tx_transitions; + __le64 pfc_pri2_tx_duration_us; + __le64 pfc_pri2_tx_transitions; + __le64 pfc_pri3_tx_duration_us; + __le64 pfc_pri3_tx_transitions; + __le64 pfc_pri4_tx_duration_us; + __le64 pfc_pri4_tx_transitions; + __le64 pfc_pri5_tx_duration_us; + __le64 pfc_pri5_tx_transitions; + __le64 pfc_pri6_tx_duration_us; + __le64 pfc_pri6_tx_transitions; + __le64 pfc_pri7_tx_duration_us; + __le64 pfc_pri7_tx_transitions; +}; + +/* rx_port_stats_ext (size:3904b/488B) */ +struct rx_port_stats_ext { + __le64 link_down_events; + __le64 continuous_pause_events; + __le64 resume_pause_events; + __le64 continuous_roce_pause_events; + __le64 resume_roce_pause_events; + __le64 rx_bytes_cos0; + __le64 rx_bytes_cos1; + __le64 rx_bytes_cos2; + __le64 rx_bytes_cos3; + __le64 rx_bytes_cos4; + __le64 rx_bytes_cos5; + __le64 rx_bytes_cos6; + __le64 rx_bytes_cos7; + __le64 rx_packets_cos0; + __le64 rx_packets_cos1; + __le64 rx_packets_cos2; + __le64 rx_packets_cos3; + __le64 rx_packets_cos4; + __le64 rx_packets_cos5; + __le64 rx_packets_cos6; + __le64 rx_packets_cos7; + __le64 pfc_pri0_rx_duration_us; + __le64 pfc_pri0_rx_transitions; + __le64 pfc_pri1_rx_duration_us; + __le64 pfc_pri1_rx_transitions; + __le64 pfc_pri2_rx_duration_us; + __le64 pfc_pri2_rx_transitions; + __le64 pfc_pri3_rx_duration_us; + __le64 pfc_pri3_rx_transitions; + __le64 pfc_pri4_rx_duration_us; + __le64 pfc_pri4_rx_transitions; + __le64 pfc_pri5_rx_duration_us; + __le64 pfc_pri5_rx_transitions; + __le64 pfc_pri6_rx_duration_us; + __le64 pfc_pri6_rx_transitions; + __le64 pfc_pri7_rx_duration_us; + __le64 pfc_pri7_rx_transitions; + __le64 rx_bits; + __le64 rx_buffer_passed_threshold; + __le64 rx_pcs_symbol_err; + __le64 rx_corrected_bits; + __le64 rx_discard_bytes_cos0; + __le64 rx_discard_bytes_cos1; + __le64 rx_discard_bytes_cos2; + __le64 rx_discard_bytes_cos3; + __le64 rx_discard_bytes_cos4; + __le64 rx_discard_bytes_cos5; + __le64 rx_discard_bytes_cos6; + __le64 rx_discard_bytes_cos7; + __le64 rx_discard_packets_cos0; + __le64 rx_discard_packets_cos1; + __le64 rx_discard_packets_cos2; + __le64 rx_discard_packets_cos3; + __le64 rx_discard_packets_cos4; + __le64 rx_discard_packets_cos5; + __le64 rx_discard_packets_cos6; + __le64 rx_discard_packets_cos7; + __le64 rx_fec_corrected_blocks; + __le64 rx_fec_uncorrectable_blocks; + __le64 rx_filter_miss; + __le64 rx_fec_symbol_err; +}; + +/* hwrm_port_qstats_ext_input (size:320b/40B) */ +struct hwrm_port_qstats_ext_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 port_id; + __le16 tx_stat_size; + __le16 rx_stat_size; + u8 flags; + #define PORT_QSTATS_EXT_REQ_FLAGS_COUNTER_MASK 0x1UL + u8 unused_0; + __le64 tx_stat_host_addr; + __le64 rx_stat_host_addr; +}; + +/* hwrm_port_qstats_ext_output (size:128b/16B) */ +struct hwrm_port_qstats_ext_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 tx_stat_size; + __le16 rx_stat_size; + __le16 total_active_cos_queues; + u8 flags; + #define PORT_QSTATS_EXT_RESP_FLAGS_CLEAR_ROCE_COUNTERS_SUPPORTED 0x1UL + #define PORT_QSTATS_EXT_RESP_FLAGS_CLEARED 0x2UL + u8 valid; +}; + +/* hwrm_port_lpbk_qstats_input (size:256b/32B) */ +struct hwrm_port_lpbk_qstats_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 lpbk_stat_size; + u8 flags; + #define PORT_LPBK_QSTATS_REQ_FLAGS_COUNTER_MASK 0x1UL + u8 unused_0[5]; + __le64 lpbk_stat_host_addr; +}; + +/* hwrm_port_lpbk_qstats_output (size:128b/16B) */ +struct hwrm_port_lpbk_qstats_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 lpbk_stat_size; + u8 unused_0[5]; + u8 valid; +}; + +/* port_lpbk_stats (size:640b/80B) */ +struct port_lpbk_stats { + __le64 lpbk_ucast_frames; + __le64 lpbk_mcast_frames; + __le64 lpbk_bcast_frames; + __le64 lpbk_ucast_bytes; + __le64 lpbk_mcast_bytes; + __le64 lpbk_bcast_bytes; + __le64 lpbk_tx_discards; + __le64 lpbk_tx_errors; + __le64 lpbk_rx_discards; + __le64 lpbk_rx_errors; +}; + +/* hwrm_port_ecn_qstats_input (size:256b/32B) */ +struct hwrm_port_ecn_qstats_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 port_id; + __le16 ecn_stat_buf_size; + u8 flags; + #define PORT_ECN_QSTATS_REQ_FLAGS_COUNTER_MASK 0x1UL + u8 unused_0[3]; + __le64 ecn_stat_host_addr; +}; + +/* hwrm_port_ecn_qstats_output (size:128b/16B) */ +struct hwrm_port_ecn_qstats_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 ecn_stat_buf_size; + u8 mark_en; + u8 unused_0[4]; + u8 valid; +}; + +/* port_stats_ecn (size:512b/64B) */ +struct port_stats_ecn { + __le64 mark_cnt_cos0; + __le64 mark_cnt_cos1; + __le64 mark_cnt_cos2; + __le64 mark_cnt_cos3; + __le64 mark_cnt_cos4; + __le64 mark_cnt_cos5; + __le64 mark_cnt_cos6; + __le64 mark_cnt_cos7; +}; + +/* hwrm_port_clr_stats_input (size:192b/24B) */ +struct hwrm_port_clr_stats_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 port_id; + u8 flags; + #define PORT_CLR_STATS_REQ_FLAGS_ROCE_COUNTERS 0x1UL + u8 unused_0[5]; +}; + +/* hwrm_port_clr_stats_output (size:128b/16B) */ +struct hwrm_port_clr_stats_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_port_lpbk_clr_stats_input (size:192b/24B) */ +struct hwrm_port_lpbk_clr_stats_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 port_id; + u8 unused_0[6]; +}; + +/* hwrm_port_lpbk_clr_stats_output (size:128b/16B) */ +struct hwrm_port_lpbk_clr_stats_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_port_ts_query_input (size:320b/40B) */ +struct hwrm_port_ts_query_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define PORT_TS_QUERY_REQ_FLAGS_PATH 0x1UL + #define PORT_TS_QUERY_REQ_FLAGS_PATH_TX 0x0UL + #define PORT_TS_QUERY_REQ_FLAGS_PATH_RX 0x1UL + #define PORT_TS_QUERY_REQ_FLAGS_PATH_LAST PORT_TS_QUERY_REQ_FLAGS_PATH_RX + #define PORT_TS_QUERY_REQ_FLAGS_CURRENT_TIME 0x2UL + __le16 port_id; + u8 unused_0[2]; + __le16 enables; + #define PORT_TS_QUERY_REQ_ENABLES_TS_REQ_TIMEOUT 0x1UL + #define PORT_TS_QUERY_REQ_ENABLES_PTP_SEQ_ID 0x2UL + #define PORT_TS_QUERY_REQ_ENABLES_PTP_HDR_OFFSET 0x4UL + __le16 ts_req_timeout; + __le32 ptp_seq_id; + __le16 ptp_hdr_offset; + u8 unused_1[6]; +}; + +/* hwrm_port_ts_query_output (size:192b/24B) */ +struct hwrm_port_ts_query_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le64 ptp_msg_ts; + __le16 ptp_msg_seqid; + u8 unused_0[5]; + u8 valid; +}; + +/* hwrm_port_phy_qcaps_input (size:192b/24B) */ +struct hwrm_port_phy_qcaps_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 port_id; + u8 unused_0[6]; +}; + +/* hwrm_port_phy_qcaps_output (size:320b/40B) */ +struct hwrm_port_phy_qcaps_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 flags; + #define PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED 0x1UL + #define PORT_PHY_QCAPS_RESP_FLAGS_EXTERNAL_LPBK_SUPPORTED 0x2UL + #define PORT_PHY_QCAPS_RESP_FLAGS_AUTONEG_LPBK_SUPPORTED 0x4UL + #define PORT_PHY_QCAPS_RESP_FLAGS_SHARED_PHY_CFG_SUPPORTED 0x8UL + #define PORT_PHY_QCAPS_RESP_FLAGS_CUMULATIVE_COUNTERS_ON_RESET 0x10UL + #define PORT_PHY_QCAPS_RESP_FLAGS_LOCAL_LPBK_NOT_SUPPORTED 0x20UL + #define PORT_PHY_QCAPS_RESP_FLAGS_FW_MANAGED_LINK_DOWN 0x40UL + #define PORT_PHY_QCAPS_RESP_FLAGS_NO_FCS 0x80UL + u8 port_cnt; + #define PORT_PHY_QCAPS_RESP_PORT_CNT_UNKNOWN 0x0UL + #define PORT_PHY_QCAPS_RESP_PORT_CNT_1 0x1UL + #define PORT_PHY_QCAPS_RESP_PORT_CNT_2 0x2UL + #define PORT_PHY_QCAPS_RESP_PORT_CNT_3 0x3UL + #define PORT_PHY_QCAPS_RESP_PORT_CNT_4 0x4UL + #define PORT_PHY_QCAPS_RESP_PORT_CNT_12 0xcUL + #define PORT_PHY_QCAPS_RESP_PORT_CNT_LAST PORT_PHY_QCAPS_RESP_PORT_CNT_12 + __le16 supported_speeds_force_mode; + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MBHD 0x1UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MB 0x2UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_1GBHD 0x4UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_1GB 0x8UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_2GB 0x10UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_2_5GB 0x20UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_10GB 0x40UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_20GB 0x80UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_25GB 0x100UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_40GB 0x200UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_50GB 0x400UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100GB 0x800UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_10MBHD 0x1000UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_10MB 0x2000UL + __le16 supported_speeds_auto_mode; + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_100MBHD 0x1UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_100MB 0x2UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_1GBHD 0x4UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_1GB 0x8UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_2GB 0x10UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_2_5GB 0x20UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_10GB 0x40UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_20GB 0x80UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_25GB 0x100UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_40GB 0x200UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_50GB 0x400UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_100GB 0x800UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_10MBHD 0x1000UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_10MB 0x2000UL + __le16 supported_speeds_eee_mode; + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_EEE_MODE_RSVD1 0x1UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_EEE_MODE_100MB 0x2UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_EEE_MODE_RSVD2 0x4UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_EEE_MODE_1GB 0x8UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_EEE_MODE_RSVD3 0x10UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_EEE_MODE_RSVD4 0x20UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_EEE_MODE_10GB 0x40UL + __le32 tx_lpi_timer_low; + #define PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_LOW_MASK 0xffffffUL + #define PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_LOW_SFT 0 + #define PORT_PHY_QCAPS_RESP_RSVD2_MASK 0xff000000UL + #define PORT_PHY_QCAPS_RESP_RSVD2_SFT 24 + __le32 valid_tx_lpi_timer_high; + #define PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_HIGH_MASK 0xffffffUL + #define PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_HIGH_SFT 0 + #define PORT_PHY_QCAPS_RESP_RSVD_MASK 0xff000000UL + #define PORT_PHY_QCAPS_RESP_RSVD_SFT 24 + __le16 supported_pam4_speeds_auto_mode; + #define PORT_PHY_QCAPS_RESP_SUPPORTED_PAM4_SPEEDS_AUTO_MODE_50G 0x1UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_PAM4_SPEEDS_AUTO_MODE_100G 0x2UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_PAM4_SPEEDS_AUTO_MODE_200G 0x4UL + __le16 supported_pam4_speeds_force_mode; + #define PORT_PHY_QCAPS_RESP_SUPPORTED_PAM4_SPEEDS_FORCE_MODE_50G 0x1UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_PAM4_SPEEDS_FORCE_MODE_100G 0x2UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_PAM4_SPEEDS_FORCE_MODE_200G 0x4UL + __le16 flags2; + #define PORT_PHY_QCAPS_RESP_FLAGS2_PAUSE_UNSUPPORTED 0x1UL + #define PORT_PHY_QCAPS_RESP_FLAGS2_PFC_UNSUPPORTED 0x2UL + #define PORT_PHY_QCAPS_RESP_FLAGS2_BANK_ADDR_SUPPORTED 0x4UL + #define PORT_PHY_QCAPS_RESP_FLAGS2_SPEEDS2_SUPPORTED 0x8UL + #define PORT_PHY_QCAPS_RESP_FLAGS2_REMOTE_LPBK_UNSUPPORTED 0x10UL + u8 internal_port_cnt; + u8 unused_0; + __le16 supported_speeds2_force_mode; + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_FORCE_MODE_1GB 0x1UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_FORCE_MODE_10GB 0x2UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_FORCE_MODE_25GB 0x4UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_FORCE_MODE_40GB 0x8UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_FORCE_MODE_50GB 0x10UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_FORCE_MODE_100GB 0x20UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_FORCE_MODE_50GB_PAM4_56 0x40UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_FORCE_MODE_100GB_PAM4_56 0x80UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_FORCE_MODE_200GB_PAM4_56 0x100UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_FORCE_MODE_400GB_PAM4_56 0x200UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_FORCE_MODE_100GB_PAM4_112 0x400UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_FORCE_MODE_200GB_PAM4_112 0x800UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_FORCE_MODE_400GB_PAM4_112 0x1000UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_FORCE_MODE_800GB_PAM4_112 0x2000UL + __le16 supported_speeds2_auto_mode; + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_AUTO_MODE_1GB 0x1UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_AUTO_MODE_10GB 0x2UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_AUTO_MODE_25GB 0x4UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_AUTO_MODE_40GB 0x8UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_AUTO_MODE_50GB 0x10UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_AUTO_MODE_100GB 0x20UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_AUTO_MODE_50GB_PAM4_56 0x40UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_AUTO_MODE_100GB_PAM4_56 0x80UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_AUTO_MODE_200GB_PAM4_56 0x100UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_AUTO_MODE_400GB_PAM4_56 0x200UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_AUTO_MODE_100GB_PAM4_112 0x400UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_AUTO_MODE_200GB_PAM4_112 0x800UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_AUTO_MODE_400GB_PAM4_112 0x1000UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS2_AUTO_MODE_800GB_PAM4_112 0x2000UL + u8 unused_1[3]; + u8 valid; +}; + +/* hwrm_port_phy_i2c_write_input (size:832b/104B) */ +struct hwrm_port_phy_i2c_write_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + __le32 enables; + #define PORT_PHY_I2C_WRITE_REQ_ENABLES_PAGE_OFFSET 0x1UL + #define PORT_PHY_I2C_WRITE_REQ_ENABLES_BANK_NUMBER 0x2UL + __le16 port_id; + u8 i2c_slave_addr; + u8 bank_number; + __le16 page_number; + __le16 page_offset; + u8 data_length; + u8 unused_1[7]; + __le32 data[16]; +}; + +/* hwrm_port_phy_i2c_write_output (size:128b/16B) */ +struct hwrm_port_phy_i2c_write_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_port_phy_i2c_read_input (size:320b/40B) */ +struct hwrm_port_phy_i2c_read_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + __le32 enables; + #define PORT_PHY_I2C_READ_REQ_ENABLES_PAGE_OFFSET 0x1UL + #define PORT_PHY_I2C_READ_REQ_ENABLES_BANK_NUMBER 0x2UL + __le16 port_id; + u8 i2c_slave_addr; + u8 bank_number; + __le16 page_number; + __le16 page_offset; + u8 data_length; + u8 unused_1[7]; +}; + +/* hwrm_port_phy_i2c_read_output (size:640b/80B) */ +struct hwrm_port_phy_i2c_read_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 data[16]; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_port_phy_mdio_write_input (size:320b/40B) */ +struct hwrm_port_phy_mdio_write_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 unused_0[2]; + __le16 port_id; + u8 phy_addr; + u8 dev_addr; + __le16 reg_addr; + __le16 reg_data; + u8 cl45_mdio; + u8 unused_1[7]; +}; + +/* hwrm_port_phy_mdio_write_output (size:128b/16B) */ +struct hwrm_port_phy_mdio_write_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_port_phy_mdio_read_input (size:256b/32B) */ +struct hwrm_port_phy_mdio_read_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 unused_0[2]; + __le16 port_id; + u8 phy_addr; + u8 dev_addr; + __le16 reg_addr; + u8 cl45_mdio; + u8 unused_1; +}; + +/* hwrm_port_phy_mdio_read_output (size:128b/16B) */ +struct hwrm_port_phy_mdio_read_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 reg_data; + u8 unused_0[5]; + u8 valid; +}; + +/* hwrm_port_led_cfg_input (size:512b/64B) */ +struct hwrm_port_led_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 enables; + #define PORT_LED_CFG_REQ_ENABLES_LED0_ID 0x1UL + #define PORT_LED_CFG_REQ_ENABLES_LED0_STATE 0x2UL + #define PORT_LED_CFG_REQ_ENABLES_LED0_COLOR 0x4UL + #define PORT_LED_CFG_REQ_ENABLES_LED0_BLINK_ON 0x8UL + #define PORT_LED_CFG_REQ_ENABLES_LED0_BLINK_OFF 0x10UL + #define PORT_LED_CFG_REQ_ENABLES_LED0_GROUP_ID 0x20UL + #define PORT_LED_CFG_REQ_ENABLES_LED1_ID 0x40UL + #define PORT_LED_CFG_REQ_ENABLES_LED1_STATE 0x80UL + #define PORT_LED_CFG_REQ_ENABLES_LED1_COLOR 0x100UL + #define PORT_LED_CFG_REQ_ENABLES_LED1_BLINK_ON 0x200UL + #define PORT_LED_CFG_REQ_ENABLES_LED1_BLINK_OFF 0x400UL + #define PORT_LED_CFG_REQ_ENABLES_LED1_GROUP_ID 0x800UL + #define PORT_LED_CFG_REQ_ENABLES_LED2_ID 0x1000UL + #define PORT_LED_CFG_REQ_ENABLES_LED2_STATE 0x2000UL + #define PORT_LED_CFG_REQ_ENABLES_LED2_COLOR 0x4000UL + #define PORT_LED_CFG_REQ_ENABLES_LED2_BLINK_ON 0x8000UL + #define PORT_LED_CFG_REQ_ENABLES_LED2_BLINK_OFF 0x10000UL + #define PORT_LED_CFG_REQ_ENABLES_LED2_GROUP_ID 0x20000UL + #define PORT_LED_CFG_REQ_ENABLES_LED3_ID 0x40000UL + #define PORT_LED_CFG_REQ_ENABLES_LED3_STATE 0x80000UL + #define PORT_LED_CFG_REQ_ENABLES_LED3_COLOR 0x100000UL + #define PORT_LED_CFG_REQ_ENABLES_LED3_BLINK_ON 0x200000UL + #define PORT_LED_CFG_REQ_ENABLES_LED3_BLINK_OFF 0x400000UL + #define PORT_LED_CFG_REQ_ENABLES_LED3_GROUP_ID 0x800000UL + __le16 port_id; + u8 num_leds; + u8 rsvd; + u8 led0_id; + u8 led0_state; + #define PORT_LED_CFG_REQ_LED0_STATE_DEFAULT 0x0UL + #define PORT_LED_CFG_REQ_LED0_STATE_OFF 0x1UL + #define PORT_LED_CFG_REQ_LED0_STATE_ON 0x2UL + #define PORT_LED_CFG_REQ_LED0_STATE_BLINK 0x3UL + #define PORT_LED_CFG_REQ_LED0_STATE_BLINKALT 0x4UL + #define PORT_LED_CFG_REQ_LED0_STATE_LAST PORT_LED_CFG_REQ_LED0_STATE_BLINKALT + u8 led0_color; + #define PORT_LED_CFG_REQ_LED0_COLOR_DEFAULT 0x0UL + #define PORT_LED_CFG_REQ_LED0_COLOR_AMBER 0x1UL + #define PORT_LED_CFG_REQ_LED0_COLOR_GREEN 0x2UL + #define PORT_LED_CFG_REQ_LED0_COLOR_GREENAMBER 0x3UL + #define PORT_LED_CFG_REQ_LED0_COLOR_LAST PORT_LED_CFG_REQ_LED0_COLOR_GREENAMBER + u8 unused_0; + __le16 led0_blink_on; + __le16 led0_blink_off; + u8 led0_group_id; + u8 rsvd0; + u8 led1_id; + u8 led1_state; + #define PORT_LED_CFG_REQ_LED1_STATE_DEFAULT 0x0UL + #define PORT_LED_CFG_REQ_LED1_STATE_OFF 0x1UL + #define PORT_LED_CFG_REQ_LED1_STATE_ON 0x2UL + #define PORT_LED_CFG_REQ_LED1_STATE_BLINK 0x3UL + #define PORT_LED_CFG_REQ_LED1_STATE_BLINKALT 0x4UL + #define PORT_LED_CFG_REQ_LED1_STATE_LAST PORT_LED_CFG_REQ_LED1_STATE_BLINKALT + u8 led1_color; + #define PORT_LED_CFG_REQ_LED1_COLOR_DEFAULT 0x0UL + #define PORT_LED_CFG_REQ_LED1_COLOR_AMBER 0x1UL + #define PORT_LED_CFG_REQ_LED1_COLOR_GREEN 0x2UL + #define PORT_LED_CFG_REQ_LED1_COLOR_GREENAMBER 0x3UL + #define PORT_LED_CFG_REQ_LED1_COLOR_LAST PORT_LED_CFG_REQ_LED1_COLOR_GREENAMBER + u8 unused_1; + __le16 led1_blink_on; + __le16 led1_blink_off; + u8 led1_group_id; + u8 rsvd1; + u8 led2_id; + u8 led2_state; + #define PORT_LED_CFG_REQ_LED2_STATE_DEFAULT 0x0UL + #define PORT_LED_CFG_REQ_LED2_STATE_OFF 0x1UL + #define PORT_LED_CFG_REQ_LED2_STATE_ON 0x2UL + #define PORT_LED_CFG_REQ_LED2_STATE_BLINK 0x3UL + #define PORT_LED_CFG_REQ_LED2_STATE_BLINKALT 0x4UL + #define PORT_LED_CFG_REQ_LED2_STATE_LAST PORT_LED_CFG_REQ_LED2_STATE_BLINKALT + u8 led2_color; + #define PORT_LED_CFG_REQ_LED2_COLOR_DEFAULT 0x0UL + #define PORT_LED_CFG_REQ_LED2_COLOR_AMBER 0x1UL + #define PORT_LED_CFG_REQ_LED2_COLOR_GREEN 0x2UL + #define PORT_LED_CFG_REQ_LED2_COLOR_GREENAMBER 0x3UL + #define PORT_LED_CFG_REQ_LED2_COLOR_LAST PORT_LED_CFG_REQ_LED2_COLOR_GREENAMBER + u8 unused_2; + __le16 led2_blink_on; + __le16 led2_blink_off; + u8 led2_group_id; + u8 rsvd2; + u8 led3_id; + u8 led3_state; + #define PORT_LED_CFG_REQ_LED3_STATE_DEFAULT 0x0UL + #define PORT_LED_CFG_REQ_LED3_STATE_OFF 0x1UL + #define PORT_LED_CFG_REQ_LED3_STATE_ON 0x2UL + #define PORT_LED_CFG_REQ_LED3_STATE_BLINK 0x3UL + #define PORT_LED_CFG_REQ_LED3_STATE_BLINKALT 0x4UL + #define PORT_LED_CFG_REQ_LED3_STATE_LAST PORT_LED_CFG_REQ_LED3_STATE_BLINKALT + u8 led3_color; + #define PORT_LED_CFG_REQ_LED3_COLOR_DEFAULT 0x0UL + #define PORT_LED_CFG_REQ_LED3_COLOR_AMBER 0x1UL + #define PORT_LED_CFG_REQ_LED3_COLOR_GREEN 0x2UL + #define PORT_LED_CFG_REQ_LED3_COLOR_GREENAMBER 0x3UL + #define PORT_LED_CFG_REQ_LED3_COLOR_LAST PORT_LED_CFG_REQ_LED3_COLOR_GREENAMBER + u8 unused_3; + __le16 led3_blink_on; + __le16 led3_blink_off; + u8 led3_group_id; + u8 rsvd3; +}; + +/* hwrm_port_led_cfg_output (size:128b/16B) */ +struct hwrm_port_led_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_port_led_qcfg_input (size:192b/24B) */ +struct hwrm_port_led_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 port_id; + u8 unused_0[6]; +}; + +/* hwrm_port_led_qcfg_output (size:448b/56B) */ +struct hwrm_port_led_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 num_leds; + u8 led0_id; + u8 led0_type; + #define PORT_LED_QCFG_RESP_LED0_TYPE_SPEED 0x0UL + #define PORT_LED_QCFG_RESP_LED0_TYPE_ACTIVITY 0x1UL + #define PORT_LED_QCFG_RESP_LED0_TYPE_INVALID 0xffUL + #define PORT_LED_QCFG_RESP_LED0_TYPE_LAST PORT_LED_QCFG_RESP_LED0_TYPE_INVALID + u8 led0_state; + #define PORT_LED_QCFG_RESP_LED0_STATE_DEFAULT 0x0UL + #define PORT_LED_QCFG_RESP_LED0_STATE_OFF 0x1UL + #define PORT_LED_QCFG_RESP_LED0_STATE_ON 0x2UL + #define PORT_LED_QCFG_RESP_LED0_STATE_BLINK 0x3UL + #define PORT_LED_QCFG_RESP_LED0_STATE_BLINKALT 0x4UL + #define PORT_LED_QCFG_RESP_LED0_STATE_LAST PORT_LED_QCFG_RESP_LED0_STATE_BLINKALT + u8 led0_color; + #define PORT_LED_QCFG_RESP_LED0_COLOR_DEFAULT 0x0UL + #define PORT_LED_QCFG_RESP_LED0_COLOR_AMBER 0x1UL + #define PORT_LED_QCFG_RESP_LED0_COLOR_GREEN 0x2UL + #define PORT_LED_QCFG_RESP_LED0_COLOR_GREENAMBER 0x3UL + #define PORT_LED_QCFG_RESP_LED0_COLOR_LAST PORT_LED_QCFG_RESP_LED0_COLOR_GREENAMBER + u8 unused_0; + __le16 led0_blink_on; + __le16 led0_blink_off; + u8 led0_group_id; + u8 led1_id; + u8 led1_type; + #define PORT_LED_QCFG_RESP_LED1_TYPE_SPEED 0x0UL + #define PORT_LED_QCFG_RESP_LED1_TYPE_ACTIVITY 0x1UL + #define PORT_LED_QCFG_RESP_LED1_TYPE_INVALID 0xffUL + #define PORT_LED_QCFG_RESP_LED1_TYPE_LAST PORT_LED_QCFG_RESP_LED1_TYPE_INVALID + u8 led1_state; + #define PORT_LED_QCFG_RESP_LED1_STATE_DEFAULT 0x0UL + #define PORT_LED_QCFG_RESP_LED1_STATE_OFF 0x1UL + #define PORT_LED_QCFG_RESP_LED1_STATE_ON 0x2UL + #define PORT_LED_QCFG_RESP_LED1_STATE_BLINK 0x3UL + #define PORT_LED_QCFG_RESP_LED1_STATE_BLINKALT 0x4UL + #define PORT_LED_QCFG_RESP_LED1_STATE_LAST PORT_LED_QCFG_RESP_LED1_STATE_BLINKALT + u8 led1_color; + #define PORT_LED_QCFG_RESP_LED1_COLOR_DEFAULT 0x0UL + #define PORT_LED_QCFG_RESP_LED1_COLOR_AMBER 0x1UL + #define PORT_LED_QCFG_RESP_LED1_COLOR_GREEN 0x2UL + #define PORT_LED_QCFG_RESP_LED1_COLOR_GREENAMBER 0x3UL + #define PORT_LED_QCFG_RESP_LED1_COLOR_LAST PORT_LED_QCFG_RESP_LED1_COLOR_GREENAMBER + u8 unused_1; + __le16 led1_blink_on; + __le16 led1_blink_off; + u8 led1_group_id; + u8 led2_id; + u8 led2_type; + #define PORT_LED_QCFG_RESP_LED2_TYPE_SPEED 0x0UL + #define PORT_LED_QCFG_RESP_LED2_TYPE_ACTIVITY 0x1UL + #define PORT_LED_QCFG_RESP_LED2_TYPE_INVALID 0xffUL + #define PORT_LED_QCFG_RESP_LED2_TYPE_LAST PORT_LED_QCFG_RESP_LED2_TYPE_INVALID + u8 led2_state; + #define PORT_LED_QCFG_RESP_LED2_STATE_DEFAULT 0x0UL + #define PORT_LED_QCFG_RESP_LED2_STATE_OFF 0x1UL + #define PORT_LED_QCFG_RESP_LED2_STATE_ON 0x2UL + #define PORT_LED_QCFG_RESP_LED2_STATE_BLINK 0x3UL + #define PORT_LED_QCFG_RESP_LED2_STATE_BLINKALT 0x4UL + #define PORT_LED_QCFG_RESP_LED2_STATE_LAST PORT_LED_QCFG_RESP_LED2_STATE_BLINKALT + u8 led2_color; + #define PORT_LED_QCFG_RESP_LED2_COLOR_DEFAULT 0x0UL + #define PORT_LED_QCFG_RESP_LED2_COLOR_AMBER 0x1UL + #define PORT_LED_QCFG_RESP_LED2_COLOR_GREEN 0x2UL + #define PORT_LED_QCFG_RESP_LED2_COLOR_GREENAMBER 0x3UL + #define PORT_LED_QCFG_RESP_LED2_COLOR_LAST PORT_LED_QCFG_RESP_LED2_COLOR_GREENAMBER + u8 unused_2; + __le16 led2_blink_on; + __le16 led2_blink_off; + u8 led2_group_id; + u8 led3_id; + u8 led3_type; + #define PORT_LED_QCFG_RESP_LED3_TYPE_SPEED 0x0UL + #define PORT_LED_QCFG_RESP_LED3_TYPE_ACTIVITY 0x1UL + #define PORT_LED_QCFG_RESP_LED3_TYPE_INVALID 0xffUL + #define PORT_LED_QCFG_RESP_LED3_TYPE_LAST PORT_LED_QCFG_RESP_LED3_TYPE_INVALID + u8 led3_state; + #define PORT_LED_QCFG_RESP_LED3_STATE_DEFAULT 0x0UL + #define PORT_LED_QCFG_RESP_LED3_STATE_OFF 0x1UL + #define PORT_LED_QCFG_RESP_LED3_STATE_ON 0x2UL + #define PORT_LED_QCFG_RESP_LED3_STATE_BLINK 0x3UL + #define PORT_LED_QCFG_RESP_LED3_STATE_BLINKALT 0x4UL + #define PORT_LED_QCFG_RESP_LED3_STATE_LAST PORT_LED_QCFG_RESP_LED3_STATE_BLINKALT + u8 led3_color; + #define PORT_LED_QCFG_RESP_LED3_COLOR_DEFAULT 0x0UL + #define PORT_LED_QCFG_RESP_LED3_COLOR_AMBER 0x1UL + #define PORT_LED_QCFG_RESP_LED3_COLOR_GREEN 0x2UL + #define PORT_LED_QCFG_RESP_LED3_COLOR_GREENAMBER 0x3UL + #define PORT_LED_QCFG_RESP_LED3_COLOR_LAST PORT_LED_QCFG_RESP_LED3_COLOR_GREENAMBER + u8 unused_3; + __le16 led3_blink_on; + __le16 led3_blink_off; + u8 led3_group_id; + u8 unused_4[6]; + u8 valid; +}; + +/* hwrm_port_led_qcaps_input (size:192b/24B) */ +struct hwrm_port_led_qcaps_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 port_id; + u8 unused_0[6]; +}; + +/* hwrm_port_led_qcaps_output (size:384b/48B) */ +struct hwrm_port_led_qcaps_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 num_leds; + u8 unused[3]; + u8 led0_id; + u8 led0_type; + #define PORT_LED_QCAPS_RESP_LED0_TYPE_SPEED 0x0UL + #define PORT_LED_QCAPS_RESP_LED0_TYPE_ACTIVITY 0x1UL + #define PORT_LED_QCAPS_RESP_LED0_TYPE_INVALID 0xffUL + #define PORT_LED_QCAPS_RESP_LED0_TYPE_LAST PORT_LED_QCAPS_RESP_LED0_TYPE_INVALID + u8 led0_group_id; + u8 unused_0; + __le16 led0_state_caps; + #define PORT_LED_QCAPS_RESP_LED0_STATE_CAPS_ENABLED 0x1UL + #define PORT_LED_QCAPS_RESP_LED0_STATE_CAPS_OFF_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED0_STATE_CAPS_ON_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED0_STATE_CAPS_BLINK_SUPPORTED 0x8UL + #define PORT_LED_QCAPS_RESP_LED0_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL + __le16 led0_color_caps; + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + u8 led1_id; + u8 led1_type; + #define PORT_LED_QCAPS_RESP_LED1_TYPE_SPEED 0x0UL + #define PORT_LED_QCAPS_RESP_LED1_TYPE_ACTIVITY 0x1UL + #define PORT_LED_QCAPS_RESP_LED1_TYPE_INVALID 0xffUL + #define PORT_LED_QCAPS_RESP_LED1_TYPE_LAST PORT_LED_QCAPS_RESP_LED1_TYPE_INVALID + u8 led1_group_id; + u8 unused_1; + __le16 led1_state_caps; + #define PORT_LED_QCAPS_RESP_LED1_STATE_CAPS_ENABLED 0x1UL + #define PORT_LED_QCAPS_RESP_LED1_STATE_CAPS_OFF_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED1_STATE_CAPS_ON_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED1_STATE_CAPS_BLINK_SUPPORTED 0x8UL + #define PORT_LED_QCAPS_RESP_LED1_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL + __le16 led1_color_caps; + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + u8 led2_id; + u8 led2_type; + #define PORT_LED_QCAPS_RESP_LED2_TYPE_SPEED 0x0UL + #define PORT_LED_QCAPS_RESP_LED2_TYPE_ACTIVITY 0x1UL + #define PORT_LED_QCAPS_RESP_LED2_TYPE_INVALID 0xffUL + #define PORT_LED_QCAPS_RESP_LED2_TYPE_LAST PORT_LED_QCAPS_RESP_LED2_TYPE_INVALID + u8 led2_group_id; + u8 unused_2; + __le16 led2_state_caps; + #define PORT_LED_QCAPS_RESP_LED2_STATE_CAPS_ENABLED 0x1UL + #define PORT_LED_QCAPS_RESP_LED2_STATE_CAPS_OFF_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED2_STATE_CAPS_ON_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED2_STATE_CAPS_BLINK_SUPPORTED 0x8UL + #define PORT_LED_QCAPS_RESP_LED2_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL + __le16 led2_color_caps; + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + u8 led3_id; + u8 led3_type; + #define PORT_LED_QCAPS_RESP_LED3_TYPE_SPEED 0x0UL + #define PORT_LED_QCAPS_RESP_LED3_TYPE_ACTIVITY 0x1UL + #define PORT_LED_QCAPS_RESP_LED3_TYPE_INVALID 0xffUL + #define PORT_LED_QCAPS_RESP_LED3_TYPE_LAST PORT_LED_QCAPS_RESP_LED3_TYPE_INVALID + u8 led3_group_id; + u8 unused_3; + __le16 led3_state_caps; + #define PORT_LED_QCAPS_RESP_LED3_STATE_CAPS_ENABLED 0x1UL + #define PORT_LED_QCAPS_RESP_LED3_STATE_CAPS_OFF_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED3_STATE_CAPS_ON_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED3_STATE_CAPS_BLINK_SUPPORTED 0x8UL + #define PORT_LED_QCAPS_RESP_LED3_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL + __le16 led3_color_caps; + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + u8 unused_4[3]; + u8 valid; +}; + +/* hwrm_port_mac_qcaps_input (size:192b/24B) */ +struct hwrm_port_mac_qcaps_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 port_id; + u8 unused_0[6]; +}; + +/* hwrm_port_mac_qcaps_output (size:128b/16B) */ +struct hwrm_port_mac_qcaps_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 flags; + #define PORT_MAC_QCAPS_RESP_FLAGS_LOCAL_LPBK_NOT_SUPPORTED 0x1UL + #define PORT_MAC_QCAPS_RESP_FLAGS_REMOTE_LPBK_SUPPORTED 0x2UL + u8 unused_0[6]; + u8 valid; +}; + +/* hwrm_queue_qportcfg_input (size:192b/24B) */ +struct hwrm_queue_qportcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define QUEUE_QPORTCFG_REQ_FLAGS_PATH 0x1UL + #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_TX 0x0UL + #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX 0x1UL + #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_LAST QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX + __le16 port_id; + u8 drv_qmap_cap; + #define QUEUE_QPORTCFG_REQ_DRV_QMAP_CAP_DISABLED 0x0UL + #define QUEUE_QPORTCFG_REQ_DRV_QMAP_CAP_ENABLED 0x1UL + #define QUEUE_QPORTCFG_REQ_DRV_QMAP_CAP_LAST QUEUE_QPORTCFG_REQ_DRV_QMAP_CAP_ENABLED + u8 unused_0; +}; + +/* hwrm_queue_qportcfg_output (size:1344b/168B) */ +struct hwrm_queue_qportcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 max_configurable_queues; + u8 max_configurable_lossless_queues; + u8 queue_cfg_allowed; + u8 queue_cfg_info; + #define QUEUE_QPORTCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_CFG_INFO_USE_PROFILE_TYPE 0x2UL + u8 queue_pfcenable_cfg_allowed; + u8 queue_pri2cos_cfg_allowed; + u8 queue_cos2bw_cfg_allowed; + u8 queue_id0; + u8 queue_id0_service_profile; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSY_ROCE_CNP 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS_NIC 0x3UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN 0xffUL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LAST QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN + u8 queue_id1; + u8 queue_id1_service_profile; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSY_ROCE_CNP 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS_NIC 0x3UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN 0xffUL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LAST QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN + u8 queue_id2; + u8 queue_id2_service_profile; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSY_ROCE_CNP 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS_NIC 0x3UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN 0xffUL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LAST QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN + u8 queue_id3; + u8 queue_id3_service_profile; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSY_ROCE_CNP 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS_NIC 0x3UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN 0xffUL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LAST QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN + u8 queue_id4; + u8 queue_id4_service_profile; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSY_ROCE_CNP 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS_NIC 0x3UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN 0xffUL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LAST QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN + u8 queue_id5; + u8 queue_id5_service_profile; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSY_ROCE_CNP 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS_NIC 0x3UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN 0xffUL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LAST QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN + u8 queue_id6; + u8 queue_id6_service_profile; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSY_ROCE_CNP 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS_NIC 0x3UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN 0xffUL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LAST QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN + u8 queue_id7; + u8 queue_id7_service_profile; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSY_ROCE_CNP 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS_NIC 0x3UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN 0xffUL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LAST QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN + u8 queue_id0_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_TYPE_CNP 0x4UL + char qid0_name[16]; + char qid1_name[16]; + char qid2_name[16]; + char qid3_name[16]; + char qid4_name[16]; + char qid5_name[16]; + char qid6_name[16]; + char qid7_name[16]; + u8 queue_id1_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id2_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id3_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id4_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id5_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id6_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id7_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 valid; +}; + +/* hwrm_queue_qcfg_input (size:192b/24B) */ +struct hwrm_queue_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define QUEUE_QCFG_REQ_FLAGS_PATH 0x1UL + #define QUEUE_QCFG_REQ_FLAGS_PATH_TX 0x0UL + #define QUEUE_QCFG_REQ_FLAGS_PATH_RX 0x1UL + #define QUEUE_QCFG_REQ_FLAGS_PATH_LAST QUEUE_QCFG_REQ_FLAGS_PATH_RX + __le32 queue_id; +}; + +/* hwrm_queue_qcfg_output (size:128b/16B) */ +struct hwrm_queue_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 queue_len; + u8 service_profile; + #define QUEUE_QCFG_RESP_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QCFG_RESP_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QCFG_RESP_SERVICE_PROFILE_UNKNOWN 0xffUL + #define QUEUE_QCFG_RESP_SERVICE_PROFILE_LAST QUEUE_QCFG_RESP_SERVICE_PROFILE_UNKNOWN + u8 queue_cfg_info; + #define QUEUE_QCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG 0x1UL + u8 unused_0; + u8 valid; +}; + +/* hwrm_queue_cfg_input (size:320b/40B) */ +struct hwrm_queue_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define QUEUE_CFG_REQ_FLAGS_PATH_MASK 0x3UL + #define QUEUE_CFG_REQ_FLAGS_PATH_SFT 0 + #define QUEUE_CFG_REQ_FLAGS_PATH_TX 0x0UL + #define QUEUE_CFG_REQ_FLAGS_PATH_RX 0x1UL + #define QUEUE_CFG_REQ_FLAGS_PATH_BIDIR 0x2UL + #define QUEUE_CFG_REQ_FLAGS_PATH_LAST QUEUE_CFG_REQ_FLAGS_PATH_BIDIR + __le32 enables; + #define QUEUE_CFG_REQ_ENABLES_DFLT_LEN 0x1UL + #define QUEUE_CFG_REQ_ENABLES_SERVICE_PROFILE 0x2UL + __le32 queue_id; + __le32 dflt_len; + u8 service_profile; + #define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_CFG_REQ_SERVICE_PROFILE_UNKNOWN 0xffUL + #define QUEUE_CFG_REQ_SERVICE_PROFILE_LAST QUEUE_CFG_REQ_SERVICE_PROFILE_UNKNOWN + u8 unused_0[7]; +}; + +/* hwrm_queue_cfg_output (size:128b/16B) */ +struct hwrm_queue_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_queue_pfcenable_qcfg_input (size:192b/24B) */ +struct hwrm_queue_pfcenable_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 port_id; + u8 unused_0[6]; +}; + +/* hwrm_queue_pfcenable_qcfg_output (size:128b/16B) */ +struct hwrm_queue_pfcenable_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 flags; + #define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI0_PFC_ENABLED 0x1UL + #define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI1_PFC_ENABLED 0x2UL + #define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI2_PFC_ENABLED 0x4UL + #define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI3_PFC_ENABLED 0x8UL + #define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI4_PFC_ENABLED 0x10UL + #define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI5_PFC_ENABLED 0x20UL + #define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI6_PFC_ENABLED 0x40UL + #define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI7_PFC_ENABLED 0x80UL + #define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI0_PFC_WATCHDOG_ENABLED 0x100UL + #define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI1_PFC_WATCHDOG_ENABLED 0x200UL + #define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI2_PFC_WATCHDOG_ENABLED 0x400UL + #define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI3_PFC_WATCHDOG_ENABLED 0x800UL + #define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI4_PFC_WATCHDOG_ENABLED 0x1000UL + #define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI5_PFC_WATCHDOG_ENABLED 0x2000UL + #define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI6_PFC_WATCHDOG_ENABLED 0x4000UL + #define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI7_PFC_WATCHDOG_ENABLED 0x8000UL + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_queue_pfcenable_cfg_input (size:192b/24B) */ +struct hwrm_queue_pfcenable_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI0_PFC_ENABLED 0x1UL + #define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI1_PFC_ENABLED 0x2UL + #define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI2_PFC_ENABLED 0x4UL + #define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI3_PFC_ENABLED 0x8UL + #define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI4_PFC_ENABLED 0x10UL + #define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI5_PFC_ENABLED 0x20UL + #define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI6_PFC_ENABLED 0x40UL + #define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI7_PFC_ENABLED 0x80UL + #define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI0_PFC_WATCHDOG_ENABLED 0x100UL + #define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI1_PFC_WATCHDOG_ENABLED 0x200UL + #define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI2_PFC_WATCHDOG_ENABLED 0x400UL + #define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI3_PFC_WATCHDOG_ENABLED 0x800UL + #define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI4_PFC_WATCHDOG_ENABLED 0x1000UL + #define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI5_PFC_WATCHDOG_ENABLED 0x2000UL + #define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI6_PFC_WATCHDOG_ENABLED 0x4000UL + #define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI7_PFC_WATCHDOG_ENABLED 0x8000UL + __le16 port_id; + u8 unused_0[2]; +}; + +/* hwrm_queue_pfcenable_cfg_output (size:128b/16B) */ +struct hwrm_queue_pfcenable_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_queue_pri2cos_qcfg_input (size:192b/24B) */ +struct hwrm_queue_pri2cos_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define QUEUE_PRI2COS_QCFG_REQ_FLAGS_PATH 0x1UL + #define QUEUE_PRI2COS_QCFG_REQ_FLAGS_PATH_TX 0x0UL + #define QUEUE_PRI2COS_QCFG_REQ_FLAGS_PATH_RX 0x1UL + #define QUEUE_PRI2COS_QCFG_REQ_FLAGS_PATH_LAST QUEUE_PRI2COS_QCFG_REQ_FLAGS_PATH_RX + #define QUEUE_PRI2COS_QCFG_REQ_FLAGS_IVLAN 0x2UL + u8 port_id; + u8 unused_0[3]; +}; + +/* hwrm_queue_pri2cos_qcfg_output (size:192b/24B) */ +struct hwrm_queue_pri2cos_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 pri0_cos_queue_id; + u8 pri1_cos_queue_id; + u8 pri2_cos_queue_id; + u8 pri3_cos_queue_id; + u8 pri4_cos_queue_id; + u8 pri5_cos_queue_id; + u8 pri6_cos_queue_id; + u8 pri7_cos_queue_id; + u8 queue_cfg_info; + #define QUEUE_PRI2COS_QCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG 0x1UL + u8 unused_0[6]; + u8 valid; +}; + +/* hwrm_queue_pri2cos_cfg_input (size:320b/40B) */ +struct hwrm_queue_pri2cos_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_MASK 0x3UL + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_SFT 0 + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_TX 0x0UL + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_RX 0x1UL + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_BIDIR 0x2UL + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_LAST QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_BIDIR + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_IVLAN 0x4UL + __le32 enables; + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI0_COS_QUEUE_ID 0x1UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI1_COS_QUEUE_ID 0x2UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI2_COS_QUEUE_ID 0x4UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI3_COS_QUEUE_ID 0x8UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI4_COS_QUEUE_ID 0x10UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI5_COS_QUEUE_ID 0x20UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI6_COS_QUEUE_ID 0x40UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI7_COS_QUEUE_ID 0x80UL + u8 port_id; + u8 pri0_cos_queue_id; + u8 pri1_cos_queue_id; + u8 pri2_cos_queue_id; + u8 pri3_cos_queue_id; + u8 pri4_cos_queue_id; + u8 pri5_cos_queue_id; + u8 pri6_cos_queue_id; + u8 pri7_cos_queue_id; + u8 unused_0[7]; +}; + +/* hwrm_queue_pri2cos_cfg_output (size:128b/16B) */ +struct hwrm_queue_pri2cos_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_queue_cos2bw_qcfg_input (size:192b/24B) */ +struct hwrm_queue_cos2bw_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 port_id; + u8 unused_0[6]; +}; + +/* hwrm_queue_cos2bw_qcfg_output (size:896b/112B) */ +struct hwrm_queue_cos2bw_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 queue_id0; + u8 unused_0; + __le16 unused_1; + __le32 queue_id0_min_bw; + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_SCALE 0x10000000UL + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_SCALE_BITS (0x0UL << 28) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_SCALE_BYTES (0x1UL << 28) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_SCALE_LAST QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_SCALE_BYTES + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MEGA (0x0UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_KILO (0x2UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_BASE (0x4UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_GIGA (0x6UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID + __le32 queue_id0_max_bw; + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_SCALE 0x10000000UL + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_SCALE_BITS (0x0UL << 28) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_SCALE_BYTES (0x1UL << 28) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_SCALE_LAST QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_SCALE_BYTES + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MEGA (0x0UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_KILO (0x2UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_BASE (0x4UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_GIGA (0x6UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID + u8 queue_id0_tsa_assign; + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST 0xffUL + u8 queue_id0_pri_lvl; + u8 queue_id0_bw_weight; + struct { + u8 queue_id; + __le32 queue_id_min_bw; + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MIN_BW_SCALE 0x10000000UL + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MIN_BW_SCALE_BITS (0x0UL << 28) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MIN_BW_SCALE_BYTES (0x1UL << 28) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MIN_BW_SCALE_LAST QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MIN_BW_SCALE_BYTES + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_MEGA (0x0UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_KILO (0x2UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_BASE (0x4UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_GIGA (0x6UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_INVALID + __le32 queue_id_max_bw; + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MAX_BW_SCALE 0x10000000UL + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MAX_BW_SCALE_BITS (0x0UL << 28) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MAX_BW_SCALE_BYTES (0x1UL << 28) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MAX_BW_SCALE_LAST QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MAX_BW_SCALE_BYTES + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_MEGA (0x0UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_KILO (0x2UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_BASE (0x4UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_GIGA (0x6UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_INVALID + u8 queue_id_tsa_assign; + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID_TSA_ASSIGN_RESERVED_LAST 0xffUL + u8 queue_id_pri_lvl; + u8 queue_id_bw_weight; + } __packed cfg[7]; + u8 unused_2[4]; + u8 valid; +}; + +/* hwrm_queue_cos2bw_cfg_input (size:1024b/128B) */ +struct hwrm_queue_cos2bw_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + __le32 enables; + #define QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID 0x1UL + #define QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID1_VALID 0x2UL + #define QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID2_VALID 0x4UL + #define QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID3_VALID 0x8UL + #define QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID4_VALID 0x10UL + #define QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID5_VALID 0x20UL + #define QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID6_VALID 0x40UL + #define QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID7_VALID 0x80UL + __le16 port_id; + u8 queue_id0; + u8 unused_0; + __le32 queue_id0_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_SCALE 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_SCALE_BITS (0x0UL << 28) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_SCALE_BYTES (0x1UL << 28) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_SCALE_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_SCALE_BYTES + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MEGA (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_KILO (0x2UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_BASE (0x4UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_GIGA (0x6UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID + __le32 queue_id0_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_SCALE 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_SCALE_BITS (0x0UL << 28) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_SCALE_BYTES (0x1UL << 28) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_SCALE_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_SCALE_BYTES + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MEGA (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_KILO (0x2UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_BASE (0x4UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_GIGA (0x6UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID + u8 queue_id0_tsa_assign; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST 0xffUL + u8 queue_id0_pri_lvl; + u8 queue_id0_bw_weight; + struct { + u8 queue_id; + __le32 queue_id_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MIN_BW_SCALE 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MIN_BW_SCALE_BITS (0x0UL << 28) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MIN_BW_SCALE_BYTES (0x1UL << 28) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MIN_BW_SCALE_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MIN_BW_SCALE_BYTES + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_MEGA (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_KILO (0x2UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_BASE (0x4UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_GIGA (0x6UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MIN_BW_BW_VALUE_UNIT_INVALID + __le32 queue_id_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MAX_BW_SCALE 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MAX_BW_SCALE_BITS (0x0UL << 28) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MAX_BW_SCALE_BYTES (0x1UL << 28) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MAX_BW_SCALE_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MAX_BW_SCALE_BYTES + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_MEGA (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_KILO (0x2UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_BASE (0x4UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_GIGA (0x6UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID_MAX_BW_BW_VALUE_UNIT_INVALID + u8 queue_id_tsa_assign; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID_TSA_ASSIGN_RESERVED_LAST 0xffUL + u8 queue_id_pri_lvl; + u8 queue_id_bw_weight; + } __packed cfg[7]; + u8 unused_1[5]; +}; + +/* hwrm_queue_cos2bw_cfg_output (size:128b/16B) */ +struct hwrm_queue_cos2bw_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_queue_dscp_qcaps_input (size:192b/24B) */ +struct hwrm_queue_dscp_qcaps_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 port_id; + u8 unused_0[7]; +}; + +/* hwrm_queue_dscp_qcaps_output (size:128b/16B) */ +struct hwrm_queue_dscp_qcaps_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 num_dscp_bits; + u8 unused_0; + __le16 max_entries; + u8 unused_1[3]; + u8 valid; +}; + +/* hwrm_queue_dscp2pri_qcfg_input (size:256b/32B) */ +struct hwrm_queue_dscp2pri_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 dest_data_addr; + u8 port_id; + u8 unused_0; + __le16 dest_data_buffer_size; + u8 unused_1[4]; +}; + +/* hwrm_queue_dscp2pri_qcfg_output (size:128b/16B) */ +struct hwrm_queue_dscp2pri_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 entry_cnt; + u8 default_pri; + u8 unused_0[4]; + u8 valid; +}; + +/* hwrm_queue_dscp2pri_cfg_input (size:320b/40B) */ +struct hwrm_queue_dscp2pri_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 src_data_addr; + __le32 flags; + #define QUEUE_DSCP2PRI_CFG_REQ_FLAGS_USE_HW_DEFAULT_PRI 0x1UL + __le32 enables; + #define QUEUE_DSCP2PRI_CFG_REQ_ENABLES_DEFAULT_PRI 0x1UL + u8 port_id; + u8 default_pri; + __le16 entry_cnt; + u8 unused_0[4]; +}; + +/* hwrm_queue_dscp2pri_cfg_output (size:128b/16B) */ +struct hwrm_queue_dscp2pri_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_vnic_alloc_input (size:192b/24B) */ +struct hwrm_vnic_alloc_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define VNIC_ALLOC_REQ_FLAGS_DEFAULT 0x1UL + #define VNIC_ALLOC_REQ_FLAGS_VIRTIO_NET_FID_VALID 0x2UL + #define VNIC_ALLOC_REQ_FLAGS_VNIC_ID_VALID 0x4UL + __le16 virtio_net_fid; + __le16 vnic_id; +}; + +/* hwrm_vnic_alloc_output (size:128b/16B) */ +struct hwrm_vnic_alloc_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 vnic_id; + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_vnic_update_input (size:256b/32B) */ +struct hwrm_vnic_update_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 vnic_id; + __le32 enables; + #define VNIC_UPDATE_REQ_ENABLES_VNIC_STATE_VALID 0x1UL + #define VNIC_UPDATE_REQ_ENABLES_MRU_VALID 0x2UL + #define VNIC_UPDATE_REQ_ENABLES_METADATA_FORMAT_TYPE_VALID 0x4UL + u8 vnic_state; + #define VNIC_UPDATE_REQ_VNIC_STATE_NORMAL 0x0UL + #define VNIC_UPDATE_REQ_VNIC_STATE_DROP 0x1UL + #define VNIC_UPDATE_REQ_VNIC_STATE_LAST VNIC_UPDATE_REQ_VNIC_STATE_DROP + u8 metadata_format_type; + #define VNIC_UPDATE_REQ_METADATA_FORMAT_TYPE_0 0x0UL + #define VNIC_UPDATE_REQ_METADATA_FORMAT_TYPE_1 0x1UL + #define VNIC_UPDATE_REQ_METADATA_FORMAT_TYPE_2 0x2UL + #define VNIC_UPDATE_REQ_METADATA_FORMAT_TYPE_3 0x3UL + #define VNIC_UPDATE_REQ_METADATA_FORMAT_TYPE_4 0x4UL + #define VNIC_UPDATE_REQ_METADATA_FORMAT_TYPE_LAST VNIC_UPDATE_REQ_METADATA_FORMAT_TYPE_4 + __le16 mru; + u8 unused_1[4]; +}; + +/* hwrm_vnic_update_output (size:128b/16B) */ +struct hwrm_vnic_update_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_vnic_free_input (size:192b/24B) */ +struct hwrm_vnic_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 vnic_id; + u8 unused_0[4]; +}; + +/* hwrm_vnic_free_output (size:128b/16B) */ +struct hwrm_vnic_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_vnic_cfg_input (size:384b/48B) */ +struct hwrm_vnic_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define VNIC_CFG_REQ_FLAGS_DEFAULT 0x1UL + #define VNIC_CFG_REQ_FLAGS_VLAN_STRIP_MODE 0x2UL + #define VNIC_CFG_REQ_FLAGS_BD_STALL_MODE 0x4UL + #define VNIC_CFG_REQ_FLAGS_ROCE_DUAL_VNIC_MODE 0x8UL + #define VNIC_CFG_REQ_FLAGS_ROCE_ONLY_VNIC_MODE 0x10UL + #define VNIC_CFG_REQ_FLAGS_RSS_DFLT_CR_MODE 0x20UL + #define VNIC_CFG_REQ_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_MODE 0x40UL + #define VNIC_CFG_REQ_FLAGS_PORTCOS_MAPPING_MODE 0x80UL + __le32 enables; + #define VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP 0x1UL + #define VNIC_CFG_REQ_ENABLES_RSS_RULE 0x2UL + #define VNIC_CFG_REQ_ENABLES_COS_RULE 0x4UL + #define VNIC_CFG_REQ_ENABLES_LB_RULE 0x8UL + #define VNIC_CFG_REQ_ENABLES_MRU 0x10UL + #define VNIC_CFG_REQ_ENABLES_DEFAULT_RX_RING_ID 0x20UL + #define VNIC_CFG_REQ_ENABLES_DEFAULT_CMPL_RING_ID 0x40UL + #define VNIC_CFG_REQ_ENABLES_QUEUE_ID 0x80UL + #define VNIC_CFG_REQ_ENABLES_RX_CSUM_V2_MODE 0x100UL + #define VNIC_CFG_REQ_ENABLES_L2_CQE_MODE 0x200UL + #define VNIC_CFG_REQ_ENABLES_RAW_QP_ID 0x400UL + __le16 vnic_id; + __le16 dflt_ring_grp; + __le16 rss_rule; + __le16 cos_rule; + __le16 lb_rule; + __le16 mru; + __le16 default_rx_ring_id; + __le16 default_cmpl_ring_id; + __le16 queue_id; + u8 rx_csum_v2_mode; + #define VNIC_CFG_REQ_RX_CSUM_V2_MODE_DEFAULT 0x0UL + #define VNIC_CFG_REQ_RX_CSUM_V2_MODE_ALL_OK 0x1UL + #define VNIC_CFG_REQ_RX_CSUM_V2_MODE_MAX 0x2UL + #define VNIC_CFG_REQ_RX_CSUM_V2_MODE_LAST VNIC_CFG_REQ_RX_CSUM_V2_MODE_MAX + u8 l2_cqe_mode; + #define VNIC_CFG_REQ_L2_CQE_MODE_DEFAULT 0x0UL + #define VNIC_CFG_REQ_L2_CQE_MODE_COMPRESSED 0x1UL + #define VNIC_CFG_REQ_L2_CQE_MODE_MIXED 0x2UL + #define VNIC_CFG_REQ_L2_CQE_MODE_LAST VNIC_CFG_REQ_L2_CQE_MODE_MIXED + __le32 raw_qp_id; +}; + +/* hwrm_vnic_cfg_output (size:128b/16B) */ +struct hwrm_vnic_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_vnic_qcaps_input (size:192b/24B) */ +struct hwrm_vnic_qcaps_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 enables; + u8 unused_0[4]; +}; + +/* hwrm_vnic_qcaps_output (size:192b/24B) */ +struct hwrm_vnic_qcaps_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 mru; + u8 unused_0[2]; + __le32 flags; + #define VNIC_QCAPS_RESP_FLAGS_UNUSED 0x1UL + #define VNIC_QCAPS_RESP_FLAGS_VLAN_STRIP_CAP 0x2UL + #define VNIC_QCAPS_RESP_FLAGS_BD_STALL_CAP 0x4UL + #define VNIC_QCAPS_RESP_FLAGS_ROCE_DUAL_VNIC_CAP 0x8UL + #define VNIC_QCAPS_RESP_FLAGS_ROCE_ONLY_VNIC_CAP 0x10UL + #define VNIC_QCAPS_RESP_FLAGS_RSS_DFLT_CR_CAP 0x20UL + #define VNIC_QCAPS_RESP_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_CAP 0x40UL + #define VNIC_QCAPS_RESP_FLAGS_OUTERMOST_RSS_CAP 0x80UL + #define VNIC_QCAPS_RESP_FLAGS_COS_ASSIGNMENT_CAP 0x100UL + #define VNIC_QCAPS_RESP_FLAGS_RX_CMPL_V2_CAP 0x200UL + #define VNIC_QCAPS_RESP_FLAGS_VNIC_STATE_CAP 0x400UL + #define VNIC_QCAPS_RESP_FLAGS_VIRTIO_NET_VNIC_ALLOC_CAP 0x800UL + #define VNIC_QCAPS_RESP_FLAGS_METADATA_FORMAT_CAP 0x1000UL + #define VNIC_QCAPS_RESP_FLAGS_RSS_STRICT_HASH_TYPE_CAP 0x2000UL + #define VNIC_QCAPS_RESP_FLAGS_RSS_HASH_TYPE_DELTA_CAP 0x4000UL + #define VNIC_QCAPS_RESP_FLAGS_RING_SELECT_MODE_TOEPLITZ_CAP 0x8000UL + #define VNIC_QCAPS_RESP_FLAGS_RING_SELECT_MODE_XOR_CAP 0x10000UL + #define VNIC_QCAPS_RESP_FLAGS_RING_SELECT_MODE_TOEPLITZ_CHKSM_CAP 0x20000UL + #define VNIC_QCAPS_RESP_FLAGS_RSS_IPV6_FLOW_LABEL_CAP 0x40000UL + #define VNIC_QCAPS_RESP_FLAGS_RX_CMPL_V3_CAP 0x80000UL + #define VNIC_QCAPS_RESP_FLAGS_L2_CQE_MODE_CAP 0x100000UL + #define VNIC_QCAPS_RESP_FLAGS_RSS_IPSEC_AH_SPI_IPV4_CAP 0x200000UL + #define VNIC_QCAPS_RESP_FLAGS_RSS_IPSEC_ESP_SPI_IPV4_CAP 0x400000UL + #define VNIC_QCAPS_RESP_FLAGS_RSS_IPSEC_AH_SPI_IPV6_CAP 0x800000UL + #define VNIC_QCAPS_RESP_FLAGS_RSS_IPSEC_ESP_SPI_IPV6_CAP 0x1000000UL + #define VNIC_QCAPS_RESP_FLAGS_OUTERMOST_RSS_TRUSTED_VF_CAP 0x2000000UL + #define VNIC_QCAPS_RESP_FLAGS_PORTCOS_MAPPING_MODE 0x4000000UL + #define VNIC_QCAPS_RESP_FLAGS_RSS_PROF_TCAM_MODE_ENABLED 0x8000000UL + #define VNIC_QCAPS_RESP_FLAGS_VNIC_RSS_HASH_MODE_CAP 0x10000000UL + #define VNIC_QCAPS_RESP_FLAGS_HW_TUNNEL_TPA_CAP 0x20000000UL + #define VNIC_QCAPS_RESP_FLAGS_RE_FLUSH_CAP 0x40000000UL + __le16 max_aggs_supported; + u8 unused_1[5]; + u8 valid; +}; + +/* hwrm_vnic_tpa_cfg_input (size:384b/48B) */ +struct hwrm_vnic_tpa_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define VNIC_TPA_CFG_REQ_FLAGS_TPA 0x1UL + #define VNIC_TPA_CFG_REQ_FLAGS_ENCAP_TPA 0x2UL + #define VNIC_TPA_CFG_REQ_FLAGS_RSC_WND_UPDATE 0x4UL + #define VNIC_TPA_CFG_REQ_FLAGS_GRO 0x8UL + #define VNIC_TPA_CFG_REQ_FLAGS_AGG_WITH_ECN 0x10UL + #define VNIC_TPA_CFG_REQ_FLAGS_AGG_WITH_SAME_GRE_SEQ 0x20UL + #define VNIC_TPA_CFG_REQ_FLAGS_GRO_IPID_CHECK 0x40UL + #define VNIC_TPA_CFG_REQ_FLAGS_GRO_TTL_CHECK 0x80UL + #define VNIC_TPA_CFG_REQ_FLAGS_AGG_PACK_AS_GRO 0x100UL + __le32 enables; + #define VNIC_TPA_CFG_REQ_ENABLES_MAX_AGG_SEGS 0x1UL + #define VNIC_TPA_CFG_REQ_ENABLES_MAX_AGGS 0x2UL + #define VNIC_TPA_CFG_REQ_ENABLES_MAX_AGG_TIMER 0x4UL + #define VNIC_TPA_CFG_REQ_ENABLES_MIN_AGG_LEN 0x8UL + #define VNIC_TPA_CFG_REQ_ENABLES_TNL_TPA_EN 0x10UL + __le16 vnic_id; + __le16 max_agg_segs; + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_1 0x0UL + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_2 0x1UL + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_4 0x2UL + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_8 0x3UL + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_MAX 0x1fUL + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_LAST VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_MAX + __le16 max_aggs; + #define VNIC_TPA_CFG_REQ_MAX_AGGS_1 0x0UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_2 0x1UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_4 0x2UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_8 0x3UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_16 0x4UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_MAX 0x7UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_LAST VNIC_TPA_CFG_REQ_MAX_AGGS_MAX + u8 unused_0[2]; + __le32 max_agg_timer; + __le32 min_agg_len; + __le32 tnl_tpa_en_bitmap; + #define VNIC_TPA_CFG_REQ_TNL_TPA_EN_BITMAP_VXLAN 0x1UL + #define VNIC_TPA_CFG_REQ_TNL_TPA_EN_BITMAP_GENEVE 0x2UL + #define VNIC_TPA_CFG_REQ_TNL_TPA_EN_BITMAP_NVGRE 0x4UL + #define VNIC_TPA_CFG_REQ_TNL_TPA_EN_BITMAP_GRE 0x8UL + #define VNIC_TPA_CFG_REQ_TNL_TPA_EN_BITMAP_IPV4 0x10UL + #define VNIC_TPA_CFG_REQ_TNL_TPA_EN_BITMAP_IPV6 0x20UL + #define VNIC_TPA_CFG_REQ_TNL_TPA_EN_BITMAP_VXLAN_GPE 0x40UL + #define VNIC_TPA_CFG_REQ_TNL_TPA_EN_BITMAP_VXLAN_CUST1 0x80UL + #define VNIC_TPA_CFG_REQ_TNL_TPA_EN_BITMAP_GRE_CUST1 0x100UL + #define VNIC_TPA_CFG_REQ_TNL_TPA_EN_BITMAP_UPAR1 0x200UL + #define VNIC_TPA_CFG_REQ_TNL_TPA_EN_BITMAP_UPAR2 0x400UL + #define VNIC_TPA_CFG_REQ_TNL_TPA_EN_BITMAP_UPAR3 0x800UL + #define VNIC_TPA_CFG_REQ_TNL_TPA_EN_BITMAP_UPAR4 0x1000UL + #define VNIC_TPA_CFG_REQ_TNL_TPA_EN_BITMAP_UPAR5 0x2000UL + #define VNIC_TPA_CFG_REQ_TNL_TPA_EN_BITMAP_UPAR6 0x4000UL + #define VNIC_TPA_CFG_REQ_TNL_TPA_EN_BITMAP_UPAR7 0x8000UL + #define VNIC_TPA_CFG_REQ_TNL_TPA_EN_BITMAP_UPAR8 0x10000UL + u8 unused_1[4]; +}; + +/* hwrm_vnic_tpa_cfg_output (size:128b/16B) */ +struct hwrm_vnic_tpa_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_vnic_tpa_qcfg_input (size:192b/24B) */ +struct hwrm_vnic_tpa_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 vnic_id; + u8 unused_0[6]; +}; + +/* hwrm_vnic_tpa_qcfg_output (size:256b/32B) */ +struct hwrm_vnic_tpa_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 flags; + #define VNIC_TPA_QCFG_RESP_FLAGS_TPA 0x1UL + #define VNIC_TPA_QCFG_RESP_FLAGS_ENCAP_TPA 0x2UL + #define VNIC_TPA_QCFG_RESP_FLAGS_RSC_WND_UPDATE 0x4UL + #define VNIC_TPA_QCFG_RESP_FLAGS_GRO 0x8UL + #define VNIC_TPA_QCFG_RESP_FLAGS_AGG_WITH_ECN 0x10UL + #define VNIC_TPA_QCFG_RESP_FLAGS_AGG_WITH_SAME_GRE_SEQ 0x20UL + #define VNIC_TPA_QCFG_RESP_FLAGS_GRO_IPID_CHECK 0x40UL + #define VNIC_TPA_QCFG_RESP_FLAGS_GRO_TTL_CHECK 0x80UL + __le16 max_agg_segs; + #define VNIC_TPA_QCFG_RESP_MAX_AGG_SEGS_1 0x0UL + #define VNIC_TPA_QCFG_RESP_MAX_AGG_SEGS_2 0x1UL + #define VNIC_TPA_QCFG_RESP_MAX_AGG_SEGS_4 0x2UL + #define VNIC_TPA_QCFG_RESP_MAX_AGG_SEGS_8 0x3UL + #define VNIC_TPA_QCFG_RESP_MAX_AGG_SEGS_MAX 0x1fUL + #define VNIC_TPA_QCFG_RESP_MAX_AGG_SEGS_LAST VNIC_TPA_QCFG_RESP_MAX_AGG_SEGS_MAX + __le16 max_aggs; + #define VNIC_TPA_QCFG_RESP_MAX_AGGS_1 0x0UL + #define VNIC_TPA_QCFG_RESP_MAX_AGGS_2 0x1UL + #define VNIC_TPA_QCFG_RESP_MAX_AGGS_4 0x2UL + #define VNIC_TPA_QCFG_RESP_MAX_AGGS_8 0x3UL + #define VNIC_TPA_QCFG_RESP_MAX_AGGS_16 0x4UL + #define VNIC_TPA_QCFG_RESP_MAX_AGGS_MAX 0x7UL + #define VNIC_TPA_QCFG_RESP_MAX_AGGS_LAST VNIC_TPA_QCFG_RESP_MAX_AGGS_MAX + __le32 max_agg_timer; + __le32 min_agg_len; + __le32 tnl_tpa_en_bitmap; + #define VNIC_TPA_QCFG_RESP_TNL_TPA_EN_BITMAP_VXLAN 0x1UL + #define VNIC_TPA_QCFG_RESP_TNL_TPA_EN_BITMAP_GENEVE 0x2UL + #define VNIC_TPA_QCFG_RESP_TNL_TPA_EN_BITMAP_NVGRE 0x4UL + #define VNIC_TPA_QCFG_RESP_TNL_TPA_EN_BITMAP_GRE 0x8UL + #define VNIC_TPA_QCFG_RESP_TNL_TPA_EN_BITMAP_IPV4 0x10UL + #define VNIC_TPA_QCFG_RESP_TNL_TPA_EN_BITMAP_IPV6 0x20UL + #define VNIC_TPA_QCFG_RESP_TNL_TPA_EN_BITMAP_VXLAN_GPE 0x40UL + #define VNIC_TPA_QCFG_RESP_TNL_TPA_EN_BITMAP_VXLAN_CUST1 0x80UL + #define VNIC_TPA_QCFG_RESP_TNL_TPA_EN_BITMAP_GRE_CUST1 0x100UL + #define VNIC_TPA_QCFG_RESP_TNL_TPA_EN_BITMAP_UPAR1 0x200UL + #define VNIC_TPA_QCFG_RESP_TNL_TPA_EN_BITMAP_UPAR2 0x400UL + #define VNIC_TPA_QCFG_RESP_TNL_TPA_EN_BITMAP_UPAR3 0x800UL + #define VNIC_TPA_QCFG_RESP_TNL_TPA_EN_BITMAP_UPAR4 0x1000UL + #define VNIC_TPA_QCFG_RESP_TNL_TPA_EN_BITMAP_UPAR5 0x2000UL + #define VNIC_TPA_QCFG_RESP_TNL_TPA_EN_BITMAP_UPAR6 0x4000UL + #define VNIC_TPA_QCFG_RESP_TNL_TPA_EN_BITMAP_UPAR7 0x8000UL + #define VNIC_TPA_QCFG_RESP_TNL_TPA_EN_BITMAP_UPAR8 0x10000UL + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_vnic_rss_cfg_input (size:384b/48B) */ +struct hwrm_vnic_rss_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 hash_type; + #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 0x1UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 0x2UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4 0x4UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 0x8UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6 0x10UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6 0x20UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL 0x40UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_AH_SPI_IPV4 0x80UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_ESP_SPI_IPV4 0x100UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_AH_SPI_IPV6 0x200UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_ESP_SPI_IPV6 0x400UL + __le16 vnic_id; + u8 ring_table_pair_index; + u8 hash_mode_flags; + #define VNIC_RSS_CFG_REQ_HASH_MODE_FLAGS_DEFAULT 0x1UL + #define VNIC_RSS_CFG_REQ_HASH_MODE_FLAGS_INNERMOST_4 0x2UL + #define VNIC_RSS_CFG_REQ_HASH_MODE_FLAGS_INNERMOST_2 0x4UL + #define VNIC_RSS_CFG_REQ_HASH_MODE_FLAGS_OUTERMOST_4 0x8UL + #define VNIC_RSS_CFG_REQ_HASH_MODE_FLAGS_OUTERMOST_2 0x10UL + __le64 ring_grp_tbl_addr; + __le64 hash_key_tbl_addr; + __le16 rss_ctx_idx; + u8 flags; + #define VNIC_RSS_CFG_REQ_FLAGS_HASH_TYPE_INCLUDE 0x1UL + #define VNIC_RSS_CFG_REQ_FLAGS_HASH_TYPE_EXCLUDE 0x2UL + #define VNIC_RSS_CFG_REQ_FLAGS_IPSEC_HASH_TYPE_CFG_SUPPORT 0x4UL + u8 ring_select_mode; + #define VNIC_RSS_CFG_REQ_RING_SELECT_MODE_TOEPLITZ 0x0UL + #define VNIC_RSS_CFG_REQ_RING_SELECT_MODE_XOR 0x1UL + #define VNIC_RSS_CFG_REQ_RING_SELECT_MODE_TOEPLITZ_CHECKSUM 0x2UL + #define VNIC_RSS_CFG_REQ_RING_SELECT_MODE_LAST VNIC_RSS_CFG_REQ_RING_SELECT_MODE_TOEPLITZ_CHECKSUM + u8 unused_1[4]; +}; + +/* hwrm_vnic_rss_cfg_output (size:128b/16B) */ +struct hwrm_vnic_rss_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_vnic_rss_cfg_cmd_err (size:64b/8B) */ +struct hwrm_vnic_rss_cfg_cmd_err { + u8 code; + #define VNIC_RSS_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_INTERFACE_NOT_READY 0x1UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_LAST VNIC_RSS_CFG_CMD_ERR_CODE_INTERFACE_NOT_READY + u8 unused_0[7]; +}; + +/* hwrm_vnic_rss_qcfg_input (size:192b/24B) */ +struct hwrm_vnic_rss_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 rss_ctx_idx; + __le16 vnic_id; + u8 unused_0[4]; +}; + +/* hwrm_vnic_rss_qcfg_output (size:512b/64B) */ +struct hwrm_vnic_rss_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 hash_type; + #define VNIC_RSS_QCFG_RESP_HASH_TYPE_IPV4 0x1UL + #define VNIC_RSS_QCFG_RESP_HASH_TYPE_TCP_IPV4 0x2UL + #define VNIC_RSS_QCFG_RESP_HASH_TYPE_UDP_IPV4 0x4UL + #define VNIC_RSS_QCFG_RESP_HASH_TYPE_IPV6 0x8UL + #define VNIC_RSS_QCFG_RESP_HASH_TYPE_TCP_IPV6 0x10UL + #define VNIC_RSS_QCFG_RESP_HASH_TYPE_UDP_IPV6 0x20UL + #define VNIC_RSS_QCFG_RESP_HASH_TYPE_IPV6_FLOW_LABEL 0x40UL + #define VNIC_RSS_QCFG_RESP_HASH_TYPE_AH_SPI_IPV4 0x80UL + #define VNIC_RSS_QCFG_RESP_HASH_TYPE_ESP_SPI_IPV4 0x100UL + #define VNIC_RSS_QCFG_RESP_HASH_TYPE_AH_SPI_IPV6 0x200UL + #define VNIC_RSS_QCFG_RESP_HASH_TYPE_ESP_SPI_IPV6 0x400UL + u8 unused_0[4]; + __le32 hash_key[10]; + u8 hash_mode_flags; + #define VNIC_RSS_QCFG_RESP_HASH_MODE_FLAGS_DEFAULT 0x1UL + #define VNIC_RSS_QCFG_RESP_HASH_MODE_FLAGS_INNERMOST_4 0x2UL + #define VNIC_RSS_QCFG_RESP_HASH_MODE_FLAGS_INNERMOST_2 0x4UL + #define VNIC_RSS_QCFG_RESP_HASH_MODE_FLAGS_OUTERMOST_4 0x8UL + #define VNIC_RSS_QCFG_RESP_HASH_MODE_FLAGS_OUTERMOST_2 0x10UL + u8 ring_select_mode; + #define VNIC_RSS_QCFG_RESP_RING_SELECT_MODE_TOEPLITZ 0x0UL + #define VNIC_RSS_QCFG_RESP_RING_SELECT_MODE_XOR 0x1UL + #define VNIC_RSS_QCFG_RESP_RING_SELECT_MODE_TOEPLITZ_CHECKSUM 0x2UL + #define VNIC_RSS_QCFG_RESP_RING_SELECT_MODE_LAST VNIC_RSS_QCFG_RESP_RING_SELECT_MODE_TOEPLITZ_CHECKSUM + u8 unused_1[5]; + u8 valid; +}; + +/* hwrm_vnic_plcmodes_cfg_input (size:320b/40B) */ +struct hwrm_vnic_plcmodes_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define VNIC_PLCMODES_CFG_REQ_FLAGS_REGULAR_PLACEMENT 0x1UL + #define VNIC_PLCMODES_CFG_REQ_FLAGS_JUMBO_PLACEMENT 0x2UL + #define VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV4 0x4UL + #define VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6 0x8UL + #define VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_FCOE 0x10UL + #define VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_ROCE 0x20UL + #define VNIC_PLCMODES_CFG_REQ_FLAGS_VIRTIO_PLACEMENT 0x40UL + __le32 enables; + #define VNIC_PLCMODES_CFG_REQ_ENABLES_JUMBO_THRESH_VALID 0x1UL + #define VNIC_PLCMODES_CFG_REQ_ENABLES_HDS_OFFSET_VALID 0x2UL + #define VNIC_PLCMODES_CFG_REQ_ENABLES_HDS_THRESHOLD_VALID 0x4UL + #define VNIC_PLCMODES_CFG_REQ_ENABLES_MAX_BDS_VALID 0x8UL + __le32 vnic_id; + __le16 jumbo_thresh; + __le16 hds_offset; + __le16 hds_threshold; + __le16 max_bds; + u8 unused_0[4]; +}; + +/* hwrm_vnic_plcmodes_cfg_output (size:128b/16B) */ +struct hwrm_vnic_plcmodes_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_vnic_plcmodes_cfg_cmd_err (size:64b/8B) */ +struct hwrm_vnic_plcmodes_cfg_cmd_err { + u8 code; + #define VNIC_PLCMODES_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL + #define VNIC_PLCMODES_CFG_CMD_ERR_CODE_INVALID_HDS_THRESHOLD 0x1UL + #define VNIC_PLCMODES_CFG_CMD_ERR_CODE_LAST VNIC_PLCMODES_CFG_CMD_ERR_CODE_INVALID_HDS_THRESHOLD + u8 unused_0[7]; +}; + +/* hwrm_vnic_rss_cos_lb_ctx_alloc_input (size:128b/16B) */ +struct hwrm_vnic_rss_cos_lb_ctx_alloc_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; +}; + +/* hwrm_vnic_rss_cos_lb_ctx_alloc_output (size:128b/16B) */ +struct hwrm_vnic_rss_cos_lb_ctx_alloc_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 rss_cos_lb_ctx_id; + u8 unused_0[5]; + u8 valid; +}; + +/* hwrm_vnic_rss_cos_lb_ctx_free_input (size:192b/24B) */ +struct hwrm_vnic_rss_cos_lb_ctx_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 rss_cos_lb_ctx_id; + u8 unused_0[6]; +}; + +/* hwrm_vnic_rss_cos_lb_ctx_free_output (size:128b/16B) */ +struct hwrm_vnic_rss_cos_lb_ctx_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_ring_alloc_input (size:704b/88B) */ +struct hwrm_ring_alloc_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 enables; + #define RING_ALLOC_REQ_ENABLES_RING_ARB_CFG 0x2UL + #define RING_ALLOC_REQ_ENABLES_STAT_CTX_ID_VALID 0x8UL + #define RING_ALLOC_REQ_ENABLES_MAX_BW_VALID 0x20UL + #define RING_ALLOC_REQ_ENABLES_RX_RING_ID_VALID 0x40UL + #define RING_ALLOC_REQ_ENABLES_NQ_RING_ID_VALID 0x80UL + #define RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID 0x100UL + #define RING_ALLOC_REQ_ENABLES_SCHQ_ID 0x200UL + #define RING_ALLOC_REQ_ENABLES_MPC_CHNLS_TYPE 0x400UL + #define RING_ALLOC_REQ_ENABLES_STEERING_TAG_VALID 0x800UL + #define RING_ALLOC_REQ_ENABLES_RX_RATE_PROFILE_VALID 0x1000UL + u8 ring_type; + #define RING_ALLOC_REQ_RING_TYPE_L2_CMPL 0x0UL + #define RING_ALLOC_REQ_RING_TYPE_TX 0x1UL + #define RING_ALLOC_REQ_RING_TYPE_RX 0x2UL + #define RING_ALLOC_REQ_RING_TYPE_ROCE_CMPL 0x3UL + #define RING_ALLOC_REQ_RING_TYPE_RX_AGG 0x4UL + #define RING_ALLOC_REQ_RING_TYPE_NQ 0x5UL + #define RING_ALLOC_REQ_RING_TYPE_LAST RING_ALLOC_REQ_RING_TYPE_NQ + u8 cmpl_coal_cnt; + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_OFF 0x0UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_4 0x1UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_8 0x2UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_12 0x3UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_16 0x4UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_24 0x5UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_32 0x6UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_48 0x7UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_64 0x8UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_96 0x9UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_128 0xaUL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_192 0xbUL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_256 0xcUL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_320 0xdUL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_384 0xeUL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_MAX 0xfUL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_LAST RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_MAX + __le16 flags; + #define RING_ALLOC_REQ_FLAGS_RX_SOP_PAD 0x1UL + #define RING_ALLOC_REQ_FLAGS_DISABLE_CQ_OVERFLOW_DETECTION 0x2UL + #define RING_ALLOC_REQ_FLAGS_NQ_DBR_PACING 0x4UL + #define RING_ALLOC_REQ_FLAGS_TX_PKT_TS_CMPL_ENABLE 0x8UL + __le64 page_tbl_addr; + __le32 fbo; + u8 page_size; + u8 page_tbl_depth; + __le16 schq_id; + __le32 length; + __le16 logical_id; + __le16 cmpl_ring_id; + __le16 queue_id; + __le16 rx_buf_size; + __le16 rx_ring_id; + __le16 nq_ring_id; + __le16 ring_arb_cfg; + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_MASK 0xfUL + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_SFT 0 + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_SP 0x1UL + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_WFQ 0x2UL + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_LAST RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_WFQ + #define RING_ALLOC_REQ_RING_ARB_CFG_RSVD_MASK 0xf0UL + #define RING_ALLOC_REQ_RING_ARB_CFG_RSVD_SFT 4 + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_PARAM_MASK 0xff00UL + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_PARAM_SFT 8 + __le16 steering_tag; + __le32 reserved3; + __le32 stat_ctx_id; + __le32 reserved4; + __le32 max_bw; + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_SFT 0 + #define RING_ALLOC_REQ_MAX_BW_SCALE 0x10000000UL + #define RING_ALLOC_REQ_MAX_BW_SCALE_BITS (0x0UL << 28) + #define RING_ALLOC_REQ_MAX_BW_SCALE_BYTES (0x1UL << 28) + #define RING_ALLOC_REQ_MAX_BW_SCALE_LAST RING_ALLOC_REQ_MAX_BW_SCALE_BYTES + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_MEGA (0x0UL << 29) + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_KILO (0x2UL << 29) + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_BASE (0x4UL << 29) + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_GIGA (0x6UL << 29) + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_LAST RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_INVALID + u8 int_mode; + #define RING_ALLOC_REQ_INT_MODE_LEGACY 0x0UL + #define RING_ALLOC_REQ_INT_MODE_RSVD 0x1UL + #define RING_ALLOC_REQ_INT_MODE_MSIX 0x2UL + #define RING_ALLOC_REQ_INT_MODE_POLL 0x3UL + #define RING_ALLOC_REQ_INT_MODE_LAST RING_ALLOC_REQ_INT_MODE_POLL + u8 mpc_chnls_type; + #define RING_ALLOC_REQ_MPC_CHNLS_TYPE_TCE 0x0UL + #define RING_ALLOC_REQ_MPC_CHNLS_TYPE_RCE 0x1UL + #define RING_ALLOC_REQ_MPC_CHNLS_TYPE_TE_CFA 0x2UL + #define RING_ALLOC_REQ_MPC_CHNLS_TYPE_RE_CFA 0x3UL + #define RING_ALLOC_REQ_MPC_CHNLS_TYPE_PRIMATE 0x4UL + #define RING_ALLOC_REQ_MPC_CHNLS_TYPE_LAST RING_ALLOC_REQ_MPC_CHNLS_TYPE_PRIMATE + u8 rx_rate_profile_sel; + #define RING_ALLOC_REQ_RX_RATE_PROFILE_SEL_DEFAULT 0x0UL + #define RING_ALLOC_REQ_RX_RATE_PROFILE_SEL_POLL_MODE 0x1UL + #define RING_ALLOC_REQ_RX_RATE_PROFILE_SEL_LAST RING_ALLOC_REQ_RX_RATE_PROFILE_SEL_POLL_MODE + u8 unused_4; + __le64 cq_handle; +}; + +/* hwrm_ring_alloc_output (size:128b/16B) */ +struct hwrm_ring_alloc_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 ring_id; + __le16 logical_ring_id; + u8 push_buffer_index; + #define RING_ALLOC_RESP_PUSH_BUFFER_INDEX_PING_BUFFER 0x0UL + #define RING_ALLOC_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER 0x1UL + #define RING_ALLOC_RESP_PUSH_BUFFER_INDEX_LAST RING_ALLOC_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER + u8 unused_0[2]; + u8 valid; +}; + +/* hwrm_ring_free_input (size:256b/32B) */ +struct hwrm_ring_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 ring_type; + #define RING_FREE_REQ_RING_TYPE_L2_CMPL 0x0UL + #define RING_FREE_REQ_RING_TYPE_TX 0x1UL + #define RING_FREE_REQ_RING_TYPE_RX 0x2UL + #define RING_FREE_REQ_RING_TYPE_ROCE_CMPL 0x3UL + #define RING_FREE_REQ_RING_TYPE_RX_AGG 0x4UL + #define RING_FREE_REQ_RING_TYPE_NQ 0x5UL + #define RING_FREE_REQ_RING_TYPE_LAST RING_FREE_REQ_RING_TYPE_NQ + u8 flags; + #define RING_FREE_REQ_FLAGS_VIRTIO_RING_VALID 0x1UL + #define RING_FREE_REQ_FLAGS_LAST RING_FREE_REQ_FLAGS_VIRTIO_RING_VALID + __le16 ring_id; + __le32 prod_idx; + __le32 opaque; + __le32 unused_1; +}; + +/* hwrm_ring_free_output (size:128b/16B) */ +struct hwrm_ring_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_ring_reset_input (size:192b/24B) */ +struct hwrm_ring_reset_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 ring_type; + #define RING_RESET_REQ_RING_TYPE_L2_CMPL 0x0UL + #define RING_RESET_REQ_RING_TYPE_TX 0x1UL + #define RING_RESET_REQ_RING_TYPE_RX 0x2UL + #define RING_RESET_REQ_RING_TYPE_ROCE_CMPL 0x3UL + #define RING_RESET_REQ_RING_TYPE_RX_RING_GRP 0x6UL + #define RING_RESET_REQ_RING_TYPE_LAST RING_RESET_REQ_RING_TYPE_RX_RING_GRP + u8 unused_0; + __le16 ring_id; + u8 unused_1[4]; +}; + +/* hwrm_ring_reset_output (size:128b/16B) */ +struct hwrm_ring_reset_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 push_buffer_index; + #define RING_RESET_RESP_PUSH_BUFFER_INDEX_PING_BUFFER 0x0UL + #define RING_RESET_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER 0x1UL + #define RING_RESET_RESP_PUSH_BUFFER_INDEX_LAST RING_RESET_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER + u8 unused_0[3]; + u8 consumer_idx[3]; + u8 valid; +}; + +/* hwrm_ring_aggint_qcaps_input (size:128b/16B) */ +struct hwrm_ring_aggint_qcaps_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; +}; + +/* hwrm_ring_aggint_qcaps_output (size:384b/48B) */ +struct hwrm_ring_aggint_qcaps_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 cmpl_params; + #define RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_INT_LAT_TMR_MIN 0x1UL + #define RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_INT_LAT_TMR_MAX 0x2UL + #define RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_TIMER_RESET 0x4UL + #define RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_RING_IDLE 0x8UL + #define RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_NUM_CMPL_DMA_AGGR 0x10UL + #define RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_NUM_CMPL_DMA_AGGR_DURING_INT 0x20UL + #define RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_CMPL_AGGR_DMA_TMR 0x40UL + #define RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_CMPL_AGGR_DMA_TMR_DURING_INT 0x80UL + #define RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_NUM_CMPL_AGGR_INT 0x100UL + __le32 nq_params; + #define RING_AGGINT_QCAPS_RESP_NQ_PARAMS_INT_LAT_TMR_MIN 0x1UL + __le16 num_cmpl_dma_aggr_min; + __le16 num_cmpl_dma_aggr_max; + __le16 num_cmpl_dma_aggr_during_int_min; + __le16 num_cmpl_dma_aggr_during_int_max; + __le16 cmpl_aggr_dma_tmr_min; + __le16 cmpl_aggr_dma_tmr_max; + __le16 cmpl_aggr_dma_tmr_during_int_min; + __le16 cmpl_aggr_dma_tmr_during_int_max; + __le16 int_lat_tmr_min_min; + __le16 int_lat_tmr_min_max; + __le16 int_lat_tmr_max_min; + __le16 int_lat_tmr_max_max; + __le16 num_cmpl_aggr_int_min; + __le16 num_cmpl_aggr_int_max; + __le16 timer_units; + u8 unused_0[1]; + u8 valid; +}; + +/* hwrm_ring_cmpl_ring_qaggint_params_input (size:192b/24B) */ +struct hwrm_ring_cmpl_ring_qaggint_params_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 ring_id; + __le16 flags; + #define RING_CMPL_RING_QAGGINT_PARAMS_REQ_FLAGS_UNUSED_0_MASK 0x3UL + #define RING_CMPL_RING_QAGGINT_PARAMS_REQ_FLAGS_UNUSED_0_SFT 0 + #define RING_CMPL_RING_QAGGINT_PARAMS_REQ_FLAGS_IS_NQ 0x4UL + u8 unused_0[4]; +}; + +/* hwrm_ring_cmpl_ring_qaggint_params_output (size:256b/32B) */ +struct hwrm_ring_cmpl_ring_qaggint_params_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 flags; + #define RING_CMPL_RING_QAGGINT_PARAMS_RESP_FLAGS_TIMER_RESET 0x1UL + #define RING_CMPL_RING_QAGGINT_PARAMS_RESP_FLAGS_RING_IDLE 0x2UL + __le16 num_cmpl_dma_aggr; + __le16 num_cmpl_dma_aggr_during_int; + __le16 cmpl_aggr_dma_tmr; + __le16 cmpl_aggr_dma_tmr_during_int; + __le16 int_lat_tmr_min; + __le16 int_lat_tmr_max; + __le16 num_cmpl_aggr_int; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_ring_cmpl_ring_cfg_aggint_params_input (size:320b/40B) */ +struct hwrm_ring_cmpl_ring_cfg_aggint_params_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 ring_id; + __le16 flags; + #define RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_TIMER_RESET 0x1UL + #define RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_RING_IDLE 0x2UL + #define RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_IS_NQ 0x4UL + __le16 num_cmpl_dma_aggr; + __le16 num_cmpl_dma_aggr_during_int; + __le16 cmpl_aggr_dma_tmr; + __le16 cmpl_aggr_dma_tmr_during_int; + __le16 int_lat_tmr_min; + __le16 int_lat_tmr_max; + __le16 num_cmpl_aggr_int; + __le16 enables; + #define RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_ENABLES_NUM_CMPL_DMA_AGGR 0x1UL + #define RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_ENABLES_NUM_CMPL_DMA_AGGR_DURING_INT 0x2UL + #define RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_ENABLES_CMPL_AGGR_DMA_TMR 0x4UL + #define RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_ENABLES_INT_LAT_TMR_MIN 0x8UL + #define RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_ENABLES_INT_LAT_TMR_MAX 0x10UL + #define RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_ENABLES_NUM_CMPL_AGGR_INT 0x20UL + u8 unused_0[4]; +}; + +/* hwrm_ring_cmpl_ring_cfg_aggint_params_output (size:128b/16B) */ +struct hwrm_ring_cmpl_ring_cfg_aggint_params_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_ring_grp_alloc_input (size:192b/24B) */ +struct hwrm_ring_grp_alloc_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 cr; + __le16 rr; + __le16 ar; + __le16 sc; +}; + +/* hwrm_ring_grp_alloc_output (size:128b/16B) */ +struct hwrm_ring_grp_alloc_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 ring_group_id; + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_ring_grp_free_input (size:192b/24B) */ +struct hwrm_ring_grp_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 ring_group_id; + u8 unused_0[4]; +}; + +/* hwrm_ring_grp_free_output (size:128b/16B) */ +struct hwrm_ring_grp_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +#define DEFAULT_FLOW_ID 0xFFFFFFFFUL +#define ROCEV1_FLOW_ID 0xFFFFFFFEUL +#define ROCEV2_FLOW_ID 0xFFFFFFFDUL +#define ROCEV2_CNP_FLOW_ID 0xFFFFFFFCUL + +/* hwrm_cfa_l2_filter_alloc_input (size:768b/96B) */ +struct hwrm_cfa_l2_filter_alloc_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH 0x1UL + #define CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_TX 0x0UL + #define CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_RX 0x1UL + #define CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_LAST CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_RX + #define CFA_L2_FILTER_ALLOC_REQ_FLAGS_LOOPBACK 0x2UL + #define CFA_L2_FILTER_ALLOC_REQ_FLAGS_DROP 0x4UL + #define CFA_L2_FILTER_ALLOC_REQ_FLAGS_OUTERMOST 0x8UL + #define CFA_L2_FILTER_ALLOC_REQ_FLAGS_TRAFFIC_MASK 0x30UL + #define CFA_L2_FILTER_ALLOC_REQ_FLAGS_TRAFFIC_SFT 4 + #define CFA_L2_FILTER_ALLOC_REQ_FLAGS_TRAFFIC_NO_ROCE_L2 (0x0UL << 4) + #define CFA_L2_FILTER_ALLOC_REQ_FLAGS_TRAFFIC_L2 (0x1UL << 4) + #define CFA_L2_FILTER_ALLOC_REQ_FLAGS_TRAFFIC_ROCE (0x2UL << 4) + #define CFA_L2_FILTER_ALLOC_REQ_FLAGS_TRAFFIC_LAST CFA_L2_FILTER_ALLOC_REQ_FLAGS_TRAFFIC_ROCE + #define CFA_L2_FILTER_ALLOC_REQ_FLAGS_XDP_DISABLE 0x40UL + #define CFA_L2_FILTER_ALLOC_REQ_FLAGS_SOURCE_VALID 0x80UL + __le32 enables; + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_ADDR 0x1UL + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_ADDR_MASK 0x2UL + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_OVLAN 0x4UL + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_OVLAN_MASK 0x8UL + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_IVLAN 0x10UL + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_IVLAN_MASK 0x20UL + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_T_L2_ADDR 0x40UL + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_T_L2_ADDR_MASK 0x80UL + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_T_L2_OVLAN 0x100UL + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_T_L2_OVLAN_MASK 0x200UL + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_T_L2_IVLAN 0x400UL + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_T_L2_IVLAN_MASK 0x800UL + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_SRC_TYPE 0x1000UL + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_SRC_ID 0x2000UL + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE 0x4000UL + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_DST_ID 0x8000UL + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_MIRROR_VNIC_ID 0x10000UL + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_NUM_VLANS 0x20000UL + #define CFA_L2_FILTER_ALLOC_REQ_ENABLES_T_NUM_VLANS 0x40000UL + u8 l2_addr[6]; + u8 num_vlans; + u8 t_num_vlans; + u8 l2_addr_mask[6]; + __le16 l2_ovlan; + __le16 l2_ovlan_mask; + __le16 l2_ivlan; + __le16 l2_ivlan_mask; + u8 unused_1[2]; + u8 t_l2_addr[6]; + u8 unused_2[2]; + u8 t_l2_addr_mask[6]; + __le16 t_l2_ovlan; + __le16 t_l2_ovlan_mask; + __le16 t_l2_ivlan; + __le16 t_l2_ivlan_mask; + u8 src_type; + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_NPORT 0x0UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_PF 0x1UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VF 0x2UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VNIC 0x3UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_KONG 0x4UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_APE 0x5UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_BONO 0x6UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_TANG 0x7UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_LAST CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_TANG + u8 unused_3; + __le32 src_id; + u8 tunnel_type; + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_LAST CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL + u8 unused_4; + __le16 dst_id; + __le16 mirror_vnic_id; + u8 pri_hint; + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER 0x0UL + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_ABOVE_FILTER 0x1UL + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_BELOW_FILTER 0x2UL + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MAX 0x3UL + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MIN 0x4UL + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_LAST CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MIN + u8 unused_5; + __le32 unused_6; + __le64 l2_filter_id_hint; +}; + +/* hwrm_cfa_l2_filter_alloc_output (size:192b/24B) */ +struct hwrm_cfa_l2_filter_alloc_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le64 l2_filter_id; + __le32 flow_id; + #define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_VALUE_MASK 0x3fffffffUL + #define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_VALUE_SFT 0 + #define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_TYPE 0x40000000UL + #define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_TYPE_INT (0x0UL << 30) + #define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_TYPE_EXT (0x1UL << 30) + #define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_TYPE_LAST CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_TYPE_EXT + #define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_DIR 0x80000000UL + #define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_DIR_RX (0x0UL << 31) + #define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_DIR_TX (0x1UL << 31) + #define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_DIR_LAST CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_DIR_TX + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_cfa_l2_filter_free_input (size:192b/24B) */ +struct hwrm_cfa_l2_filter_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 l2_filter_id; +}; + +/* hwrm_cfa_l2_filter_free_output (size:128b/16B) */ +struct hwrm_cfa_l2_filter_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_cfa_l2_filter_cfg_input (size:384b/48B) */ +struct hwrm_cfa_l2_filter_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH 0x1UL + #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_TX 0x0UL + #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX 0x1UL + #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX + #define CFA_L2_FILTER_CFG_REQ_FLAGS_DROP 0x2UL + #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_MASK 0xcUL + #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_SFT 2 + #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_NO_ROCE_L2 (0x0UL << 2) + #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_L2 (0x1UL << 2) + #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE (0x2UL << 2) + #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE + #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_MASK 0x30UL + #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_SFT 4 + #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_NO_UPDATE (0x0UL << 4) + #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_BYPASS_LKUP (0x1UL << 4) + #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_ENABLE_LKUP (0x2UL << 4) + #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_RESTORE_FW_OP (0x3UL << 4) + #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_RESTORE_FW_OP + __le32 enables; + #define CFA_L2_FILTER_CFG_REQ_ENABLES_DST_ID 0x1UL + #define CFA_L2_FILTER_CFG_REQ_ENABLES_NEW_MIRROR_VNIC_ID 0x2UL + #define CFA_L2_FILTER_CFG_REQ_ENABLES_PROF_FUNC 0x4UL + #define CFA_L2_FILTER_CFG_REQ_ENABLES_L2_CONTEXT_ID 0x8UL + __le64 l2_filter_id; + __le32 dst_id; + __le32 new_mirror_vnic_id; + __le32 prof_func; + __le32 l2_context_id; +}; + +/* hwrm_cfa_l2_filter_cfg_output (size:128b/16B) */ +struct hwrm_cfa_l2_filter_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_cfa_l2_set_rx_mask_input (size:448b/56B) */ +struct hwrm_cfa_l2_set_rx_mask_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 vnic_id; + __le32 mask; + #define CFA_L2_SET_RX_MASK_REQ_MASK_MCAST 0x2UL + #define CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST 0x4UL + #define CFA_L2_SET_RX_MASK_REQ_MASK_BCAST 0x8UL + #define CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS 0x10UL + #define CFA_L2_SET_RX_MASK_REQ_MASK_OUTERMOST 0x20UL + #define CFA_L2_SET_RX_MASK_REQ_MASK_VLANONLY 0x40UL + #define CFA_L2_SET_RX_MASK_REQ_MASK_VLAN_NONVLAN 0x80UL + #define CFA_L2_SET_RX_MASK_REQ_MASK_ANYVLAN_NONVLAN 0x100UL + __le64 mc_tbl_addr; + __le32 num_mc_entries; + u8 unused_0[4]; + __le64 vlan_tag_tbl_addr; + __le32 num_vlan_tags; + u8 unused_1[4]; +}; + +/* hwrm_cfa_l2_set_rx_mask_output (size:128b/16B) */ +struct hwrm_cfa_l2_set_rx_mask_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_cfa_l2_set_rx_mask_cmd_err (size:64b/8B) */ +struct hwrm_cfa_l2_set_rx_mask_cmd_err { + u8 code; + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_UNKNOWN 0x0UL + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR 0x1UL + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_LAST CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR + u8 unused_0[7]; +}; + +/* hwrm_cfa_tunnel_filter_alloc_input (size:704b/88B) */ +struct hwrm_cfa_tunnel_filter_alloc_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define CFA_TUNNEL_FILTER_ALLOC_REQ_FLAGS_LOOPBACK 0x1UL + __le32 enables; + #define CFA_TUNNEL_FILTER_ALLOC_REQ_ENABLES_L2_FILTER_ID 0x1UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_ENABLES_L2_ADDR 0x2UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_ENABLES_L2_IVLAN 0x4UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_ENABLES_L3_ADDR 0x8UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_ENABLES_L3_ADDR_TYPE 0x10UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_ENABLES_T_L3_ADDR_TYPE 0x20UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_ENABLES_T_L3_ADDR 0x40UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE 0x80UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_ENABLES_VNI 0x100UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_ENABLES_DST_VNIC_ID 0x200UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_ENABLES_MIRROR_VNIC_ID 0x400UL + __le64 l2_filter_id; + u8 l2_addr[6]; + __le16 l2_ivlan; + __le32 l3_addr[4]; + __le32 t_l3_addr[4]; + u8 l3_addr_type; + u8 t_l3_addr_type; + u8 tunnel_type; + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_LAST CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL + u8 tunnel_flags; + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_FLAGS_TUN_FLAGS_OAM_CHECKSUM_EXPLHDR 0x1UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_FLAGS_TUN_FLAGS_CRITICAL_OPT_S1 0x2UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_FLAGS_TUN_FLAGS_EXTHDR_SEQNUM_S0 0x4UL + __le32 vni; + __le32 dst_vnic_id; + __le32 mirror_vnic_id; +}; + +/* hwrm_cfa_tunnel_filter_alloc_output (size:192b/24B) */ +struct hwrm_cfa_tunnel_filter_alloc_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le64 tunnel_filter_id; + __le32 flow_id; + #define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_VALUE_MASK 0x3fffffffUL + #define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_VALUE_SFT 0 + #define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_TYPE 0x40000000UL + #define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_TYPE_INT (0x0UL << 30) + #define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_TYPE_EXT (0x1UL << 30) + #define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_TYPE_LAST CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_TYPE_EXT + #define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_DIR 0x80000000UL + #define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_DIR_RX (0x0UL << 31) + #define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_DIR_TX (0x1UL << 31) + #define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_DIR_LAST CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_DIR_TX + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_cfa_tunnel_filter_free_input (size:192b/24B) */ +struct hwrm_cfa_tunnel_filter_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 tunnel_filter_id; +}; + +/* hwrm_cfa_tunnel_filter_free_output (size:128b/16B) */ +struct hwrm_cfa_tunnel_filter_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_vxlan_ipv4_hdr (size:128b/16B) */ +struct hwrm_vxlan_ipv4_hdr { + u8 ver_hlen; + #define VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_MASK 0xfUL + #define VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT 0 + #define VXLAN_IPV4_HDR_VER_HLEN_VERSION_MASK 0xf0UL + #define VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT 4 + u8 tos; + __be16 ip_id; + __be16 flags_frag_offset; + u8 ttl; + u8 protocol; + __be32 src_ip_addr; + __be32 dest_ip_addr; +}; + +/* hwrm_vxlan_ipv6_hdr (size:320b/40B) */ +struct hwrm_vxlan_ipv6_hdr { + __be32 ver_tc_flow_label; + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_VER_SFT 0x1cUL + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_VER_MASK 0xf0000000UL + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_TC_SFT 0x14UL + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_TC_MASK 0xff00000UL + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_FLOW_LABEL_SFT 0x0UL + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_FLOW_LABEL_MASK 0xfffffUL + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_LAST VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_FLOW_LABEL_MASK + __be16 payload_len; + u8 next_hdr; + u8 ttl; + __be32 src_ip_addr[4]; + __be32 dest_ip_addr[4]; +}; + +/* hwrm_cfa_encap_data_vxlan (size:640b/80B) */ +struct hwrm_cfa_encap_data_vxlan { + u8 src_mac_addr[6]; + __le16 unused_0; + u8 dst_mac_addr[6]; + u8 num_vlan_tags; + u8 unused_1; + __be16 ovlan_tpid; + __be16 ovlan_tci; + __be16 ivlan_tpid; + __be16 ivlan_tci; + __le32 l3[10]; + #define CFA_ENCAP_DATA_VXLAN_L3_VER_MASK 0xfUL + #define CFA_ENCAP_DATA_VXLAN_L3_VER_IPV4 0x4UL + #define CFA_ENCAP_DATA_VXLAN_L3_VER_IPV6 0x6UL + #define CFA_ENCAP_DATA_VXLAN_L3_LAST CFA_ENCAP_DATA_VXLAN_L3_VER_IPV6 + __be16 src_port; + __be16 dst_port; + __be32 vni; + u8 hdr_rsvd0[3]; + u8 hdr_rsvd1; + u8 hdr_flags; + u8 unused[3]; +}; + +/* hwrm_cfa_encap_record_alloc_input (size:832b/104B) */ +struct hwrm_cfa_encap_record_alloc_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define CFA_ENCAP_RECORD_ALLOC_REQ_FLAGS_LOOPBACK 0x1UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_FLAGS_EXTERNAL 0x2UL + u8 encap_type; + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN 0x1UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_NVGRE 0x2UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2GRE 0x3UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPIP 0x4UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_GENEVE 0x5UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_MPLS 0x6UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VLAN 0x7UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE 0x8UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN_V4 0x9UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE_V1 0xaUL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2_ETYPE 0xbUL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN_GPE_V6 0xcUL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN_GPE 0x10UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_LAST CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN_GPE + u8 unused_0[3]; + __le32 encap_data[20]; +}; + +/* hwrm_cfa_encap_record_alloc_output (size:128b/16B) */ +struct hwrm_cfa_encap_record_alloc_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 encap_record_id; + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_cfa_encap_record_free_input (size:192b/24B) */ +struct hwrm_cfa_encap_record_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 encap_record_id; + u8 unused_0[4]; +}; + +/* hwrm_cfa_encap_record_free_output (size:128b/16B) */ +struct hwrm_cfa_encap_record_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_cfa_ntuple_filter_alloc_input (size:1024b/128B) */ +struct hwrm_cfa_ntuple_filter_alloc_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_LOOPBACK 0x1UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DROP 0x2UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_METER 0x4UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DEST_FID 0x8UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_ARP_REPLY 0x10UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DEST_RFS_RING_IDX 0x20UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_NO_L2_CONTEXT 0x40UL + __le32 enables; + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_L2_FILTER_ID 0x1UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE 0x2UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE 0x4UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_SRC_MACADDR 0x8UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE 0x10UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR 0x20UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR_MASK 0x40UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR 0x80UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR_MASK 0x100UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_IP_PROTOCOL 0x200UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_SRC_PORT 0x400UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_SRC_PORT_MASK 0x800UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_DST_PORT 0x1000UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_DST_PORT_MASK 0x2000UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_PRI_HINT 0x4000UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_NTUPLE_FILTER_ID 0x8000UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_DST_ID 0x10000UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_MIRROR_VNIC_ID 0x20000UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR 0x40000UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_RFS_RING_TBL_IDX 0x80000UL + __le64 l2_filter_id; + u8 src_macaddr[6]; + __be16 ethertype; + u8 ip_addr_type; + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_UNKNOWN 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4 0x4UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6 0x6UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_LAST CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6 + u8 ip_protocol; + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UNKNOWN 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP 0x6UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP 0x11UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_ICMP 0x1UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_ICMPV6 0x3aUL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_RSVD 0xffUL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_LAST CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_RSVD + __le16 dst_id; + __le16 rfs_ring_tbl_idx; + u8 tunnel_type; + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_LAST CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL + u8 pri_hint; + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_ABOVE 0x1UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_BELOW 0x2UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_HIGHEST 0x3UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_LOWEST 0x4UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_LAST CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_LOWEST + __be32 src_ipaddr[4]; + __be32 src_ipaddr_mask[4]; + __be32 dst_ipaddr[4]; + __be32 dst_ipaddr_mask[4]; + __be16 src_port; + __be16 src_port_mask; + __be16 dst_port; + __be16 dst_port_mask; + __le64 ntuple_filter_id_hint; +}; + +/* hwrm_cfa_ntuple_filter_alloc_output (size:192b/24B) */ +struct hwrm_cfa_ntuple_filter_alloc_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le64 ntuple_filter_id; + __le32 flow_id; + #define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_VALUE_MASK 0x3fffffffUL + #define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_VALUE_SFT 0 + #define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_TYPE 0x40000000UL + #define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_TYPE_INT (0x0UL << 30) + #define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_TYPE_EXT (0x1UL << 30) + #define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_TYPE_LAST CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_TYPE_EXT + #define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_DIR 0x80000000UL + #define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_DIR_RX (0x0UL << 31) + #define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_DIR_TX (0x1UL << 31) + #define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_DIR_LAST CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_DIR_TX + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_cfa_ntuple_filter_alloc_cmd_err (size:64b/8B) */ +struct hwrm_cfa_ntuple_filter_alloc_cmd_err { + u8 code; + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_UNKNOWN 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR 0x1UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_LAST CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR + u8 unused_0[7]; +}; + +/* hwrm_cfa_ntuple_filter_free_input (size:192b/24B) */ +struct hwrm_cfa_ntuple_filter_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 ntuple_filter_id; +}; + +/* hwrm_cfa_ntuple_filter_free_output (size:128b/16B) */ +struct hwrm_cfa_ntuple_filter_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_cfa_ntuple_filter_cfg_input (size:384b/48B) */ +struct hwrm_cfa_ntuple_filter_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 enables; + #define CFA_NTUPLE_FILTER_CFG_REQ_ENABLES_NEW_DST_ID 0x1UL + #define CFA_NTUPLE_FILTER_CFG_REQ_ENABLES_NEW_MIRROR_VNIC_ID 0x2UL + #define CFA_NTUPLE_FILTER_CFG_REQ_ENABLES_NEW_METER_INSTANCE_ID 0x4UL + __le32 flags; + #define CFA_NTUPLE_FILTER_CFG_REQ_FLAGS_DEST_FID 0x1UL + #define CFA_NTUPLE_FILTER_CFG_REQ_FLAGS_DEST_RFS_RING_IDX 0x2UL + #define CFA_NTUPLE_FILTER_CFG_REQ_FLAGS_NO_L2_CONTEXT 0x4UL + __le64 ntuple_filter_id; + __le32 new_dst_id; + __le32 new_mirror_vnic_id; + __le16 new_meter_instance_id; + #define CFA_NTUPLE_FILTER_CFG_REQ_NEW_METER_INSTANCE_ID_INVALID 0xffffUL + #define CFA_NTUPLE_FILTER_CFG_REQ_NEW_METER_INSTANCE_ID_LAST CFA_NTUPLE_FILTER_CFG_REQ_NEW_METER_INSTANCE_ID_INVALID + u8 unused_1[6]; +}; + +/* hwrm_cfa_ntuple_filter_cfg_output (size:128b/16B) */ +struct hwrm_cfa_ntuple_filter_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_cfa_decap_filter_alloc_input (size:832b/104B) */ +struct hwrm_cfa_decap_filter_alloc_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL 0x1UL + __le32 enables; + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE 0x1UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_ID 0x2UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_MACADDR 0x4UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR 0x8UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_OVLAN_VID 0x10UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IVLAN_VID 0x20UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_OVLAN_VID 0x40UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID 0x80UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE 0x100UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR 0x200UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR 0x400UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE 0x800UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IP_PROTOCOL 0x1000UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_PORT 0x2000UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_PORT 0x4000UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_ID 0x8000UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_MIRROR_VNIC_ID 0x10000UL + __be32 tunnel_id; + u8 tunnel_type; + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_LAST CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL + u8 unused_0; + __le16 unused_1; + u8 src_macaddr[6]; + u8 unused_2[2]; + u8 dst_macaddr[6]; + __be16 ovlan_vid; + __be16 ivlan_vid; + __be16 t_ovlan_vid; + __be16 t_ivlan_vid; + __be16 ethertype; + u8 ip_addr_type; + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_UNKNOWN 0x0UL + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4 0x4UL + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6 0x6UL + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_LAST CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6 + u8 ip_protocol; + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UNKNOWN 0x0UL + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP 0x6UL + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP 0x11UL + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_LAST CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP + __le16 unused_3; + __le32 unused_4; + __be32 src_ipaddr[4]; + __be32 dst_ipaddr[4]; + __be16 src_port; + __be16 dst_port; + __le16 dst_id; + __le16 l2_ctxt_ref_id; +}; + +/* hwrm_cfa_decap_filter_alloc_output (size:128b/16B) */ +struct hwrm_cfa_decap_filter_alloc_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 decap_filter_id; + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_cfa_decap_filter_free_input (size:192b/24B) */ +struct hwrm_cfa_decap_filter_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 decap_filter_id; + u8 unused_0[4]; +}; + +/* hwrm_cfa_decap_filter_free_output (size:128b/16B) */ +struct hwrm_cfa_decap_filter_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_cfa_flow_alloc_input (size:1024b/128B) */ +struct hwrm_cfa_flow_alloc_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 flags; + #define CFA_FLOW_ALLOC_REQ_FLAGS_TUNNEL 0x1UL + #define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_MASK 0x6UL + #define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_SFT 1 + #define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_NONE (0x0UL << 1) + #define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_ONE (0x1UL << 1) + #define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_TWO (0x2UL << 1) + #define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_LAST CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_TWO + #define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_MASK 0x38UL + #define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_SFT 3 + #define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2 (0x0UL << 3) + #define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV4 (0x1UL << 3) + #define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6 (0x2UL << 3) + #define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_LAST CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6 + #define CFA_FLOW_ALLOC_REQ_FLAGS_PATH_TX 0x40UL + #define CFA_FLOW_ALLOC_REQ_FLAGS_PATH_RX 0x80UL + #define CFA_FLOW_ALLOC_REQ_FLAGS_MATCH_VXLAN_IP_VNI 0x100UL + #define CFA_FLOW_ALLOC_REQ_FLAGS_VHOST_ID_USE_VLAN 0x200UL + __le16 src_fid; + __le32 tunnel_handle; + __le16 action_flags; + #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_FWD 0x1UL + #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_RECYCLE 0x2UL + #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_DROP 0x4UL + #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_METER 0x8UL + #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL 0x10UL + #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC 0x20UL + #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST 0x40UL + #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_IPV4_ADDRESS 0x80UL + #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE 0x100UL + #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TTL_DECREMENT 0x200UL + #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL_IP 0x400UL + #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_FLOW_AGING_ENABLED 0x800UL + #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_PRI_HINT 0x1000UL + #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NO_FLOW_COUNTER_ALLOC 0x2000UL + __le16 dst_fid; + __be16 l2_rewrite_vlan_tpid; + __be16 l2_rewrite_vlan_tci; + __le16 act_meter_id; + __le16 ref_flow_handle; + __be16 ethertype; + __be16 outer_vlan_tci; + __be16 dmac[3]; + __be16 inner_vlan_tci; + __be16 smac[3]; + u8 ip_dst_mask_len; + u8 ip_src_mask_len; + __be32 ip_dst[4]; + __be32 ip_src[4]; + __be16 l4_src_port; + __be16 l4_src_port_mask; + __be16 l4_dst_port; + __be16 l4_dst_port_mask; + __be32 nat_ip_address[4]; + __be16 l2_rewrite_dmac[3]; + __be16 nat_port; + __be16 l2_rewrite_smac[3]; + u8 ip_proto; + u8 tunnel_type; + #define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL + #define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL + #define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL + #define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL + #define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL + #define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL + #define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL + #define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL + #define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL + #define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL + #define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL + #define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL + #define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_LAST CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL +}; + +/* hwrm_cfa_flow_alloc_output (size:256b/32B) */ +struct hwrm_cfa_flow_alloc_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 flow_handle; + u8 unused_0[2]; + __le32 flow_id; + #define CFA_FLOW_ALLOC_RESP_FLOW_ID_VALUE_MASK 0x3fffffffUL + #define CFA_FLOW_ALLOC_RESP_FLOW_ID_VALUE_SFT 0 + #define CFA_FLOW_ALLOC_RESP_FLOW_ID_TYPE 0x40000000UL + #define CFA_FLOW_ALLOC_RESP_FLOW_ID_TYPE_INT (0x0UL << 30) + #define CFA_FLOW_ALLOC_RESP_FLOW_ID_TYPE_EXT (0x1UL << 30) + #define CFA_FLOW_ALLOC_RESP_FLOW_ID_TYPE_LAST CFA_FLOW_ALLOC_RESP_FLOW_ID_TYPE_EXT + #define CFA_FLOW_ALLOC_RESP_FLOW_ID_DIR 0x80000000UL + #define CFA_FLOW_ALLOC_RESP_FLOW_ID_DIR_RX (0x0UL << 31) + #define CFA_FLOW_ALLOC_RESP_FLOW_ID_DIR_TX (0x1UL << 31) + #define CFA_FLOW_ALLOC_RESP_FLOW_ID_DIR_LAST CFA_FLOW_ALLOC_RESP_FLOW_ID_DIR_TX + __le64 ext_flow_handle; + __le32 flow_counter_id; + u8 unused_1[3]; + u8 valid; +}; + +/* hwrm_cfa_flow_alloc_cmd_err (size:64b/8B) */ +struct hwrm_cfa_flow_alloc_cmd_err { + u8 code; + #define CFA_FLOW_ALLOC_CMD_ERR_CODE_UNKNOWN 0x0UL + #define CFA_FLOW_ALLOC_CMD_ERR_CODE_L2_CONTEXT_TCAM 0x1UL + #define CFA_FLOW_ALLOC_CMD_ERR_CODE_ACTION_RECORD 0x2UL + #define CFA_FLOW_ALLOC_CMD_ERR_CODE_FLOW_COUNTER 0x3UL + #define CFA_FLOW_ALLOC_CMD_ERR_CODE_WILD_CARD_TCAM 0x4UL + #define CFA_FLOW_ALLOC_CMD_ERR_CODE_HASH_COLLISION 0x5UL + #define CFA_FLOW_ALLOC_CMD_ERR_CODE_KEY_EXISTS 0x6UL + #define CFA_FLOW_ALLOC_CMD_ERR_CODE_FLOW_CTXT_DB 0x7UL + #define CFA_FLOW_ALLOC_CMD_ERR_CODE_LAST CFA_FLOW_ALLOC_CMD_ERR_CODE_FLOW_CTXT_DB + u8 unused_0[7]; +}; + +/* hwrm_cfa_flow_free_input (size:256b/32B) */ +struct hwrm_cfa_flow_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 flow_handle; + __le16 unused_0; + __le32 flow_counter_id; + __le64 ext_flow_handle; +}; + +/* hwrm_cfa_flow_free_output (size:256b/32B) */ +struct hwrm_cfa_flow_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le64 packet; + __le64 byte; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_cfa_flow_info_input (size:256b/32B) */ +struct hwrm_cfa_flow_info_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 flow_handle; + #define CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK 0xfffUL + #define CFA_FLOW_INFO_REQ_FLOW_HANDLE_CNP_CNT 0x1000UL + #define CFA_FLOW_INFO_REQ_FLOW_HANDLE_ROCEV1_CNT 0x2000UL + #define CFA_FLOW_INFO_REQ_FLOW_HANDLE_NIC_TX 0x3000UL + #define CFA_FLOW_INFO_REQ_FLOW_HANDLE_ROCEV2_CNT 0x4000UL + #define CFA_FLOW_INFO_REQ_FLOW_HANDLE_DIR_RX 0x8000UL + #define CFA_FLOW_INFO_REQ_FLOW_HANDLE_CNP_CNT_RX 0x9000UL + #define CFA_FLOW_INFO_REQ_FLOW_HANDLE_ROCEV1_CNT_RX 0xa000UL + #define CFA_FLOW_INFO_REQ_FLOW_HANDLE_NIC_RX 0xb000UL + #define CFA_FLOW_INFO_REQ_FLOW_HANDLE_ROCEV2_CNT_RX 0xc000UL + #define CFA_FLOW_INFO_REQ_FLOW_HANDLE_LAST CFA_FLOW_INFO_REQ_FLOW_HANDLE_ROCEV2_CNT_RX + u8 unused_0[6]; + __le64 ext_flow_handle; +}; + +/* hwrm_cfa_flow_info_output (size:5632b/704B) */ +struct hwrm_cfa_flow_info_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 flags; + #define CFA_FLOW_INFO_RESP_FLAGS_PATH_TX 0x1UL + #define CFA_FLOW_INFO_RESP_FLAGS_PATH_RX 0x2UL + u8 profile; + __le16 src_fid; + __le16 dst_fid; + __le16 l2_ctxt_id; + __le64 em_info; + __le64 tcam_info; + __le64 vfp_tcam_info; + __le16 ar_id; + __le16 flow_handle; + __le32 tunnel_handle; + __le16 flow_timer; + u8 unused_0[6]; + __le32 flow_key_data[130]; + __le32 flow_action_info[30]; + u8 unused_1[7]; + u8 valid; +}; + +/* hwrm_cfa_flow_stats_input (size:640b/80B) */ +struct hwrm_cfa_flow_stats_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 num_flows; + __le16 flow_handle_0; + __le16 flow_handle_1; + __le16 flow_handle_2; + __le16 flow_handle_3; + __le16 flow_handle_4; + __le16 flow_handle_5; + __le16 flow_handle_6; + __le16 flow_handle_7; + __le16 flow_handle_8; + __le16 flow_handle_9; + u8 unused_0[2]; + __le32 flow_id_0; + __le32 flow_id_1; + __le32 flow_id_2; + __le32 flow_id_3; + __le32 flow_id_4; + __le32 flow_id_5; + __le32 flow_id_6; + __le32 flow_id_7; + __le32 flow_id_8; + __le32 flow_id_9; +}; + +/* hwrm_cfa_flow_stats_output (size:1408b/176B) */ +struct hwrm_cfa_flow_stats_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le64 packet_0; + __le64 packet_1; + __le64 packet_2; + __le64 packet_3; + __le64 packet_4; + __le64 packet_5; + __le64 packet_6; + __le64 packet_7; + __le64 packet_8; + __le64 packet_9; + __le64 byte_0; + __le64 byte_1; + __le64 byte_2; + __le64 byte_3; + __le64 byte_4; + __le64 byte_5; + __le64 byte_6; + __le64 byte_7; + __le64 byte_8; + __le64 byte_9; + __le16 flow_hits; + u8 unused_0[5]; + u8 valid; +}; + +/* hwrm_cfa_vfr_alloc_input (size:448b/56B) */ +struct hwrm_cfa_vfr_alloc_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 vf_id; + __le16 reserved; + u8 unused_0[4]; + char vfr_name[32]; +}; + +/* hwrm_cfa_vfr_alloc_output (size:128b/16B) */ +struct hwrm_cfa_vfr_alloc_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 rx_cfa_code; + __le16 tx_cfa_action; + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_cfa_vfr_free_input (size:448b/56B) */ +struct hwrm_cfa_vfr_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + char vfr_name[32]; + __le16 vf_id; + __le16 reserved; + u8 unused_0[4]; +}; + +/* hwrm_cfa_vfr_free_output (size:128b/16B) */ +struct hwrm_cfa_vfr_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_cfa_eem_qcaps_input (size:192b/24B) */ +struct hwrm_cfa_eem_qcaps_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define CFA_EEM_QCAPS_REQ_FLAGS_PATH_TX 0x1UL + #define CFA_EEM_QCAPS_REQ_FLAGS_PATH_RX 0x2UL + #define CFA_EEM_QCAPS_REQ_FLAGS_PREFERRED_OFFLOAD 0x4UL + __le32 unused_0; +}; + +/* hwrm_cfa_eem_qcaps_output (size:320b/40B) */ +struct hwrm_cfa_eem_qcaps_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 flags; + #define CFA_EEM_QCAPS_RESP_FLAGS_PATH_TX 0x1UL + #define CFA_EEM_QCAPS_RESP_FLAGS_PATH_RX 0x2UL + #define CFA_EEM_QCAPS_RESP_FLAGS_CENTRALIZED_MEMORY_MODEL_SUPPORTED 0x4UL + #define CFA_EEM_QCAPS_RESP_FLAGS_DETACHED_CENTRALIZED_MEMORY_MODEL_SUPPORTED 0x8UL + __le32 unused_0; + __le32 supported; + #define CFA_EEM_QCAPS_RESP_SUPPORTED_KEY0_TABLE 0x1UL + #define CFA_EEM_QCAPS_RESP_SUPPORTED_KEY1_TABLE 0x2UL + #define CFA_EEM_QCAPS_RESP_SUPPORTED_EXTERNAL_RECORD_TABLE 0x4UL + #define CFA_EEM_QCAPS_RESP_SUPPORTED_EXTERNAL_FLOW_COUNTERS_TABLE 0x8UL + #define CFA_EEM_QCAPS_RESP_SUPPORTED_FID_TABLE 0x10UL + __le32 max_entries_supported; + __le16 key_entry_size; + __le16 record_entry_size; + __le16 efc_entry_size; + __le16 fid_entry_size; + u8 unused_1[7]; + u8 valid; +}; + +/* hwrm_cfa_eem_cfg_input (size:384b/48B) */ +struct hwrm_cfa_eem_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define CFA_EEM_CFG_REQ_FLAGS_PATH_TX 0x1UL + #define CFA_EEM_CFG_REQ_FLAGS_PATH_RX 0x2UL + #define CFA_EEM_CFG_REQ_FLAGS_PREFERRED_OFFLOAD 0x4UL + #define CFA_EEM_CFG_REQ_FLAGS_SECONDARY_PF 0x8UL + __le16 group_id; + __le16 unused_0; + __le32 num_entries; + __le32 unused_1; + __le16 key0_ctx_id; + __le16 key1_ctx_id; + __le16 record_ctx_id; + __le16 efc_ctx_id; + __le16 fid_ctx_id; + __le16 unused_2; + __le32 unused_3; +}; + +/* hwrm_cfa_eem_cfg_output (size:128b/16B) */ +struct hwrm_cfa_eem_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_cfa_eem_qcfg_input (size:192b/24B) */ +struct hwrm_cfa_eem_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define CFA_EEM_QCFG_REQ_FLAGS_PATH_TX 0x1UL + #define CFA_EEM_QCFG_REQ_FLAGS_PATH_RX 0x2UL + __le32 unused_0; +}; + +/* hwrm_cfa_eem_qcfg_output (size:256b/32B) */ +struct hwrm_cfa_eem_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 flags; + #define CFA_EEM_QCFG_RESP_FLAGS_PATH_TX 0x1UL + #define CFA_EEM_QCFG_RESP_FLAGS_PATH_RX 0x2UL + #define CFA_EEM_QCFG_RESP_FLAGS_PREFERRED_OFFLOAD 0x4UL + __le32 num_entries; + __le16 key0_ctx_id; + __le16 key1_ctx_id; + __le16 record_ctx_id; + __le16 efc_ctx_id; + __le16 fid_ctx_id; + u8 unused_2[5]; + u8 valid; +}; + +/* hwrm_cfa_eem_op_input (size:192b/24B) */ +struct hwrm_cfa_eem_op_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define CFA_EEM_OP_REQ_FLAGS_PATH_TX 0x1UL + #define CFA_EEM_OP_REQ_FLAGS_PATH_RX 0x2UL + __le16 unused_0; + __le16 op; + #define CFA_EEM_OP_REQ_OP_RESERVED 0x0UL + #define CFA_EEM_OP_REQ_OP_EEM_DISABLE 0x1UL + #define CFA_EEM_OP_REQ_OP_EEM_ENABLE 0x2UL + #define CFA_EEM_OP_REQ_OP_EEM_CLEANUP 0x3UL + #define CFA_EEM_OP_REQ_OP_LAST CFA_EEM_OP_REQ_OP_EEM_CLEANUP +}; + +/* hwrm_cfa_eem_op_output (size:128b/16B) */ +struct hwrm_cfa_eem_op_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_cfa_adv_flow_mgnt_qcaps_input (size:256b/32B) */ +struct hwrm_cfa_adv_flow_mgnt_qcaps_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 unused_0[4]; +}; + +/* hwrm_cfa_adv_flow_mgnt_qcaps_output (size:128b/16B) */ +struct hwrm_cfa_adv_flow_mgnt_qcaps_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 flags; + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_16BIT_SUPPORTED 0x1UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_64BIT_SUPPORTED 0x2UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_BATCH_DELETE_SUPPORTED 0x4UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_RESET_ALL_SUPPORTED 0x8UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_DEST_FUNC_SUPPORTED 0x10UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_TX_EEM_FLOW_SUPPORTED 0x20UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RX_EEM_FLOW_SUPPORTED 0x40UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_COUNTER_ALLOC_SUPPORTED 0x80UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_SUPPORTED 0x100UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_UNTAGGED_VLAN_SUPPORTED 0x200UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_XDP_SUPPORTED 0x400UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_L2_HEADER_SOURCE_FIELDS_SUPPORTED 0x800UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ARP_SUPPORTED 0x1000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_V2_SUPPORTED 0x2000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ETHERTYPE_IP_SUPPORTED 0x4000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_TRUFLOW_CAPABLE 0x8000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_L2_FILTER_TRAFFIC_TYPE_L2_ROCE_SUPPORTED 0x10000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_LAG_SUPPORTED 0x20000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_NO_L2CTX_SUPPORTED 0x40000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NIC_FLOW_STATS_SUPPORTED 0x80000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_EXT_IP_PROTO_SUPPORTED 0x100000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_V3_SUPPORTED 0x200000UL + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_tunnel_dst_port_query_input (size:192b/24B) */ +struct hwrm_tunnel_dst_port_query_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 tunnel_type; + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ECPRI 0xeUL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_SRV6 0xfUL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GRE 0x11UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR 0x12UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES01 0x13UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES02 0x14UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES03 0x15UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES04 0x16UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES05 0x17UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES06 0x18UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES07 0x19UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_LAST TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES07 + u8 tunnel_next_proto; + u8 unused_0[6]; +}; + +/* hwrm_tunnel_dst_port_query_output (size:128b/16B) */ +struct hwrm_tunnel_dst_port_query_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 tunnel_dst_port_id; + __be16 tunnel_dst_port_val; + u8 upar_in_use; + #define TUNNEL_DST_PORT_QUERY_RESP_UPAR_IN_USE_UPAR0 0x1UL + #define TUNNEL_DST_PORT_QUERY_RESP_UPAR_IN_USE_UPAR1 0x2UL + #define TUNNEL_DST_PORT_QUERY_RESP_UPAR_IN_USE_UPAR2 0x4UL + #define TUNNEL_DST_PORT_QUERY_RESP_UPAR_IN_USE_UPAR3 0x8UL + #define TUNNEL_DST_PORT_QUERY_RESP_UPAR_IN_USE_UPAR4 0x10UL + #define TUNNEL_DST_PORT_QUERY_RESP_UPAR_IN_USE_UPAR5 0x20UL + #define TUNNEL_DST_PORT_QUERY_RESP_UPAR_IN_USE_UPAR6 0x40UL + #define TUNNEL_DST_PORT_QUERY_RESP_UPAR_IN_USE_UPAR7 0x80UL + u8 status; + #define TUNNEL_DST_PORT_QUERY_RESP_STATUS_CHIP_LEVEL 0x1UL + #define TUNNEL_DST_PORT_QUERY_RESP_STATUS_FUNC_LEVEL 0x2UL + u8 unused_0; + u8 valid; +}; + +/* hwrm_tunnel_dst_port_alloc_input (size:192b/24B) */ +struct hwrm_tunnel_dst_port_alloc_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 tunnel_type; + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ECPRI 0xeUL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_SRV6 0xfUL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GRE 0x11UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR 0x12UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES01 0x13UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES02 0x14UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES03 0x15UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES04 0x16UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES05 0x17UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES06 0x18UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES07 0x19UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_LAST TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES07 + u8 tunnel_next_proto; + __be16 tunnel_dst_port_val; + u8 unused_0[4]; +}; + +/* hwrm_tunnel_dst_port_alloc_output (size:128b/16B) */ +struct hwrm_tunnel_dst_port_alloc_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 tunnel_dst_port_id; + u8 error_info; + #define TUNNEL_DST_PORT_ALLOC_RESP_ERROR_INFO_SUCCESS 0x0UL + #define TUNNEL_DST_PORT_ALLOC_RESP_ERROR_INFO_ERR_ALLOCATED 0x1UL + #define TUNNEL_DST_PORT_ALLOC_RESP_ERROR_INFO_ERR_NO_RESOURCE 0x2UL + #define TUNNEL_DST_PORT_ALLOC_RESP_ERROR_INFO_ERR_ENABLED 0x3UL + #define TUNNEL_DST_PORT_ALLOC_RESP_ERROR_INFO_LAST TUNNEL_DST_PORT_ALLOC_RESP_ERROR_INFO_ERR_ENABLED + u8 upar_in_use; + #define TUNNEL_DST_PORT_ALLOC_RESP_UPAR_IN_USE_UPAR0 0x1UL + #define TUNNEL_DST_PORT_ALLOC_RESP_UPAR_IN_USE_UPAR1 0x2UL + #define TUNNEL_DST_PORT_ALLOC_RESP_UPAR_IN_USE_UPAR2 0x4UL + #define TUNNEL_DST_PORT_ALLOC_RESP_UPAR_IN_USE_UPAR3 0x8UL + #define TUNNEL_DST_PORT_ALLOC_RESP_UPAR_IN_USE_UPAR4 0x10UL + #define TUNNEL_DST_PORT_ALLOC_RESP_UPAR_IN_USE_UPAR5 0x20UL + #define TUNNEL_DST_PORT_ALLOC_RESP_UPAR_IN_USE_UPAR6 0x40UL + #define TUNNEL_DST_PORT_ALLOC_RESP_UPAR_IN_USE_UPAR7 0x80UL + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_tunnel_dst_port_free_input (size:192b/24B) */ +struct hwrm_tunnel_dst_port_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 tunnel_type; + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ECPRI 0xeUL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_SRV6 0xfUL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GRE 0x11UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR 0x12UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES01 0x13UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES02 0x14UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES03 0x15UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES04 0x16UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES05 0x17UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES06 0x18UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES07 0x19UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_LAST TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES07 + u8 tunnel_next_proto; + __le16 tunnel_dst_port_id; + u8 unused_0[4]; +}; + +/* hwrm_tunnel_dst_port_free_output (size:128b/16B) */ +struct hwrm_tunnel_dst_port_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 error_info; + #define TUNNEL_DST_PORT_FREE_RESP_ERROR_INFO_SUCCESS 0x0UL + #define TUNNEL_DST_PORT_FREE_RESP_ERROR_INFO_ERR_NOT_OWNER 0x1UL + #define TUNNEL_DST_PORT_FREE_RESP_ERROR_INFO_ERR_NOT_ALLOCATED 0x2UL + #define TUNNEL_DST_PORT_FREE_RESP_ERROR_INFO_LAST TUNNEL_DST_PORT_FREE_RESP_ERROR_INFO_ERR_NOT_ALLOCATED + u8 unused_1[6]; + u8 valid; +}; + +/* ctx_hw_stats (size:1280b/160B) */ +struct ctx_hw_stats { + __le64 rx_ucast_pkts; + __le64 rx_mcast_pkts; + __le64 rx_bcast_pkts; + __le64 rx_discard_pkts; + __le64 rx_error_pkts; + __le64 rx_ucast_bytes; + __le64 rx_mcast_bytes; + __le64 rx_bcast_bytes; + __le64 tx_ucast_pkts; + __le64 tx_mcast_pkts; + __le64 tx_bcast_pkts; + __le64 tx_error_pkts; + __le64 tx_discard_pkts; + __le64 tx_ucast_bytes; + __le64 tx_mcast_bytes; + __le64 tx_bcast_bytes; + __le64 tpa_pkts; + __le64 tpa_bytes; + __le64 tpa_events; + __le64 tpa_aborts; +}; + +/* ctx_hw_stats_ext (size:1408b/176B) */ +struct ctx_hw_stats_ext { + __le64 rx_ucast_pkts; + __le64 rx_mcast_pkts; + __le64 rx_bcast_pkts; + __le64 rx_discard_pkts; + __le64 rx_error_pkts; + __le64 rx_ucast_bytes; + __le64 rx_mcast_bytes; + __le64 rx_bcast_bytes; + __le64 tx_ucast_pkts; + __le64 tx_mcast_pkts; + __le64 tx_bcast_pkts; + __le64 tx_error_pkts; + __le64 tx_discard_pkts; + __le64 tx_ucast_bytes; + __le64 tx_mcast_bytes; + __le64 tx_bcast_bytes; + __le64 rx_tpa_eligible_pkt; + __le64 rx_tpa_eligible_bytes; + __le64 rx_tpa_pkt; + __le64 rx_tpa_bytes; + __le64 rx_tpa_errors; + __le64 rx_tpa_events; +}; + +/* hwrm_stat_ctx_alloc_input (size:384b/48B) */ +struct hwrm_stat_ctx_alloc_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 stats_dma_addr; + __le32 update_period_ms; + u8 stat_ctx_flags; + #define STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE 0x1UL + #define STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_DUP_HOST_BUF 0x2UL + u8 unused_0; + __le16 stats_dma_length; + __le16 flags; + #define STAT_CTX_ALLOC_REQ_FLAGS_STEERING_TAG_VALID 0x1UL + __le16 steering_tag; + __le32 stat_ctx_id; + __le16 alloc_seq_id; + u8 unused_1[6]; +}; + +/* hwrm_stat_ctx_alloc_output (size:128b/16B) */ +struct hwrm_stat_ctx_alloc_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 stat_ctx_id; + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_stat_ctx_free_input (size:192b/24B) */ +struct hwrm_stat_ctx_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 stat_ctx_id; + u8 unused_0[4]; +}; + +/* hwrm_stat_ctx_free_output (size:128b/16B) */ +struct hwrm_stat_ctx_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 stat_ctx_id; + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_stat_ctx_query_input (size:192b/24B) */ +struct hwrm_stat_ctx_query_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 stat_ctx_id; + u8 flags; + #define STAT_CTX_QUERY_REQ_FLAGS_COUNTER_MASK 0x1UL + u8 unused_0[3]; +}; + +/* hwrm_stat_ctx_query_output (size:1408b/176B) */ +struct hwrm_stat_ctx_query_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le64 tx_ucast_pkts; + __le64 tx_mcast_pkts; + __le64 tx_bcast_pkts; + __le64 tx_discard_pkts; + __le64 tx_error_pkts; + __le64 tx_ucast_bytes; + __le64 tx_mcast_bytes; + __le64 tx_bcast_bytes; + __le64 rx_ucast_pkts; + __le64 rx_mcast_pkts; + __le64 rx_bcast_pkts; + __le64 rx_discard_pkts; + __le64 rx_error_pkts; + __le64 rx_ucast_bytes; + __le64 rx_mcast_bytes; + __le64 rx_bcast_bytes; + __le64 rx_agg_pkts; + __le64 rx_agg_bytes; + __le64 rx_agg_events; + __le64 rx_agg_aborts; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_stat_ext_ctx_query_input (size:192b/24B) */ +struct hwrm_stat_ext_ctx_query_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 stat_ctx_id; + u8 flags; + #define STAT_EXT_CTX_QUERY_REQ_FLAGS_COUNTER_MASK 0x1UL + u8 unused_0[3]; +}; + +/* hwrm_stat_ext_ctx_query_output (size:1536b/192B) */ +struct hwrm_stat_ext_ctx_query_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le64 rx_ucast_pkts; + __le64 rx_mcast_pkts; + __le64 rx_bcast_pkts; + __le64 rx_discard_pkts; + __le64 rx_error_pkts; + __le64 rx_ucast_bytes; + __le64 rx_mcast_bytes; + __le64 rx_bcast_bytes; + __le64 tx_ucast_pkts; + __le64 tx_mcast_pkts; + __le64 tx_bcast_pkts; + __le64 tx_error_pkts; + __le64 tx_discard_pkts; + __le64 tx_ucast_bytes; + __le64 tx_mcast_bytes; + __le64 tx_bcast_bytes; + __le64 rx_tpa_eligible_pkt; + __le64 rx_tpa_eligible_bytes; + __le64 rx_tpa_pkt; + __le64 rx_tpa_bytes; + __le64 rx_tpa_errors; + __le64 rx_tpa_events; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_stat_ctx_clr_stats_input (size:192b/24B) */ +struct hwrm_stat_ctx_clr_stats_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 stat_ctx_id; + u8 unused_0[4]; +}; + +/* hwrm_stat_ctx_clr_stats_output (size:128b/16B) */ +struct hwrm_stat_ctx_clr_stats_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_pcie_qstats_input (size:256b/32B) */ +struct hwrm_pcie_qstats_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 pcie_stat_size; + u8 unused_0[6]; + __le64 pcie_stat_host_addr; +}; + +/* hwrm_pcie_qstats_output (size:128b/16B) */ +struct hwrm_pcie_qstats_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 pcie_stat_size; + u8 unused_0[5]; + u8 valid; +}; + +/* pcie_ctx_hw_stats (size:768b/96B) */ +struct pcie_ctx_hw_stats { + __le64 pcie_pl_signal_integrity; + __le64 pcie_dl_signal_integrity; + __le64 pcie_tl_signal_integrity; + __le64 pcie_link_integrity; + __le64 pcie_tx_traffic_rate; + __le64 pcie_rx_traffic_rate; + __le64 pcie_tx_dllp_statistics; + __le64 pcie_rx_dllp_statistics; + __le64 pcie_equalization_time; + __le32 pcie_ltssm_histogram[4]; + __le64 pcie_recovery_histogram; +}; + +/* pcie_ctx_hw_stats_v2 (size:4096b/512B) */ +struct pcie_ctx_hw_stats_v2 { + __le64 pcie_pl_signal_integrity; + __le64 pcie_dl_signal_integrity; + __le64 pcie_tl_signal_integrity; + __le64 pcie_link_integrity; + __le64 pcie_tx_traffic_rate; + __le64 pcie_rx_traffic_rate; + __le64 pcie_tx_dllp_statistics; + __le64 pcie_rx_dllp_statistics; + __le64 pcie_equalization_time; + __le32 pcie_ltssm_histogram[4]; + __le64 pcie_recovery_histogram; + __le32 pcie_tl_credit_nph_histogram[8]; + __le32 pcie_tl_credit_ph_histogram[8]; + __le32 pcie_tl_credit_pd_histogram[8]; + __le32 pcie_cmpl_latest_times[4]; + __le32 pcie_cmpl_longest_time; + __le32 pcie_cmpl_shortest_time; + __le32 unused_0[2]; + __le32 pcie_cmpl_latest_headers[4][4]; + __le32 pcie_cmpl_longest_headers[4][4]; + __le32 pcie_cmpl_shortest_headers[4][4]; + __le32 pcie_wr_latency_histogram[12]; + __le32 pcie_wr_latency_all_normal_count; + __le32 unused_1; + __le64 pcie_posted_packet_count; + __le64 pcie_non_posted_packet_count; + __le64 pcie_other_packet_count; + __le64 pcie_blocked_packet_count; + __le64 pcie_cmpl_packet_count; +}; + +/* hwrm_stat_generic_qstats_input (size:256b/32B) */ +struct hwrm_stat_generic_qstats_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 generic_stat_size; + u8 flags; + #define STAT_GENERIC_QSTATS_REQ_FLAGS_COUNTER_MASK 0x1UL + u8 unused_0[5]; + __le64 generic_stat_host_addr; +}; + +/* hwrm_stat_generic_qstats_output (size:128b/16B) */ +struct hwrm_stat_generic_qstats_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 generic_stat_size; + u8 unused_0[5]; + u8 valid; +}; + +/* generic_sw_hw_stats (size:1472b/184B) */ +struct generic_sw_hw_stats { + __le64 pcie_statistics_tx_tlp; + __le64 pcie_statistics_rx_tlp; + __le64 pcie_credit_fc_hdr_posted; + __le64 pcie_credit_fc_hdr_nonposted; + __le64 pcie_credit_fc_hdr_cmpl; + __le64 pcie_credit_fc_data_posted; + __le64 pcie_credit_fc_data_nonposted; + __le64 pcie_credit_fc_data_cmpl; + __le64 pcie_credit_fc_tgt_nonposted; + __le64 pcie_credit_fc_tgt_data_posted; + __le64 pcie_credit_fc_tgt_hdr_posted; + __le64 pcie_credit_fc_cmpl_hdr_posted; + __le64 pcie_credit_fc_cmpl_data_posted; + __le64 pcie_cmpl_longest; + __le64 pcie_cmpl_shortest; + __le64 cache_miss_count_cfcq; + __le64 cache_miss_count_cfcs; + __le64 cache_miss_count_cfcc; + __le64 cache_miss_count_cfcm; + __le64 hw_db_recov_dbs_dropped; + __le64 hw_db_recov_drops_serviced; + __le64 hw_db_recov_dbs_recovered; + __le64 hw_db_recov_oo_drop_count; +}; + +/* hwrm_fw_reset_input (size:192b/24B) */ +struct hwrm_fw_reset_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 embedded_proc_type; + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_BOOT 0x0UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT 0x1UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE 0x3UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_HOST 0x4UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_AP 0x5UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP 0x6UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_HOST_RESOURCE_REINIT 0x7UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_IMPACTLESS_ACTIVATION 0x8UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_LAST FW_RESET_REQ_EMBEDDED_PROC_TYPE_IMPACTLESS_ACTIVATION + u8 selfrst_status; + #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE 0x0UL + #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP 0x1UL + #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST 0x2UL + #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTIMMEDIATE 0x3UL + #define FW_RESET_REQ_SELFRST_STATUS_LAST FW_RESET_REQ_SELFRST_STATUS_SELFRSTIMMEDIATE + u8 host_idx; + u8 flags; + #define FW_RESET_REQ_FLAGS_RESET_GRACEFUL 0x1UL + #define FW_RESET_REQ_FLAGS_FW_ACTIVATION 0x2UL + u8 unused_0[4]; +}; + +/* hwrm_fw_reset_output (size:128b/16B) */ +struct hwrm_fw_reset_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 selfrst_status; + #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTNONE 0x0UL + #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTASAP 0x1UL + #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTPCIERST 0x2UL + #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTIMMEDIATE 0x3UL + #define FW_RESET_RESP_SELFRST_STATUS_LAST FW_RESET_RESP_SELFRST_STATUS_SELFRSTIMMEDIATE + u8 unused_0[6]; + u8 valid; +}; + +/* hwrm_fw_qstatus_input (size:192b/24B) */ +struct hwrm_fw_qstatus_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 embedded_proc_type; + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_BOOT 0x0UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_MGMT 0x1UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_ROCE 0x3UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_HOST 0x4UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_AP 0x5UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_CHIP 0x6UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_LAST FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_CHIP + u8 unused_0[7]; +}; + +/* hwrm_fw_qstatus_output (size:128b/16B) */ +struct hwrm_fw_qstatus_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 selfrst_status; + #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTNONE 0x0UL + #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTASAP 0x1UL + #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTPCIERST 0x2UL + #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTPOWER 0x3UL + #define FW_QSTATUS_RESP_SELFRST_STATUS_LAST FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTPOWER + u8 nvm_option_action_status; + #define FW_QSTATUS_RESP_NVM_OPTION_ACTION_STATUS_NVMOPT_ACTION_NONE 0x0UL + #define FW_QSTATUS_RESP_NVM_OPTION_ACTION_STATUS_NVMOPT_ACTION_HOTRESET 0x1UL + #define FW_QSTATUS_RESP_NVM_OPTION_ACTION_STATUS_NVMOPT_ACTION_WARMBOOT 0x2UL + #define FW_QSTATUS_RESP_NVM_OPTION_ACTION_STATUS_NVMOPT_ACTION_COLDBOOT 0x3UL + #define FW_QSTATUS_RESP_NVM_OPTION_ACTION_STATUS_LAST FW_QSTATUS_RESP_NVM_OPTION_ACTION_STATUS_NVMOPT_ACTION_COLDBOOT + u8 unused_0[5]; + u8 valid; +}; + +/* hwrm_fw_set_time_input (size:256b/32B) */ +struct hwrm_fw_set_time_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 year; + #define FW_SET_TIME_REQ_YEAR_UNKNOWN 0x0UL + #define FW_SET_TIME_REQ_YEAR_LAST FW_SET_TIME_REQ_YEAR_UNKNOWN + u8 month; + u8 day; + u8 hour; + u8 minute; + u8 second; + u8 unused_0; + __le16 millisecond; + __le16 zone; + #define FW_SET_TIME_REQ_ZONE_UTC 0 + #define FW_SET_TIME_REQ_ZONE_UNKNOWN 65535 + #define FW_SET_TIME_REQ_ZONE_LAST FW_SET_TIME_REQ_ZONE_UNKNOWN + u8 unused_1[4]; +}; + +/* hwrm_fw_set_time_output (size:128b/16B) */ +struct hwrm_fw_set_time_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_struct_hdr (size:128b/16B) */ +struct hwrm_struct_hdr { + __le16 struct_id; + #define STRUCT_HDR_STRUCT_ID_LLDP_CFG 0x41bUL + #define STRUCT_HDR_STRUCT_ID_DCBX_ETS 0x41dUL + #define STRUCT_HDR_STRUCT_ID_DCBX_PFC 0x41fUL + #define STRUCT_HDR_STRUCT_ID_DCBX_APP 0x421UL + #define STRUCT_HDR_STRUCT_ID_DCBX_FEATURE_STATE 0x422UL + #define STRUCT_HDR_STRUCT_ID_LLDP_GENERIC 0x424UL + #define STRUCT_HDR_STRUCT_ID_LLDP_DEVICE 0x426UL + #define STRUCT_HDR_STRUCT_ID_POWER_BKUP 0x427UL + #define STRUCT_HDR_STRUCT_ID_PEER_MMAP 0x429UL + #define STRUCT_HDR_STRUCT_ID_AFM_OPAQUE 0x1UL + #define STRUCT_HDR_STRUCT_ID_PORT_DESCRIPTION 0xaUL + #define STRUCT_HDR_STRUCT_ID_RSS_V2 0x64UL + #define STRUCT_HDR_STRUCT_ID_MSIX_PER_VF 0xc8UL + #define STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_COUNT 0x12cUL + #define STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_BOUND 0x12dUL + #define STRUCT_HDR_STRUCT_ID_LAST STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_BOUND + __le16 len; + u8 version; + #define STRUCT_HDR_VERSION_0 0x0UL + #define STRUCT_HDR_VERSION_1 0x1UL + #define STRUCT_HDR_VERSION_LAST STRUCT_HDR_VERSION_1 + u8 count; + __le16 subtype; + __le16 next_offset; + #define STRUCT_HDR_NEXT_OFFSET_LAST 0x0UL + u8 unused_0[6]; +}; + +/* hwrm_struct_data_dcbx_app (size:64b/8B) */ +struct hwrm_struct_data_dcbx_app { + __be16 protocol_id; + u8 protocol_selector; + #define STRUCT_DATA_DCBX_APP_PROTOCOL_SELECTOR_ETHER_TYPE 0x1UL + #define STRUCT_DATA_DCBX_APP_PROTOCOL_SELECTOR_TCP_PORT 0x2UL + #define STRUCT_DATA_DCBX_APP_PROTOCOL_SELECTOR_UDP_PORT 0x3UL + #define STRUCT_DATA_DCBX_APP_PROTOCOL_SELECTOR_TCP_UDP_PORT 0x4UL + #define STRUCT_DATA_DCBX_APP_PROTOCOL_SELECTOR_LAST STRUCT_DATA_DCBX_APP_PROTOCOL_SELECTOR_TCP_UDP_PORT + u8 priority; + u8 valid; + u8 unused_0[3]; +}; + +/* hwrm_fw_set_structured_data_input (size:256b/32B) */ +struct hwrm_fw_set_structured_data_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 src_data_addr; + __le16 data_len; + u8 hdr_cnt; + u8 unused_0[5]; +}; + +/* hwrm_fw_set_structured_data_output (size:128b/16B) */ +struct hwrm_fw_set_structured_data_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_fw_set_structured_data_cmd_err (size:64b/8B) */ +struct hwrm_fw_set_structured_data_cmd_err { + u8 code; + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_UNKNOWN 0x0UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_HDR_CNT 0x1UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_FMT 0x2UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID 0x3UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_LAST FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID + u8 unused_0[7]; +}; + +/* hwrm_fw_get_structured_data_input (size:256b/32B) */ +struct hwrm_fw_get_structured_data_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 dest_data_addr; + __le16 data_len; + __le16 structure_id; + __le16 subtype; + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_UNUSED 0x0UL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_ALL 0xffffUL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NEAR_BRIDGE_ADMIN 0x100UL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NEAR_BRIDGE_PEER 0x101UL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NEAR_BRIDGE_OPERATIONAL 0x102UL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_ADMIN 0x200UL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_PEER 0x201UL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_OPERATIONAL 0x202UL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_HOST_OPERATIONAL 0x300UL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_LAST FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_HOST_OPERATIONAL + u8 count; + u8 unused_0; +}; + +/* hwrm_fw_get_structured_data_output (size:128b/16B) */ +struct hwrm_fw_get_structured_data_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 hdr_cnt; + u8 unused_0[6]; + u8 valid; +}; + +/* hwrm_fw_get_structured_data_cmd_err (size:64b/8B) */ +struct hwrm_fw_get_structured_data_cmd_err { + u8 code; + #define FW_GET_STRUCTURED_DATA_CMD_ERR_CODE_UNKNOWN 0x0UL + #define FW_GET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID 0x3UL + #define FW_GET_STRUCTURED_DATA_CMD_ERR_CODE_LAST FW_GET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID + u8 unused_0[7]; +}; + +/* hwrm_fw_livepatch_query_input (size:192b/24B) */ +struct hwrm_fw_livepatch_query_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 fw_target; + #define FW_LIVEPATCH_QUERY_REQ_FW_TARGET_COMMON_FW 0x1UL + #define FW_LIVEPATCH_QUERY_REQ_FW_TARGET_SECURE_FW 0x2UL + #define FW_LIVEPATCH_QUERY_REQ_FW_TARGET_LAST FW_LIVEPATCH_QUERY_REQ_FW_TARGET_SECURE_FW + u8 unused_0[7]; +}; + +/* hwrm_fw_livepatch_query_output (size:640b/80B) */ +struct hwrm_fw_livepatch_query_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + char install_ver[32]; + char active_ver[32]; + __le16 status_flags; + #define FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_INSTALL 0x1UL + #define FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_ACTIVE 0x2UL + u8 unused_0[5]; + u8 valid; +}; + +/* hwrm_fw_livepatch_input (size:256b/32B) */ +struct hwrm_fw_livepatch_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 opcode; + #define FW_LIVEPATCH_REQ_OPCODE_ACTIVATE 0x1UL + #define FW_LIVEPATCH_REQ_OPCODE_DEACTIVATE 0x2UL + #define FW_LIVEPATCH_REQ_OPCODE_LAST FW_LIVEPATCH_REQ_OPCODE_DEACTIVATE + u8 fw_target; + #define FW_LIVEPATCH_REQ_FW_TARGET_COMMON_FW 0x1UL + #define FW_LIVEPATCH_REQ_FW_TARGET_SECURE_FW 0x2UL + #define FW_LIVEPATCH_REQ_FW_TARGET_LAST FW_LIVEPATCH_REQ_FW_TARGET_SECURE_FW + u8 loadtype; + #define FW_LIVEPATCH_REQ_LOADTYPE_NVM_INSTALL 0x1UL + #define FW_LIVEPATCH_REQ_LOADTYPE_MEMORY_DIRECT 0x2UL + #define FW_LIVEPATCH_REQ_LOADTYPE_LAST FW_LIVEPATCH_REQ_LOADTYPE_MEMORY_DIRECT + u8 flags; + __le32 patch_len; + __le64 host_addr; +}; + +/* hwrm_fw_livepatch_output (size:128b/16B) */ +struct hwrm_fw_livepatch_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_fw_livepatch_cmd_err (size:64b/8B) */ +struct hwrm_fw_livepatch_cmd_err { + u8 code; + #define FW_LIVEPATCH_CMD_ERR_CODE_UNKNOWN 0x0UL + #define FW_LIVEPATCH_CMD_ERR_CODE_INVALID_OPCODE 0x1UL + #define FW_LIVEPATCH_CMD_ERR_CODE_INVALID_TARGET 0x2UL + #define FW_LIVEPATCH_CMD_ERR_CODE_NOT_SUPPORTED 0x3UL + #define FW_LIVEPATCH_CMD_ERR_CODE_NOT_INSTALLED 0x4UL + #define FW_LIVEPATCH_CMD_ERR_CODE_NOT_PATCHED 0x5UL + #define FW_LIVEPATCH_CMD_ERR_CODE_AUTH_FAIL 0x6UL + #define FW_LIVEPATCH_CMD_ERR_CODE_INVALID_HEADER 0x7UL + #define FW_LIVEPATCH_CMD_ERR_CODE_INVALID_SIZE 0x8UL + #define FW_LIVEPATCH_CMD_ERR_CODE_ALREADY_PATCHED 0x9UL + #define FW_LIVEPATCH_CMD_ERR_CODE_LAST FW_LIVEPATCH_CMD_ERR_CODE_ALREADY_PATCHED + u8 unused_0[7]; +}; + +/* hwrm_exec_fwd_resp_input (size:1024b/128B) */ +struct hwrm_exec_fwd_resp_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 encap_request[26]; + __le16 encap_resp_target_id; + u8 unused_0[6]; +}; + +/* hwrm_exec_fwd_resp_output (size:128b/16B) */ +struct hwrm_exec_fwd_resp_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_reject_fwd_resp_input (size:1024b/128B) */ +struct hwrm_reject_fwd_resp_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 encap_request[26]; + __le16 encap_resp_target_id; + u8 unused_0[6]; +}; + +/* hwrm_reject_fwd_resp_output (size:128b/16B) */ +struct hwrm_reject_fwd_resp_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_fwd_resp_input (size:1024b/128B) */ +struct hwrm_fwd_resp_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 encap_resp_target_id; + __le16 encap_resp_cmpl_ring; + __le16 encap_resp_len; + u8 unused_0; + u8 unused_1; + __le64 encap_resp_addr; + __le32 encap_resp[24]; +}; + +/* hwrm_fwd_resp_output (size:128b/16B) */ +struct hwrm_fwd_resp_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_fwd_async_event_cmpl_input (size:320b/40B) */ +struct hwrm_fwd_async_event_cmpl_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 encap_async_event_target_id; + u8 unused_0[6]; + __le32 encap_async_event_cmpl[4]; +}; + +/* hwrm_fwd_async_event_cmpl_output (size:128b/16B) */ +struct hwrm_fwd_async_event_cmpl_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_temp_monitor_query_input (size:128b/16B) */ +struct hwrm_temp_monitor_query_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; +}; + +/* hwrm_temp_monitor_query_output (size:192b/24B) */ +struct hwrm_temp_monitor_query_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 temp; + u8 phy_temp; + u8 om_temp; + u8 flags; + #define TEMP_MONITOR_QUERY_RESP_FLAGS_TEMP_NOT_AVAILABLE 0x1UL + #define TEMP_MONITOR_QUERY_RESP_FLAGS_PHY_TEMP_NOT_AVAILABLE 0x2UL + #define TEMP_MONITOR_QUERY_RESP_FLAGS_OM_NOT_PRESENT 0x4UL + #define TEMP_MONITOR_QUERY_RESP_FLAGS_OM_TEMP_NOT_AVAILABLE 0x8UL + #define TEMP_MONITOR_QUERY_RESP_FLAGS_EXT_TEMP_FIELDS_AVAILABLE 0x10UL + #define TEMP_MONITOR_QUERY_RESP_FLAGS_THRESHOLD_VALUES_AVAILABLE 0x20UL + u8 temp2; + u8 phy_temp2; + u8 om_temp2; + u8 warn_threshold; + u8 critical_threshold; + u8 fatal_threshold; + u8 shutdown_threshold; + u8 unused_0[4]; + u8 valid; +}; + +/* hwrm_wol_filter_alloc_input (size:512b/64B) */ +struct hwrm_wol_filter_alloc_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + __le32 enables; + #define WOL_FILTER_ALLOC_REQ_ENABLES_MAC_ADDRESS 0x1UL + #define WOL_FILTER_ALLOC_REQ_ENABLES_PATTERN_OFFSET 0x2UL + #define WOL_FILTER_ALLOC_REQ_ENABLES_PATTERN_BUF_SIZE 0x4UL + #define WOL_FILTER_ALLOC_REQ_ENABLES_PATTERN_BUF_ADDR 0x8UL + #define WOL_FILTER_ALLOC_REQ_ENABLES_PATTERN_MASK_ADDR 0x10UL + #define WOL_FILTER_ALLOC_REQ_ENABLES_PATTERN_MASK_SIZE 0x20UL + __le16 port_id; + u8 wol_type; + #define WOL_FILTER_ALLOC_REQ_WOL_TYPE_MAGICPKT 0x0UL + #define WOL_FILTER_ALLOC_REQ_WOL_TYPE_BMP 0x1UL + #define WOL_FILTER_ALLOC_REQ_WOL_TYPE_INVALID 0xffUL + #define WOL_FILTER_ALLOC_REQ_WOL_TYPE_LAST WOL_FILTER_ALLOC_REQ_WOL_TYPE_INVALID + u8 unused_0[5]; + u8 mac_address[6]; + __le16 pattern_offset; + __le16 pattern_buf_size; + __le16 pattern_mask_size; + u8 unused_1[4]; + __le64 pattern_buf_addr; + __le64 pattern_mask_addr; +}; + +/* hwrm_wol_filter_alloc_output (size:128b/16B) */ +struct hwrm_wol_filter_alloc_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 wol_filter_id; + u8 unused_0[6]; + u8 valid; +}; + +/* hwrm_wol_filter_free_input (size:256b/32B) */ +struct hwrm_wol_filter_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define WOL_FILTER_FREE_REQ_FLAGS_FREE_ALL_WOL_FILTERS 0x1UL + __le32 enables; + #define WOL_FILTER_FREE_REQ_ENABLES_WOL_FILTER_ID 0x1UL + __le16 port_id; + u8 wol_filter_id; + u8 unused_0[5]; +}; + +/* hwrm_wol_filter_free_output (size:128b/16B) */ +struct hwrm_wol_filter_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_wol_filter_qcfg_input (size:448b/56B) */ +struct hwrm_wol_filter_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 port_id; + __le16 handle; + u8 unused_0[4]; + __le64 pattern_buf_addr; + __le16 pattern_buf_size; + u8 unused_1[6]; + __le64 pattern_mask_addr; + __le16 pattern_mask_size; + u8 unused_2[6]; +}; + +/* hwrm_wol_filter_qcfg_output (size:256b/32B) */ +struct hwrm_wol_filter_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 next_handle; + u8 wol_filter_id; + u8 wol_type; + #define WOL_FILTER_QCFG_RESP_WOL_TYPE_MAGICPKT 0x0UL + #define WOL_FILTER_QCFG_RESP_WOL_TYPE_BMP 0x1UL + #define WOL_FILTER_QCFG_RESP_WOL_TYPE_INVALID 0xffUL + #define WOL_FILTER_QCFG_RESP_WOL_TYPE_LAST WOL_FILTER_QCFG_RESP_WOL_TYPE_INVALID + __le32 unused_0; + u8 mac_address[6]; + __le16 pattern_offset; + __le16 pattern_size; + __le16 pattern_mask_size; + u8 unused_1[3]; + u8 valid; +}; + +/* hwrm_wol_reason_qcfg_input (size:320b/40B) */ +struct hwrm_wol_reason_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 port_id; + u8 unused_0[6]; + __le64 wol_pkt_buf_addr; + __le16 wol_pkt_buf_size; + u8 unused_1[6]; +}; + +/* hwrm_wol_reason_qcfg_output (size:128b/16B) */ +struct hwrm_wol_reason_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 wol_filter_id; + u8 wol_reason; + #define WOL_REASON_QCFG_RESP_WOL_REASON_MAGICPKT 0x0UL + #define WOL_REASON_QCFG_RESP_WOL_REASON_BMP 0x1UL + #define WOL_REASON_QCFG_RESP_WOL_REASON_INVALID 0xffUL + #define WOL_REASON_QCFG_RESP_WOL_REASON_LAST WOL_REASON_QCFG_RESP_WOL_REASON_INVALID + u8 wol_pkt_len; + u8 unused_0[4]; + u8 valid; +}; + +/* hwrm_dbg_read_direct_input (size:256b/32B) */ +struct hwrm_dbg_read_direct_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 host_dest_addr; + __le32 read_addr; + __le32 read_len32; +}; + +/* hwrm_dbg_read_direct_output (size:128b/16B) */ +struct hwrm_dbg_read_direct_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 crc32; + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_dbg_qcaps_input (size:192b/24B) */ +struct hwrm_dbg_qcaps_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 fid; + u8 unused_0[6]; +}; + +/* hwrm_dbg_qcaps_output (size:192b/24B) */ +struct hwrm_dbg_qcaps_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 fid; + u8 unused_0[2]; + __le32 coredump_component_disable_caps; + #define DBG_QCAPS_RESP_COREDUMP_COMPONENT_DISABLE_CAPS_NVRAM 0x1UL + __le32 flags; + #define DBG_QCAPS_RESP_FLAGS_CRASHDUMP_NVM 0x1UL + #define DBG_QCAPS_RESP_FLAGS_CRASHDUMP_HOST_DDR 0x2UL + #define DBG_QCAPS_RESP_FLAGS_CRASHDUMP_SOC_DDR 0x4UL + #define DBG_QCAPS_RESP_FLAGS_USEQ 0x8UL + #define DBG_QCAPS_RESP_FLAGS_COREDUMP_HOST_DDR 0x10UL + #define DBG_QCAPS_RESP_FLAGS_COREDUMP_HOST_CAPTURE 0x20UL + #define DBG_QCAPS_RESP_FLAGS_PTRACE 0x40UL + #define DBG_QCAPS_RESP_FLAGS_REG_ACCESS_RESTRICTED 0x80UL + u8 unused_1[3]; + u8 valid; +}; + +/* hwrm_dbg_qcfg_input (size:192b/24B) */ +struct hwrm_dbg_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 fid; + __le16 flags; + #define DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_MASK 0x3UL + #define DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_SFT 0 + #define DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_DEST_NVM 0x0UL + #define DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_DEST_HOST_DDR 0x1UL + #define DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_DEST_SOC_DDR 0x2UL + #define DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_LAST DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_DEST_SOC_DDR + __le32 coredump_component_disable_flags; + #define DBG_QCFG_REQ_COREDUMP_COMPONENT_DISABLE_FLAGS_NVRAM 0x1UL +}; + +/* hwrm_dbg_qcfg_output (size:256b/32B) */ +struct hwrm_dbg_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 fid; + u8 unused_0[2]; + __le32 coredump_size; + __le32 flags; + #define DBG_QCFG_RESP_FLAGS_UART_LOG 0x1UL + #define DBG_QCFG_RESP_FLAGS_UART_LOG_SECONDARY 0x2UL + #define DBG_QCFG_RESP_FLAGS_FW_TRACE 0x4UL + #define DBG_QCFG_RESP_FLAGS_FW_TRACE_SECONDARY 0x8UL + #define DBG_QCFG_RESP_FLAGS_DEBUG_NOTIFY 0x10UL + #define DBG_QCFG_RESP_FLAGS_JTAG_DEBUG 0x20UL + __le16 async_cmpl_ring; + u8 unused_2[2]; + __le32 crashdump_size; + u8 unused_3[3]; + u8 valid; +}; + +/* hwrm_dbg_crashdump_medium_cfg_input (size:320b/40B) */ +struct hwrm_dbg_crashdump_medium_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 output_dest_flags; + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_TYPE_DDR 0x1UL + __le16 pg_size_lvl; + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_MASK 0x3UL + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_SFT 0 + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_0 0x0UL + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_1 0x1UL + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_2 0x2UL + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LAST DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_2 + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_MASK 0x1cUL + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_SFT 2 + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_4K (0x0UL << 2) + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8K (0x1UL << 2) + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_64K (0x2UL << 2) + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_2M (0x3UL << 2) + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8M (0x4UL << 2) + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_1G (0x5UL << 2) + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_LAST DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_1G + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_UNUSED11_MASK 0xffe0UL + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_UNUSED11_SFT 5 + __le32 size; + __le32 coredump_component_disable_flags; + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_NVRAM 0x1UL + __le32 unused_0; + __le64 pbl; +}; + +/* hwrm_dbg_crashdump_medium_cfg_output (size:128b/16B) */ +struct hwrm_dbg_crashdump_medium_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_1[7]; + u8 valid; +}; + +/* coredump_segment_record (size:128b/16B) */ +struct coredump_segment_record { + __le16 component_id; + __le16 segment_id; + __le16 max_instances; + u8 version_hi; + u8 version_low; + u8 seg_flags; + u8 compress_flags; + #define SFLAG_COMPRESSED_ZLIB 0x1UL + u8 unused_0[2]; + __le32 segment_len; +}; + +/* hwrm_dbg_coredump_list_input (size:256b/32B) */ +struct hwrm_dbg_coredump_list_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 host_dest_addr; + __le32 host_buf_len; + __le16 seq_no; + u8 flags; + #define DBG_COREDUMP_LIST_REQ_FLAGS_CRASHDUMP 0x1UL + u8 unused_0[1]; +}; + +/* hwrm_dbg_coredump_list_output (size:128b/16B) */ +struct hwrm_dbg_coredump_list_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 flags; + #define DBG_COREDUMP_LIST_RESP_FLAGS_MORE 0x1UL + u8 unused_0; + __le16 total_segments; + __le16 data_len; + u8 unused_1; + u8 valid; +}; + +/* hwrm_dbg_coredump_initiate_input (size:256b/32B) */ +struct hwrm_dbg_coredump_initiate_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 component_id; + __le16 segment_id; + __le16 instance; + __le16 unused_0; + u8 seg_flags; + #define DBG_COREDUMP_INITIATE_REQ_SEG_FLAGS_LIVE_DATA 0x1UL + #define DBG_COREDUMP_INITIATE_REQ_SEG_FLAGS_CRASH_DATA 0x2UL + #define DBG_COREDUMP_INITIATE_REQ_SEG_FLAGS_COLLECT_CTX_L1_CACHE 0x4UL + u8 unused_1[7]; +}; + +/* hwrm_dbg_coredump_initiate_output (size:128b/16B) */ +struct hwrm_dbg_coredump_initiate_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* coredump_data_hdr (size:128b/16B) */ +struct coredump_data_hdr { + __le32 address; + __le32 flags_length; + #define COREDUMP_DATA_HDR_FLAGS_LENGTH_ACTUAL_LEN_MASK 0xffffffUL + #define COREDUMP_DATA_HDR_FLAGS_LENGTH_ACTUAL_LEN_SFT 0 + #define COREDUMP_DATA_HDR_FLAGS_LENGTH_INDIRECT_ACCESS 0x1000000UL + __le32 instance; + __le32 next_offset; +}; + +/* hwrm_dbg_coredump_retrieve_input (size:448b/56B) */ +struct hwrm_dbg_coredump_retrieve_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 host_dest_addr; + __le32 host_buf_len; + __le32 unused_0; + __le16 component_id; + __le16 segment_id; + __le16 instance; + __le16 unused_1; + u8 seg_flags; + u8 unused_2; + __le16 unused_3; + __le32 unused_4; + __le32 seq_no; + __le32 unused_5; +}; + +/* hwrm_dbg_coredump_retrieve_output (size:128b/16B) */ +struct hwrm_dbg_coredump_retrieve_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 flags; + #define DBG_COREDUMP_RETRIEVE_RESP_FLAGS_MORE 0x1UL + u8 unused_0; + __le16 data_len; + u8 unused_1[3]; + u8 valid; +}; + +/* hwrm_dbg_ring_info_get_input (size:192b/24B) */ +struct hwrm_dbg_ring_info_get_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 ring_type; + #define DBG_RING_INFO_GET_REQ_RING_TYPE_L2_CMPL 0x0UL + #define DBG_RING_INFO_GET_REQ_RING_TYPE_TX 0x1UL + #define DBG_RING_INFO_GET_REQ_RING_TYPE_RX 0x2UL + #define DBG_RING_INFO_GET_REQ_RING_TYPE_NQ 0x3UL + #define DBG_RING_INFO_GET_REQ_RING_TYPE_LAST DBG_RING_INFO_GET_REQ_RING_TYPE_NQ + u8 unused_0[3]; + __le32 fw_ring_id; +}; + +/* hwrm_dbg_ring_info_get_output (size:192b/24B) */ +struct hwrm_dbg_ring_info_get_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 producer_index; + __le32 consumer_index; + __le32 cag_vector_ctrl; + __le16 st_tag; + u8 unused_0; + u8 valid; +}; + +/* hwrm_dbg_log_buffer_flush_input (size:192b/24B) */ +struct hwrm_dbg_log_buffer_flush_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 type; + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_SRT_TRACE 0x0UL + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_SRT2_TRACE 0x1UL + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_CRT_TRACE 0x2UL + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_CRT2_TRACE 0x3UL + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_RIGP0_TRACE 0x4UL + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_L2_HWRM_TRACE 0x5UL + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_ROCE_HWRM_TRACE 0x6UL + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_CA0_TRACE 0x7UL + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_CA1_TRACE 0x8UL + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_CA2_TRACE 0x9UL + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_RIGP1_TRACE 0xaUL + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_AFM_KONG_HWRM_TRACE 0xbUL + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_LAST DBG_LOG_BUFFER_FLUSH_REQ_TYPE_AFM_KONG_HWRM_TRACE + u8 unused_1[2]; + __le32 flags; + #define DBG_LOG_BUFFER_FLUSH_REQ_FLAGS_FLUSH_ALL_BUFFERS 0x1UL +}; + +/* hwrm_dbg_log_buffer_flush_output (size:128b/16B) */ +struct hwrm_dbg_log_buffer_flush_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 current_buffer_offset; + u8 unused_1[3]; + u8 valid; +}; + +/* hwrm_nvm_read_input (size:320b/40B) */ +struct hwrm_nvm_read_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 host_dest_addr; + __le16 dir_idx; + u8 unused_0[2]; + __le32 offset; + __le32 len; + u8 unused_1[4]; +}; + +/* hwrm_nvm_read_output (size:128b/16B) */ +struct hwrm_nvm_read_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_nvm_get_dir_entries_input (size:192b/24B) */ +struct hwrm_nvm_get_dir_entries_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 host_dest_addr; +}; + +/* hwrm_nvm_get_dir_entries_output (size:128b/16B) */ +struct hwrm_nvm_get_dir_entries_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_nvm_get_dir_info_input (size:128b/16B) */ +struct hwrm_nvm_get_dir_info_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; +}; + +/* hwrm_nvm_get_dir_info_output (size:192b/24B) */ +struct hwrm_nvm_get_dir_info_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 entries; + __le32 entry_length; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_nvm_write_input (size:448b/56B) */ +struct hwrm_nvm_write_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 host_src_addr; + __le16 dir_type; + __le16 dir_ordinal; + __le16 dir_ext; + __le16 dir_attr; + __le32 dir_data_length; + __le16 option; + __le16 flags; + #define NVM_WRITE_REQ_FLAGS_KEEP_ORIG_ACTIVE_IMG 0x1UL + #define NVM_WRITE_REQ_FLAGS_BATCH_MODE 0x2UL + #define NVM_WRITE_REQ_FLAGS_BATCH_LAST 0x4UL + #define NVM_WRITE_REQ_FLAGS_SKIP_CRID_CHECK 0x8UL + __le32 dir_item_length; + __le32 offset; + __le32 len; + __le32 unused_0; +}; + +/* hwrm_nvm_write_output (size:128b/16B) */ +struct hwrm_nvm_write_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 dir_item_length; + __le16 dir_idx; + u8 unused_0; + u8 valid; +}; + +/* hwrm_nvm_write_cmd_err (size:64b/8B) */ +struct hwrm_nvm_write_cmd_err { + u8 code; + #define NVM_WRITE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_WRITE_CMD_ERR_CODE_FRAG_ERR 0x1UL + #define NVM_WRITE_CMD_ERR_CODE_NO_SPACE 0x2UL + #define NVM_WRITE_CMD_ERR_CODE_LAST NVM_WRITE_CMD_ERR_CODE_NO_SPACE + u8 unused_0[7]; +}; + +/* hwrm_nvm_modify_input (size:320b/40B) */ +struct hwrm_nvm_modify_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 host_src_addr; + __le16 dir_idx; + __le16 flags; + #define NVM_MODIFY_REQ_FLAGS_BATCH_MODE 0x1UL + #define NVM_MODIFY_REQ_FLAGS_BATCH_LAST 0x2UL + __le32 offset; + __le32 len; + u8 unused_1[4]; +}; + +/* hwrm_nvm_modify_output (size:128b/16B) */ +struct hwrm_nvm_modify_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_nvm_find_dir_entry_input (size:256b/32B) */ +struct hwrm_nvm_find_dir_entry_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 enables; + #define NVM_FIND_DIR_ENTRY_REQ_ENABLES_DIR_IDX_VALID 0x1UL + __le16 dir_idx; + __le16 dir_type; + __le16 dir_ordinal; + __le16 dir_ext; + u8 opt_ordinal; + #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_MASK 0x3UL + #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_SFT 0 + #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_EQ 0x0UL + #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GE 0x1UL + #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GT 0x2UL + #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_LAST NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GT + u8 unused_0[3]; +}; + +/* hwrm_nvm_find_dir_entry_output (size:256b/32B) */ +struct hwrm_nvm_find_dir_entry_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 dir_item_length; + __le32 dir_data_length; + __le32 fw_ver; + __le16 dir_ordinal; + __le16 dir_idx; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_nvm_erase_dir_entry_input (size:192b/24B) */ +struct hwrm_nvm_erase_dir_entry_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 dir_idx; + u8 unused_0[6]; +}; + +/* hwrm_nvm_erase_dir_entry_output (size:128b/16B) */ +struct hwrm_nvm_erase_dir_entry_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_nvm_get_dev_info_input (size:192b/24B) */ +struct hwrm_nvm_get_dev_info_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 flags; + #define NVM_GET_DEV_INFO_REQ_FLAGS_SECURITY_SOC_NVM 0x1UL + u8 unused_0[7]; +}; + +/* hwrm_nvm_get_dev_info_output (size:768b/96B) */ +struct hwrm_nvm_get_dev_info_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 manufacturer_id; + __le16 device_id; + __le32 sector_size; + __le32 nvram_size; + __le32 reserved_size; + __le32 available_size; + u8 nvm_cfg_ver_maj; + u8 nvm_cfg_ver_min; + u8 nvm_cfg_ver_upd; + u8 flags; + #define NVM_GET_DEV_INFO_RESP_FLAGS_FW_VER_VALID 0x1UL + char pkg_name[16]; + __le16 hwrm_fw_major; + __le16 hwrm_fw_minor; + __le16 hwrm_fw_build; + __le16 hwrm_fw_patch; + __le16 mgmt_fw_major; + __le16 mgmt_fw_minor; + __le16 mgmt_fw_build; + __le16 mgmt_fw_patch; + __le16 roce_fw_major; + __le16 roce_fw_minor; + __le16 roce_fw_build; + __le16 roce_fw_patch; + __le16 netctrl_fw_major; + __le16 netctrl_fw_minor; + __le16 netctrl_fw_build; + __le16 netctrl_fw_patch; + __le16 srt2_fw_major; + __le16 srt2_fw_minor; + __le16 srt2_fw_build; + __le16 srt2_fw_patch; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_nvm_mod_dir_entry_input (size:256b/32B) */ +struct hwrm_nvm_mod_dir_entry_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 enables; + #define NVM_MOD_DIR_ENTRY_REQ_ENABLES_CHECKSUM 0x1UL + __le16 dir_idx; + __le16 dir_ordinal; + __le16 dir_ext; + __le16 dir_attr; + __le32 checksum; +}; + +/* hwrm_nvm_mod_dir_entry_output (size:128b/16B) */ +struct hwrm_nvm_mod_dir_entry_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_nvm_verify_update_input (size:192b/24B) */ +struct hwrm_nvm_verify_update_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 dir_type; + __le16 dir_ordinal; + __le16 dir_ext; + u8 unused_0[2]; +}; + +/* hwrm_nvm_verify_update_output (size:128b/16B) */ +struct hwrm_nvm_verify_update_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_nvm_install_update_input (size:192b/24B) */ +struct hwrm_nvm_install_update_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 install_type; + #define NVM_INSTALL_UPDATE_REQ_INSTALL_TYPE_NORMAL 0x0UL + #define NVM_INSTALL_UPDATE_REQ_INSTALL_TYPE_ALL 0xffffffffUL + #define NVM_INSTALL_UPDATE_REQ_INSTALL_TYPE_LAST NVM_INSTALL_UPDATE_REQ_INSTALL_TYPE_ALL + __le16 flags; + #define NVM_INSTALL_UPDATE_REQ_FLAGS_ERASE_UNUSED_SPACE 0x1UL + #define NVM_INSTALL_UPDATE_REQ_FLAGS_REMOVE_UNUSED_PKG 0x2UL + #define NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG 0x4UL + #define NVM_INSTALL_UPDATE_REQ_FLAGS_VERIFY_ONLY 0x8UL + u8 unused_0[2]; +}; + +/* hwrm_nvm_install_update_output (size:192b/24B) */ +struct hwrm_nvm_install_update_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le64 installed_items; + u8 result; + #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS 0x0UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_FAILURE 0xffUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_MALLOC_FAILURE 0xfdUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_INDEX_PARAMETER 0xfbUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_TYPE_PARAMETER 0xf3UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PREREQUISITE 0xf2UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_FILE_HEADER 0xecUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_SIGNATURE 0xebUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PROP_STREAM 0xeaUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PROP_LENGTH 0xe9UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_MANIFEST 0xe8UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_TRAILER 0xe7UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_CHECKSUM 0xe6UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_ITEM_CHECKSUM 0xe5UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_DATA_LENGTH 0xe4UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_DIRECTIVE 0xe1UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_CHIP_REV 0xceUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_DEVICE_ID 0xcdUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_SUBSYS_VENDOR 0xccUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_SUBSYS_ID 0xcbUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_PLATFORM 0xc5UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_DUPLICATE_ITEM 0xc4UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_ZERO_LENGTH_ITEM 0xc3UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_CHECKSUM_ERROR 0xb9UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_DATA_ERROR 0xb8UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_AUTHENTICATION_ERROR 0xb7UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_NOT_FOUND 0xb0UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_LOCKED 0xa7UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_LAST NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_LOCKED + u8 problem_item; + #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_NONE 0x0UL + #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_PACKAGE 0xffUL + #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_LAST NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_PACKAGE + u8 reset_required; + #define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_NONE 0x0UL + #define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_PCI 0x1UL + #define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_POWER 0x2UL + #define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_LAST NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_POWER + u8 unused_0[4]; + u8 valid; +}; + +/* hwrm_nvm_install_update_cmd_err (size:64b/8B) */ +struct hwrm_nvm_install_update_cmd_err { + u8 code; + #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR 0x1UL + #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_SPACE 0x2UL + #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_ANTI_ROLLBACK 0x3UL + #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_VOLTREG_SUPPORT 0x4UL + #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_LAST NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_VOLTREG_SUPPORT + u8 unused_0[7]; +}; + +/* hwrm_nvm_get_variable_input (size:320b/40B) */ +struct hwrm_nvm_get_variable_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 dest_data_addr; + __le16 data_len; + __le16 option_num; + #define NVM_GET_VARIABLE_REQ_OPTION_NUM_RSVD_0 0x0UL + #define NVM_GET_VARIABLE_REQ_OPTION_NUM_RSVD_FFFF 0xffffUL + #define NVM_GET_VARIABLE_REQ_OPTION_NUM_LAST NVM_GET_VARIABLE_REQ_OPTION_NUM_RSVD_FFFF + __le16 dimensions; + __le16 index_0; + __le16 index_1; + __le16 index_2; + __le16 index_3; + u8 flags; + #define NVM_GET_VARIABLE_REQ_FLAGS_FACTORY_DFLT 0x1UL + u8 unused_0; +}; + +/* hwrm_nvm_get_variable_output (size:128b/16B) */ +struct hwrm_nvm_get_variable_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 data_len; + __le16 option_num; + #define NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_0 0x0UL + #define NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_FFFF 0xffffUL + #define NVM_GET_VARIABLE_RESP_OPTION_NUM_LAST NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_FFFF + u8 unused_0[3]; + u8 valid; +}; + +/* hwrm_nvm_get_variable_cmd_err (size:64b/8B) */ +struct hwrm_nvm_get_variable_cmd_err { + u8 code; + #define NVM_GET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT 0x3UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_LAST NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT + u8 unused_0[7]; +}; + +/* hwrm_nvm_set_variable_input (size:320b/40B) */ +struct hwrm_nvm_set_variable_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 src_data_addr; + __le16 data_len; + __le16 option_num; + #define NVM_SET_VARIABLE_REQ_OPTION_NUM_RSVD_0 0x0UL + #define NVM_SET_VARIABLE_REQ_OPTION_NUM_RSVD_FFFF 0xffffUL + #define NVM_SET_VARIABLE_REQ_OPTION_NUM_LAST NVM_SET_VARIABLE_REQ_OPTION_NUM_RSVD_FFFF + __le16 dimensions; + __le16 index_0; + __le16 index_1; + __le16 index_2; + __le16 index_3; + u8 flags; + #define NVM_SET_VARIABLE_REQ_FLAGS_FORCE_FLUSH 0x1UL + #define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_MASK 0xeUL + #define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_SFT 1 + #define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_NONE (0x0UL << 1) + #define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_HMAC_SHA1 (0x1UL << 1) + #define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_AES256 (0x2UL << 1) + #define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_HMAC_SHA1_AUTH (0x3UL << 1) + #define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_LAST NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_HMAC_SHA1_AUTH + #define NVM_SET_VARIABLE_REQ_FLAGS_FLAGS_UNUSED_0_MASK 0x70UL + #define NVM_SET_VARIABLE_REQ_FLAGS_FLAGS_UNUSED_0_SFT 4 + #define NVM_SET_VARIABLE_REQ_FLAGS_FACTORY_DEFAULT 0x80UL + u8 unused_0; +}; + +/* hwrm_nvm_set_variable_output (size:128b/16B) */ +struct hwrm_nvm_set_variable_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_nvm_set_variable_cmd_err (size:64b/8B) */ +struct hwrm_nvm_set_variable_cmd_err { + u8 code; + #define NVM_SET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_LAST NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR + u8 unused_0[7]; +}; + +/* hwrm_selftest_qlist_input (size:128b/16B) */ +struct hwrm_selftest_qlist_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; +}; + +/* hwrm_selftest_qlist_output (size:2240b/280B) */ +struct hwrm_selftest_qlist_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 num_tests; + u8 available_tests; + #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_NVM_TEST 0x1UL + #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_LINK_TEST 0x2UL + #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_REGISTER_TEST 0x4UL + #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_MEMORY_TEST 0x8UL + #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_PCIE_SERDES_TEST 0x10UL + #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_ETHERNET_SERDES_TEST 0x20UL + u8 offline_tests; + #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_NVM_TEST 0x1UL + #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_LINK_TEST 0x2UL + #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_REGISTER_TEST 0x4UL + #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_MEMORY_TEST 0x8UL + #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_PCIE_SERDES_TEST 0x10UL + #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_ETHERNET_SERDES_TEST 0x20UL + u8 unused_0; + __le16 test_timeout; + u8 unused_1[2]; + char test_name[8][32]; + u8 eyescope_target_BER_support; + #define SELFTEST_QLIST_RESP_EYESCOPE_TARGET_BER_SUPPORT_BER_1E8_SUPPORTED 0x0UL + #define SELFTEST_QLIST_RESP_EYESCOPE_TARGET_BER_SUPPORT_BER_1E9_SUPPORTED 0x1UL + #define SELFTEST_QLIST_RESP_EYESCOPE_TARGET_BER_SUPPORT_BER_1E10_SUPPORTED 0x2UL + #define SELFTEST_QLIST_RESP_EYESCOPE_TARGET_BER_SUPPORT_BER_1E11_SUPPORTED 0x3UL + #define SELFTEST_QLIST_RESP_EYESCOPE_TARGET_BER_SUPPORT_BER_1E12_SUPPORTED 0x4UL + #define SELFTEST_QLIST_RESP_EYESCOPE_TARGET_BER_SUPPORT_LAST SELFTEST_QLIST_RESP_EYESCOPE_TARGET_BER_SUPPORT_BER_1E12_SUPPORTED + u8 unused_2[6]; + u8 valid; +}; + +/* hwrm_selftest_exec_input (size:192b/24B) */ +struct hwrm_selftest_exec_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 flags; + #define SELFTEST_EXEC_REQ_FLAGS_NVM_TEST 0x1UL + #define SELFTEST_EXEC_REQ_FLAGS_LINK_TEST 0x2UL + #define SELFTEST_EXEC_REQ_FLAGS_REGISTER_TEST 0x4UL + #define SELFTEST_EXEC_REQ_FLAGS_MEMORY_TEST 0x8UL + #define SELFTEST_EXEC_REQ_FLAGS_PCIE_SERDES_TEST 0x10UL + #define SELFTEST_EXEC_REQ_FLAGS_ETHERNET_SERDES_TEST 0x20UL + u8 unused_0[7]; +}; + +/* hwrm_selftest_exec_output (size:128b/16B) */ +struct hwrm_selftest_exec_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 requested_tests; + #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_NVM_TEST 0x1UL + #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_LINK_TEST 0x2UL + #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_REGISTER_TEST 0x4UL + #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_MEMORY_TEST 0x8UL + #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_PCIE_SERDES_TEST 0x10UL + #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_ETHERNET_SERDES_TEST 0x20UL + u8 test_success; + #define SELFTEST_EXEC_RESP_TEST_SUCCESS_NVM_TEST 0x1UL + #define SELFTEST_EXEC_RESP_TEST_SUCCESS_LINK_TEST 0x2UL + #define SELFTEST_EXEC_RESP_TEST_SUCCESS_REGISTER_TEST 0x4UL + #define SELFTEST_EXEC_RESP_TEST_SUCCESS_MEMORY_TEST 0x8UL + #define SELFTEST_EXEC_RESP_TEST_SUCCESS_PCIE_SERDES_TEST 0x10UL + #define SELFTEST_EXEC_RESP_TEST_SUCCESS_ETHERNET_SERDES_TEST 0x20UL + u8 unused_0[5]; + u8 valid; +}; + +/* hwrm_selftest_irq_input (size:128b/16B) */ +struct hwrm_selftest_irq_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; +}; + +/* hwrm_selftest_irq_output (size:128b/16B) */ +struct hwrm_selftest_irq_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* dbc_dbc (size:64b/8B) */ +struct dbc_dbc { + __le32 index; + #define DBC_DBC_INDEX_MASK 0xffffffUL + #define DBC_DBC_INDEX_SFT 0 + #define DBC_DBC_EPOCH 0x1000000UL + #define DBC_DBC_TOGGLE_MASK 0x6000000UL + #define DBC_DBC_TOGGLE_SFT 25 + __le32 type_path_xid; + #define DBC_DBC_XID_MASK 0xfffffUL + #define DBC_DBC_XID_SFT 0 + #define DBC_DBC_PATH_MASK 0x3000000UL + #define DBC_DBC_PATH_SFT 24 + #define DBC_DBC_PATH_ROCE (0x0UL << 24) + #define DBC_DBC_PATH_L2 (0x1UL << 24) + #define DBC_DBC_PATH_ENGINE (0x2UL << 24) + #define DBC_DBC_PATH_LAST DBC_DBC_PATH_ENGINE + #define DBC_DBC_VALID 0x4000000UL + #define DBC_DBC_DEBUG_TRACE 0x8000000UL + #define DBC_DBC_TYPE_MASK 0xf0000000UL + #define DBC_DBC_TYPE_SFT 28 + #define DBC_DBC_TYPE_SQ (0x0UL << 28) + #define DBC_DBC_TYPE_RQ (0x1UL << 28) + #define DBC_DBC_TYPE_SRQ (0x2UL << 28) + #define DBC_DBC_TYPE_SRQ_ARM (0x3UL << 28) + #define DBC_DBC_TYPE_CQ (0x4UL << 28) + #define DBC_DBC_TYPE_CQ_ARMSE (0x5UL << 28) + #define DBC_DBC_TYPE_CQ_ARMALL (0x6UL << 28) + #define DBC_DBC_TYPE_CQ_ARMENA (0x7UL << 28) + #define DBC_DBC_TYPE_SRQ_ARMENA (0x8UL << 28) + #define DBC_DBC_TYPE_CQ_CUTOFF_ACK (0x9UL << 28) + #define DBC_DBC_TYPE_NQ (0xaUL << 28) + #define DBC_DBC_TYPE_NQ_ARM (0xbUL << 28) + #define DBC_DBC_TYPE_NQ_MASK (0xeUL << 28) + #define DBC_DBC_TYPE_NULL (0xfUL << 28) + #define DBC_DBC_TYPE_LAST DBC_DBC_TYPE_NULL +}; + +/* db_push_start (size:64b/8B) */ +struct db_push_start { + u64 db; + #define DB_PUSH_START_DB_INDEX_MASK 0xffffffUL + #define DB_PUSH_START_DB_INDEX_SFT 0 + #define DB_PUSH_START_DB_PI_LO_MASK 0xff000000UL + #define DB_PUSH_START_DB_PI_LO_SFT 24 + #define DB_PUSH_START_DB_XID_MASK 0xfffff00000000ULL + #define DB_PUSH_START_DB_XID_SFT 32 + #define DB_PUSH_START_DB_PI_HI_MASK 0xf0000000000000ULL + #define DB_PUSH_START_DB_PI_HI_SFT 52 + #define DB_PUSH_START_DB_TYPE_MASK 0xf000000000000000ULL + #define DB_PUSH_START_DB_TYPE_SFT 60 + #define DB_PUSH_START_DB_TYPE_PUSH_START (0xcULL << 60) + #define DB_PUSH_START_DB_TYPE_PUSH_END (0xdULL << 60) + #define DB_PUSH_START_DB_TYPE_LAST DB_PUSH_START_DB_TYPE_PUSH_END +}; + +/* db_push_end (size:64b/8B) */ +struct db_push_end { + u64 db; + #define DB_PUSH_END_DB_INDEX_MASK 0xffffffUL + #define DB_PUSH_END_DB_INDEX_SFT 0 + #define DB_PUSH_END_DB_PI_LO_MASK 0xff000000UL + #define DB_PUSH_END_DB_PI_LO_SFT 24 + #define DB_PUSH_END_DB_XID_MASK 0xfffff00000000ULL + #define DB_PUSH_END_DB_XID_SFT 32 + #define DB_PUSH_END_DB_PI_HI_MASK 0xf0000000000000ULL + #define DB_PUSH_END_DB_PI_HI_SFT 52 + #define DB_PUSH_END_DB_PATH_MASK 0x300000000000000ULL + #define DB_PUSH_END_DB_PATH_SFT 56 + #define DB_PUSH_END_DB_PATH_ROCE (0x0ULL << 56) + #define DB_PUSH_END_DB_PATH_L2 (0x1ULL << 56) + #define DB_PUSH_END_DB_PATH_ENGINE (0x2ULL << 56) + #define DB_PUSH_END_DB_PATH_LAST DB_PUSH_END_DB_PATH_ENGINE + #define DB_PUSH_END_DB_DEBUG_TRACE 0x800000000000000ULL + #define DB_PUSH_END_DB_TYPE_MASK 0xf000000000000000ULL + #define DB_PUSH_END_DB_TYPE_SFT 60 + #define DB_PUSH_END_DB_TYPE_PUSH_START (0xcULL << 60) + #define DB_PUSH_END_DB_TYPE_PUSH_END (0xdULL << 60) + #define DB_PUSH_END_DB_TYPE_LAST DB_PUSH_END_DB_TYPE_PUSH_END +}; + +/* db_push_info (size:64b/8B) */ +struct db_push_info { + u32 push_size_push_index; + #define DB_PUSH_INFO_PUSH_INDEX_MASK 0xffffffUL + #define DB_PUSH_INFO_PUSH_INDEX_SFT 0 + #define DB_PUSH_INFO_PUSH_SIZE_MASK 0x1f000000UL + #define DB_PUSH_INFO_PUSH_SIZE_SFT 24 + u32 reserved32; +}; + +/* fw_status_reg (size:32b/4B) */ +struct fw_status_reg { + u32 fw_status; + #define FW_STATUS_REG_CODE_MASK 0xffffUL + #define FW_STATUS_REG_CODE_SFT 0 + #define FW_STATUS_REG_CODE_READY 0x8000UL + #define FW_STATUS_REG_CODE_LAST FW_STATUS_REG_CODE_READY + #define FW_STATUS_REG_IMAGE_DEGRADED 0x10000UL + #define FW_STATUS_REG_RECOVERABLE 0x20000UL + #define FW_STATUS_REG_CRASHDUMP_ONGOING 0x40000UL + #define FW_STATUS_REG_CRASHDUMP_COMPLETE 0x80000UL + #define FW_STATUS_REG_SHUTDOWN 0x100000UL + #define FW_STATUS_REG_CRASHED_NO_MASTER 0x200000UL + #define FW_STATUS_REG_RECOVERING 0x400000UL + #define FW_STATUS_REG_MANU_DEBUG_STATUS 0x800000UL +}; + +/* hcomm_status (size:64b/8B) */ +struct hcomm_status { + u32 sig_ver; + #define HCOMM_STATUS_VER_MASK 0xffUL + #define HCOMM_STATUS_VER_SFT 0 + #define HCOMM_STATUS_VER_LATEST 0x1UL + #define HCOMM_STATUS_VER_LAST HCOMM_STATUS_VER_LATEST + #define HCOMM_STATUS_SIGNATURE_MASK 0xffffff00UL + #define HCOMM_STATUS_SIGNATURE_SFT 8 + #define HCOMM_STATUS_SIGNATURE_VAL (0x484353UL << 8) + #define HCOMM_STATUS_SIGNATURE_LAST HCOMM_STATUS_SIGNATURE_VAL + u32 fw_status_loc; + #define HCOMM_STATUS_TRUE_ADDR_SPACE_MASK 0x3UL + #define HCOMM_STATUS_TRUE_ADDR_SPACE_SFT 0 + #define HCOMM_STATUS_FW_STATUS_LOC_ADDR_SPACE_PCIE_CFG 0x0UL + #define HCOMM_STATUS_FW_STATUS_LOC_ADDR_SPACE_GRC 0x1UL + #define HCOMM_STATUS_FW_STATUS_LOC_ADDR_SPACE_BAR0 0x2UL + #define HCOMM_STATUS_FW_STATUS_LOC_ADDR_SPACE_BAR1 0x3UL + #define HCOMM_STATUS_FW_STATUS_LOC_ADDR_SPACE_LAST HCOMM_STATUS_FW_STATUS_LOC_ADDR_SPACE_BAR1 + #define HCOMM_STATUS_TRUE_OFFSET_MASK 0xfffffffcUL + #define HCOMM_STATUS_TRUE_OFFSET_SFT 2 +}; +#define HCOMM_STATUS_STRUCT_LOC 0x31001F0UL + +#endif /* _BNXT_HSI_H_ */ -- cgit v1.2.3 From 941ab0b369c983f7867de54c8579fd7f1676ee3c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 Jul 2025 17:23:28 -0700 Subject: rcutorture: Remove support for SRCU-lite Because SRCU-lite is being replaced by SRCU-fast, this commit removes support for SRCU-lite from rcutorture.c Both SRCU-lite and SRCU-fast provide faster readers by dropping the smp_mb() call from their lock and unlock primitives, but incur a pair of added RCU grace periods during the SRCU grace period. There is a trivial mapping from the SRCU-lite API to that of SRCU-fast, so there should be no transition issues. [ paulmck: Apply Christoph Hellwig feedback. ] Signed-off-by: "Paul E. McKenney" Signed-off-by: Neeraj Upadhyay (AMD) --- include/linux/srcu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 900b0d5c05f5..c20dacb563e5 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -49,7 +49,7 @@ int init_srcu_struct(struct srcu_struct *ssp); #define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite(). #define SRCU_READ_FLAVOR_FAST 0x8 // srcu_read_lock_fast(). #define SRCU_READ_FLAVOR_ALL (SRCU_READ_FLAVOR_NORMAL | SRCU_READ_FLAVOR_NMI | \ - SRCU_READ_FLAVOR_LITE | SRCU_READ_FLAVOR_FAST) // All of the above. + SRCU_READ_FLAVOR_FAST) // All of the above. #define SRCU_READ_FLAVOR_SLOWGP (SRCU_READ_FLAVOR_LITE | SRCU_READ_FLAVOR_FAST) // Flavors requiring synchronize_rcu() // instead of smp_mb(). -- cgit v1.2.3 From 623baa01d5b43ca06ba337751d9a4f62199d1715 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 Jul 2025 17:23:29 -0700 Subject: srcu: Remove SRCU-lite implementation This commit removes the SRCU-lite implementation, which has been replaced by SRCU-fast. Both SRCU-lite and SRCU-fast provide faster readers by dropping the smp_mb() call from their lock and unlock primitives, but incur a pair of added RCU grace periods during the SRCU grace period. There is a trivial mapping from the SRCU-lite API to that of SRCU-fast, so there should be no transition issues. [ paulmck: Apply Christoph Hellwig feedback. ] Signed-off-by: "Paul E. McKenney" Signed-off-by: Neeraj Upadhyay (AMD) --- include/linux/srcu.h | 47 ++--------------------------------------------- include/linux/srcutiny.h | 3 --- include/linux/srcutree.h | 38 -------------------------------------- 3 files changed, 2 insertions(+), 86 deletions(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index c20dacb563e5..cf711a0f440b 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -46,11 +46,11 @@ int init_srcu_struct(struct srcu_struct *ssp); /* Values for SRCU Tree srcu_data ->srcu_reader_flavor, but also used by rcutorture. */ #define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). #define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). -#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite(). +// 0x4 // SRCU-lite is no longer with us. #define SRCU_READ_FLAVOR_FAST 0x8 // srcu_read_lock_fast(). #define SRCU_READ_FLAVOR_ALL (SRCU_READ_FLAVOR_NORMAL | SRCU_READ_FLAVOR_NMI | \ SRCU_READ_FLAVOR_FAST) // All of the above. -#define SRCU_READ_FLAVOR_SLOWGP (SRCU_READ_FLAVOR_LITE | SRCU_READ_FLAVOR_FAST) +#define SRCU_READ_FLAVOR_SLOWGP SRCU_READ_FLAVOR_FAST // Flavors requiring synchronize_rcu() // instead of smp_mb(). void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); @@ -299,33 +299,6 @@ static inline struct srcu_ctr __percpu *srcu_down_read_fast(struct srcu_struct * return __srcu_read_lock_fast(ssp); } -/** - * srcu_read_lock_lite - register a new reader for an SRCU-protected structure. - * @ssp: srcu_struct in which to register the new reader. - * - * Enter an SRCU read-side critical section, but for a light-weight - * smp_mb()-free reader. See srcu_read_lock() for more information. - * - * If srcu_read_lock_lite() is ever used on an srcu_struct structure, - * then none of the other flavors may be used, whether before, during, - * or after. Note that grace-period auto-expediting is disabled for _lite - * srcu_struct structures because auto-expedited grace periods invoke - * synchronize_rcu_expedited(), IPIs and all. - * - * Note that srcu_read_lock_lite() can be invoked only from those contexts - * where RCU is watching, that is, from contexts where it would be legal - * to invoke rcu_read_lock(). Otherwise, lockdep will complain. - */ -static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp) -{ - int retval; - - srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_LITE); - retval = __srcu_read_lock_lite(ssp); - rcu_try_lock_acquire(&ssp->dep_map); - return retval; -} - /** * srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure. * @ssp: srcu_struct in which to register the new reader. @@ -434,22 +407,6 @@ static inline void srcu_up_read_fast(struct srcu_struct *ssp, struct srcu_ctr __ __srcu_read_unlock_fast(ssp, scp); } -/** - * srcu_read_unlock_lite - unregister a old reader from an SRCU-protected structure. - * @ssp: srcu_struct in which to unregister the old reader. - * @idx: return value from corresponding srcu_read_lock_lite(). - * - * Exit a light-weight SRCU read-side critical section. - */ -static inline void srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) - __releases(ssp) -{ - WARN_ON_ONCE(idx & ~0x1); - srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE); - srcu_lock_release(&ssp->dep_map); - __srcu_read_unlock_lite(ssp, idx); -} - /** * srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure. * @ssp: srcu_struct in which to unregister the old reader. diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 380260317d98..51ce25f07930 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -93,9 +93,6 @@ static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ __srcu_read_unlock(ssp, __srcu_ptr_to_ctr(ssp, scp)); } -#define __srcu_read_lock_lite __srcu_read_lock -#define __srcu_read_unlock_lite __srcu_read_unlock - static inline void synchronize_srcu_expedited(struct srcu_struct *ssp) { synchronize_srcu(ssp); diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 8bed7e6cc4c1..bf44d8d1e69e 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -278,44 +278,6 @@ static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); } -/* - * Counts the new reader in the appropriate per-CPU element of the - * srcu_struct. Returns an index that must be passed to the matching - * srcu_read_unlock_lite(). - * - * Note that this_cpu_inc() is an RCU read-side critical section either - * because it disables interrupts, because it is a single instruction, - * or because it is a read-modify-write atomic operation, depending on - * the whims of the architecture. - */ -static inline int __srcu_read_lock_lite(struct srcu_struct *ssp) -{ - struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); - - RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite()."); - this_cpu_inc(scp->srcu_locks.counter); /* Y */ - barrier(); /* Avoid leaking the critical section. */ - return __srcu_ptr_to_ctr(ssp, scp); -} - -/* - * Removes the count for the old reader from the appropriate - * per-CPU element of the srcu_struct. Note that this may well be a - * different CPU than that which was incremented by the corresponding - * srcu_read_lock_lite(), but it must be within the same task. - * - * Note that this_cpu_inc() is an RCU read-side critical section either - * because it disables interrupts, because it is a single instruction, - * or because it is a read-modify-write atomic operation, depending on - * the whims of the architecture. - */ -static inline void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) -{ - barrier(); /* Avoid leaking the critical section. */ - this_cpu_inc(__srcu_ctr_to_ptr(ssp, idx)->srcu_unlocks.counter); /* Z */ - RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite()."); -} - void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor); // Record reader usage even for CONFIG_PROVE_RCU=n kernels. This is -- cgit v1.2.3 From df6f9a918ea856fc288b9001b0414c5be136d7d0 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 16 Jul 2025 11:03:13 +0900 Subject: ata: libata-eh: Remove ata_do_eh() The only reason for ata_do_eh() to exist is that the two caller sites, ata_std_error_handler() and ata_sff_error_handler() may pass it a NULL hardreset operation so that the built-in (generic) hardreset operation for a driver is ignored if the adapter SCR access is not available. However, ata_std_error_handler() and ata_sff_error_handler() modifications of the hardreset port operation can easily be combined as they are mutually exclusive. That is, a driver using sata_std_hardreset() as its hardreset operation cannot use sata_sff_hardreset() and vice-versa. With this observation, ata_do_eh() can be removed and its code moved to ata_std_error_handler(). The condition used to ignore the built-in hardreset port operation is modified to be the one that was used in ata_sff_error_handler(). This requires defining a stub for the function sata_sff_hardreset() to avoid compilation errors when CONFIG_ATA_SFF is not enabled. Furthermore, instead of modifying the local hardreset operation definition, set the ATA_LFLAG_NO_HRST link flag to prevent the use of built-in hardreset methods for ports without a valid scr_read function. This flag is checked in ata_eh_reset() and if set, the hardreset method is ignored. This change simplifies ata_sff_error_handler() as this function now only needs to call ata_std_error_handler(). No functional changes. Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20250716020315.235457-2-dlemoal@kernel.org Signed-off-by: Niklas Cassel --- include/linux/libata.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index d092747be588..cf0b3fff3198 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1412,9 +1412,6 @@ extern void ata_eh_thaw_port(struct ata_port *ap); extern void ata_eh_qc_complete(struct ata_queued_cmd *qc); extern void ata_eh_qc_retry(struct ata_queued_cmd *qc); -extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, - ata_reset_fn_t softreset, ata_reset_fn_t hardreset, - ata_postreset_fn_t postreset); extern void ata_std_error_handler(struct ata_port *ap); extern void ata_std_sched_eh(struct ata_port *ap); extern void ata_std_end_eh(struct ata_port *ap); @@ -2152,6 +2149,12 @@ static inline u8 ata_wait_idle(struct ata_port *ap) return status; } +#else /* CONFIG_ATA_SFF */ +static inline int sata_sff_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_ATA_SFF */ #endif /* __LINUX_LIBATA_H__ */ -- cgit v1.2.3 From a4daf088a77323154514eb1f8626bbdf9329cfd4 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 16 Jul 2025 11:03:14 +0900 Subject: ata: libata-eh: Simplify reset operation management Introduce struct ata_reset_operations to aggregate in a single structure the definitions of the 4 reset methods (prereset, softreset, hardreset and postreset) for a port. This new structure is used in struct ata_port to define the reset methods for a regular port (reset field) and for a port-multiplier port (pmp_reset field). A pointer to either of these fields replaces the 4 reset method arguments passed to ata_eh_recover() and ata_eh_reset(). The definition of the reset methods for all drivers is changed to use the reset and pmp_reset fields in struct ata_port_operations. A large number of files is modifed, but no functional changes are introduced. Suggested-by: Niklas Cassel Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20250716020315.235457-3-dlemoal@kernel.org Signed-off-by: Niklas Cassel --- include/linux/libata.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index cf0b3fff3198..912ace523880 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -944,6 +944,13 @@ struct ata_port { */ #define ATA_OP_NULL (void *)(unsigned long)(-ENOENT) +struct ata_reset_operations { + ata_prereset_fn_t prereset; + ata_reset_fn_t softreset; + ata_reset_fn_t hardreset; + ata_postreset_fn_t postreset; +}; + struct ata_port_operations { /* * Command execution @@ -970,14 +977,8 @@ struct ata_port_operations { void (*freeze)(struct ata_port *ap); void (*thaw)(struct ata_port *ap); - ata_prereset_fn_t prereset; - ata_reset_fn_t softreset; - ata_reset_fn_t hardreset; - ata_postreset_fn_t postreset; - ata_prereset_fn_t pmp_prereset; - ata_reset_fn_t pmp_softreset; - ata_reset_fn_t pmp_hardreset; - ata_postreset_fn_t pmp_postreset; + struct ata_reset_operations reset; + struct ata_reset_operations pmp_reset; void (*error_handler)(struct ata_port *ap); void (*lost_interrupt)(struct ata_port *ap); void (*post_internal_cmd)(struct ata_queued_cmd *qc); -- cgit v1.2.3 From 95a16160ca1d75c66bf7a1c5e0bcaffb18e7c7fc Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 16 Jul 2025 13:21:24 +0530 Subject: drm/amdgpu: Reset the clear flag in buddy during resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Added a handler in DRM buddy manager to reset the cleared flag for the blocks in the freelist. - This is necessary because, upon resuming, the VRAM becomes cluttered with BIOS data, yet the VRAM backend manager believes that everything has been cleared. v2: - Add lock before accessing drm_buddy_clear_reset_blocks()(Matthew Auld) - Force merge the two dirty blocks.(Matthew Auld) - Add a new unit test case for this issue.(Matthew Auld) - Having this function being able to flip the state either way would be good. (Matthew Brost) v3(Matthew Auld): - Do merge step first to avoid the use of extra reset flag. Signed-off-by: Arunpravin Paneer Selvam Suggested-by: Christian König Acked-by: Christian König Reviewed-by: Matthew Auld Cc: stable@vger.kernel.org Fixes: a68c7eaa7a8f ("drm/amdgpu: Enable clear page functionality") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3812 Signed-off-by: Christian König Link: https://lore.kernel.org/r/20250716075125.240637-2-Arunpravin.PaneerSelvam@amd.com --- include/drm/drm_buddy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index 9689a7c5dd36..513837632b7d 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -160,6 +160,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm, u64 new_size, struct list_head *blocks); +void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear); + void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block); void drm_buddy_free_list(struct drm_buddy *mm, -- cgit v1.2.3 From 35cff7af7598b9eb143cc0556e5532e2ded3b61a Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 20 May 2025 13:34:37 +0300 Subject: container_of: Document container_of() is not to be used in new code There is a warning in the kerneldoc documentation of container_of() that constness of its ptr argument is lost. While this is a valid suggestion container_of_const() should be used instead, the vast majority of new code still uses container_of(): $ git diff v6.13 v6.14|grep container_of\(|wc -l 646 $ git diff v6.13 v6.14|grep container_of_const|wc -l 9 Make an explicit recommendation to use container_of_const(). Signed-off-by: Sakari Ailus Link: https://lore.kernel.org/r/20250520103437.468691-1-sakari.ailus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/container_of.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/container_of.h b/include/linux/container_of.h index 713890c867be..1f6ebf27d962 100644 --- a/include/linux/container_of.h +++ b/include/linux/container_of.h @@ -14,6 +14,7 @@ * @member: the name of the member within the struct. * * WARNING: any const qualifier of @ptr is lost. + * Do not use container_of() in new code. */ #define container_of(ptr, type, member) ({ \ void *__mptr = (void *)(ptr); \ @@ -28,6 +29,8 @@ * @ptr: the pointer to the member * @type: the type of the container struct this is embedded in. * @member: the name of the member within the struct. + * + * Always prefer container_of_const() instead of container_of() in new code. */ #define container_of_const(ptr, type, member) \ _Generic(ptr, \ -- cgit v1.2.3 From bdfa89c489296f092751fcee23b5d171c9fdc7f5 Mon Sep 17 00:00:00 2001 From: Tiffany Yang Date: Mon, 14 Jul 2025 11:53:16 -0700 Subject: kunit: test: Export kunit_attach_mm() Tests can allocate from virtual memory using kunit_vm_mmap(), which transparently creates and attaches an mm_struct to the test runner if one is not already attached. This is suitable for most cases, except for when the code under test must access a task's mm before performing an mmap. Expose kunit_attach_mm() as part of the interface for those cases. This does not change the existing behavior. Cc: David Gow Signed-off-by: Tiffany Yang Reviewed-by: Carlos Llamas Link: https://lore.kernel.org/r/20250714185321.2417234-4-ynaffit@google.com Signed-off-by: Greg Kroah-Hartman --- include/kunit/test.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index 39c768f87dc9..d958ee53050e 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -531,6 +531,18 @@ static inline char *kunit_kstrdup(struct kunit *test, const char *str, gfp_t gfp */ const char *kunit_kstrdup_const(struct kunit *test, const char *str, gfp_t gfp); +/** + * kunit_attach_mm() - Create and attach a new mm if it doesn't already exist. + * + * Allocates a &struct mm_struct and attaches it to @current. In most cases, call + * kunit_vm_mmap() without calling kunit_attach_mm() directly. Only necessary when + * code under test accesses the mm before executing the mmap (e.g., to perform + * additional initialization beforehand). + * + * Return: 0 on success, -errno on failure. + */ +int kunit_attach_mm(void); + /** * kunit_vm_mmap() - Allocate KUnit-tracked vm_mmap() area * @test: The test context object. -- cgit v1.2.3 From 5eac636917486f3f072328d7f5bcdc22bbc9a1d1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 1 Jul 2025 14:07:01 +0200 Subject: fsi: make fsi_bus_type constant Now that the driver core can properly handle constant struct bus_type, move the fsi_bus_type variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: Ninad Palsule Cc: linux-fsi@lists.ozlabs.org Reviewed-by: Eddie James Link: https://lore.kernel.org/r/2025070100-overblown-busily-a04b@gregkh Signed-off-by: Greg Kroah-Hartman --- include/linux/fsi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fsi.h b/include/linux/fsi.h index 8c5eef808788..adea1b432f2d 100644 --- a/include/linux/fsi.h +++ b/include/linux/fsi.h @@ -68,7 +68,7 @@ extern int fsi_slave_read(struct fsi_slave *slave, uint32_t addr, extern int fsi_slave_write(struct fsi_slave *slave, uint32_t addr, const void *val, size_t size); -extern struct bus_type fsi_bus_type; +extern const struct bus_type fsi_bus_type; extern const struct device_type fsi_cdev_type; enum fsi_dev_type { -- cgit v1.2.3 From e9d8e2bf23206825ca9b4d3caf587945ba807939 Mon Sep 17 00:00:00 2001 From: Taotao Chen Date: Wed, 16 Jul 2025 09:36:06 +0000 Subject: fs: change write_begin/write_end interface to take struct kiocb * Change the address_space_operations callbacks write_begin() and write_end() to take struct kiocb * as the first argument instead of struct file *. Update all affected function prototypes, implementations, call sites, and related documentation across VFS, filesystems, and block layer. Part of a series refactoring address_space_operations write_begin and write_end callbacks to use struct kiocb for passing write context and flags. Signed-off-by: Taotao Chen Link: https://lore.kernel.org/20250716093559.217344-4-chentaotao@didiglobal.com Signed-off-by: Christian Brauner --- include/linux/buffer_head.h | 4 ++-- include/linux/fs.h | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 178eb90e9cf3..b16b88bfbc3e 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -263,11 +263,11 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, int __block_write_begin(struct folio *folio, loff_t pos, unsigned len, get_block_t *get_block); int block_write_end(loff_t pos, unsigned len, unsigned copied, struct folio *); -int generic_write_end(struct file *, struct address_space *, +int generic_write_end(const struct kiocb *, struct address_space *, loff_t, unsigned len, unsigned copied, struct folio *, void *); void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to); -int cont_write_begin(struct file *, struct address_space *, loff_t, +int cont_write_begin(const struct kiocb *, struct address_space *, loff_t, unsigned, struct folio **, void **, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); diff --git a/include/linux/fs.h b/include/linux/fs.h index 09e3e80b0528..df8c503100c4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -444,10 +444,10 @@ struct address_space_operations { void (*readahead)(struct readahead_control *); - int (*write_begin)(struct file *, struct address_space *mapping, + int (*write_begin)(const struct kiocb *, struct address_space *mapping, loff_t pos, unsigned len, struct folio **foliop, void **fsdata); - int (*write_end)(struct file *, struct address_space *mapping, + int (*write_end)(const struct kiocb *, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct folio *folio, void *fsdata); @@ -3598,9 +3598,10 @@ extern void simple_recursive_removal(struct dentry *, extern int noop_fsync(struct file *, loff_t, loff_t, int); extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter); extern int simple_empty(struct dentry *); -extern int simple_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, - struct folio **foliop, void **fsdata); +extern int simple_write_begin(const struct kiocb *iocb, + struct address_space *mapping, + loff_t pos, unsigned len, + struct folio **foliop, void **fsdata); extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); -- cgit v1.2.3 From b799474b9aeb46ec698874d4de1a799de8b5f64f Mon Sep 17 00:00:00 2001 From: Taotao Chen Date: Wed, 16 Jul 2025 09:36:08 +0000 Subject: mm/pagemap: add write_begin_get_folio() helper function Add write_begin_get_folio() to simplify the common folio lookup logic used by filesystem ->write_begin() implementations. This helper wraps __filemap_get_folio() with common flags such as FGP_WRITEBEGIN, conditional FGP_DONTCACHE, and set folio order based on the write length. Part of a series refactoring address_space_operations write_begin and write_end callbacks to use struct kiocb for passing write context and flags. Signed-off-by: Taotao Chen Link: https://lore.kernel.org/20250716093559.217344-5-chentaotao@didiglobal.com Signed-off-by: Christian Brauner --- include/linux/pagemap.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e63fbfbd5b0f..ce2bcdcadb73 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -750,6 +750,33 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, fgf_t fgp_flags, gfp_t gfp); +/** + * write_begin_get_folio - Get folio for write_begin with flags. + * @iocb: The kiocb passed from write_begin (may be NULL). + * @mapping: The address space to search. + * @index: The page cache index. + * @len: Length of data being written. + * + * This is a helper for filesystem write_begin() implementations. + * It wraps __filemap_get_folio(), setting appropriate flags in + * the write begin context. + * + * Return: A folio or an ERR_PTR. + */ +static inline struct folio *write_begin_get_folio(const struct kiocb *iocb, + struct address_space *mapping, pgoff_t index, size_t len) +{ + fgf_t fgp_flags = FGP_WRITEBEGIN; + + fgp_flags |= fgf_set_order(len); + + if (iocb && iocb->ki_flags & IOCB_DONTCACHE) + fgp_flags |= FGP_DONTCACHE; + + return __filemap_get_folio(mapping, index, fgp_flags, + mapping_gfp_mask(mapping)); +} + /** * filemap_get_folio - Find and get a folio. * @mapping: The address_space to search. -- cgit v1.2.3 From ab17ead0e0ee8650cd1cf4e481b1ed0ee9731956 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 16 Jul 2025 15:36:31 +0200 Subject: block: fix blk_zone_append_update_request_bio() kernel-doc Stephen reported new 'make htmldocs' warnings introduced by 4cc21a00762b ("block: add tracepoint for blk_zone_update_request_bio"). One is a wrong function name in the tracepoint's kernel-doc and one is a wrong function parameter. Fix these so 'make htmldocs' is warning free again for the block layer tracepoints. Fixes: 4cc21a00762b ("block: add tracepoint for blk_zone_update_request_bio") Reported-by: Stephen Rothwell Signed-off-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20250716133631.94898-1-johannes.thumshirn@wdc.com Signed-off-by: Jens Axboe --- include/trace/events/block.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 3e582d5e3a57..6aa79e2d799c 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -405,8 +405,8 @@ DEFINE_EVENT(block_bio, block_getrq, ); /** - * block_zone_update_request_bio - update the bio sector after a zone append - * @bio: the completed block IO operation + * blk_zone_append_update_request_bio - update bio sector after zone append + * @rq: the completed request that sets the bio sector * * Update the bio's bi_sector after a zone append command has been completed. */ -- cgit v1.2.3 From 0389e4256eb29ee80598129b8004db5bbbd6fbe4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:04 +0300 Subject: drm: Pass pixel_format+modifier to .get_format_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Decouple .get_format_info() from struct drm_mode_fb_cmd2 and just pass the pixel format+modifier combo in by hand. We may want to use .get_format_info() outside of the normal addfb paths where we won't have a struct drm_mode_fb_cmd2, and creating a temporary one just for this seems silly. v2: Fix intel_fb_get_format_info() docs (Laurent) Cc: Harry Wentland Cc: Leo Li Cc: Rodrigo Siqueira Cc: Alex Deucher Cc: amd-gfx@lists.freedesktop.org Cc: Laurent Pinchart Reviewed-by: Thomas Zimmermann Reviewed-by: Laurent Pinchart Acked-by: Alex Deucher Acked-by: Rodrigo Vivi Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-2-ville.syrjala@linux.intel.com --- include/drm/drm_mode_config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 9e524b51a001..e971e1b8a850 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -95,7 +95,7 @@ struct drm_mode_config_funcs { * The format information specific to the given fb metadata, or * NULL if none is found. */ - const struct drm_format_info *(*get_format_info)(const struct drm_mode_fb_cmd2 *mode_cmd); + const struct drm_format_info *(*get_format_info)(u32 pixel_format, u64 modifier); /** * @mode_valid: -- cgit v1.2.3 From 0e7d5874fb6b80c44be3cfbcf1cf356e81d91232 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:05 +0300 Subject: drm: Pass pixel_format+modifier directly to drm_get_format_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Decouple drm_get_format_info() from struct drm_mode_fb_cmd2 and just pass the pixel format+modifier combo in by hand. We may want to use drm_get_format_info() outside of the normal addfb paths where we won't have a struct drm_mode_fb_cmd2, and creating a temporary one just for this seems silly. Done with cocci: @@ identifier dev, mode_cmd; @@ struct drm_format_info * drm_get_format_info(struct drm_device *dev, - const struct drm_mode_fb_cmd2 *mode_cmd + u32 pixel_format, u64 modifier ) { <... ( - mode_cmd->pixel_format + pixel_format | - mode_cmd->modifier[0] + modifier ) ...> } @@ identifier dev, mode_cmd; @@ struct drm_format_info * drm_get_format_info(struct drm_device *dev, - const struct drm_mode_fb_cmd2 *mode_cmd + u32 pixel_format, u64 modifier ); @@ expression dev, mode_cmd; @@ - drm_get_format_info(dev, mode_cmd) + drm_get_format_info(dev, mode_cmd->pixel_format, mode_cmd->modifier[0]) v2: Fix kernel docs (Laurent) Drop drm_mode_fb_cmd2 forward declaration (Thomas) Cc: Liviu Dudau Cc: Russell King Cc: Inki Dae Cc: Seung-Woo Kim Cc: Kyungmin Park Cc: Patrik Jakobsson Cc: Chun-Kuang Hu Cc: Philipp Zabel Cc: Rob Clark Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Sean Paul Cc: Marijn Suijten Cc: Marek Vasut Cc: Stefan Agner Cc: Lyude Paul Cc: Danilo Krummrich Cc: Tomi Valkeinen Cc: Alex Deucher Cc: Sandy Huang Cc: "Heiko Stübner" Cc: Andy Yan Cc: Thierry Reding Cc: Mikko Perttunen Cc: linux-arm-msm@vger.kernel.org Cc: freedreno@lists.freedesktop.org Cc: nouveau@lists.freedesktop.org Cc: amd-gfx@lists.freedesktop.org Cc: linux-tegra@vger.kernel.org Reviewed-by: Thomas Zimmermann Reviewed-by: Laurent Pinchart Reviewed-by: Liviu Dudau Acked-by: Alex Deucher Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-3-ville.syrjala@linux.intel.com --- include/drm/drm_fourcc.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_fourcc.h b/include/drm/drm_fourcc.h index c3f4405d6662..471784426857 100644 --- a/include/drm/drm_fourcc.h +++ b/include/drm/drm_fourcc.h @@ -54,7 +54,6 @@ #endif struct drm_device; -struct drm_mode_fb_cmd2; /** * struct drm_format_info - information about a DRM format @@ -309,7 +308,7 @@ const struct drm_format_info *__drm_format_info(u32 format); const struct drm_format_info *drm_format_info(u32 format); const struct drm_format_info * drm_get_format_info(struct drm_device *dev, - const struct drm_mode_fb_cmd2 *mode_cmd); + u32 pixel_format, u64 modifier); uint32_t drm_mode_legacy_fb_format(uint32_t bpp, uint32_t depth); uint32_t drm_driver_legacy_fb_format(struct drm_device *dev, uint32_t bpp, uint32_t depth); -- cgit v1.2.3 From 81112eaac559ccd451b3dce3bbb64d6b69083961 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:07 +0300 Subject: drm: Pass the format info to .fb_create() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass along the format information from the top to .fb_create() so that we can avoid redundant (and somewhat expensive) lookups in the drivers. Done with cocci (with some manual fixups): @@ identifier func =~ ".*create.*"; identifier dev, file, mode_cmd; @@ struct drm_framebuffer *func( struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { ... ( - const struct drm_format_info *info = drm_get_format_info(...); | - const struct drm_format_info *info; ... - info = drm_get_format_info(...); ) <... - if (!info) - return ...; ...> } @@ identifier func =~ ".*create.*"; identifier dev, file, mode_cmd; @@ struct drm_framebuffer *func( struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { ... } @find@ identifier fb_create_func =~ ".*create.*"; identifier dev, file, mode_cmd; @@ struct drm_framebuffer *fb_create_func( struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); @@ identifier find.fb_create_func; expression dev, file, mode_cmd; @@ fb_create_func(dev, file + ,info ,mode_cmd) @@ expression dev, file, mode_cmd; @@ drm_gem_fb_create(dev, file + ,info ,mode_cmd) @@ expression dev, file, mode_cmd; @@ drm_gem_fb_create_with_dirty(dev, file + ,info ,mode_cmd) @@ expression dev, file_priv, mode_cmd; identifier info, fb; @@ info = drm_get_format_info(...); ... fb = dev->mode_config.funcs->fb_create(dev, file_priv + ,info ,mode_cmd); @@ identifier dev, file_priv, mode_cmd; @@ struct drm_mode_config_funcs { ... struct drm_framebuffer *(*fb_create)(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); ... }; v2: Fix kernel docs (Laurent) Fix commit msg (Geert) Cc: Alex Deucher Cc: Liviu Dudau Cc: Maxime Ripard Cc: Russell King Cc: Inki Dae Cc: Seung-Woo Kim Cc: Kyungmin Park Cc: Patrik Jakobsson Cc: Chun-Kuang Hu Cc: Philipp Zabel Cc: Rob Clark Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Sean Paul Cc: Marijn Suijten Cc: Marek Vasut Cc: Stefan Agner Cc: Lyude Paul Cc: Danilo Krummrich Cc: Tomi Valkeinen Cc: Dave Airlie Cc: Gerd Hoffmann Cc: Kieran Bingham Cc: Biju Das Cc: Sandy Huang Cc: "Heiko Stübner" Cc: Andy Yan Cc: Thierry Reding Cc: Mikko Perttunen Cc: Dave Stevenson Cc: "Maíra Canal" Cc: Raspberry Pi Kernel Maintenance Cc: Dmitry Osipenko Cc: Gurchetan Singh Cc: Chia-I Wu Cc: Zack Rusin Cc: Broadcom internal kernel review list Cc: Oleksandr Andrushchenko Cc: amd-gfx@lists.freedesktop.org Cc: linux-arm-msm@vger.kernel.org Cc: freedreno@lists.freedesktop.org Cc: nouveau@lists.freedesktop.org Cc: virtualization@lists.linux.dev Cc: spice-devel@lists.freedesktop.org Cc: linux-renesas-soc@vger.kernel.org Cc: linux-tegra@vger.kernel.org Cc: Laurent Pinchart Reviewed-by: Geert Uytterhoeven Reviewed-by: Thomas Zimmermann Reviewed-by: Dmitry Baryshkov Acked-by: Liviu Dudau Reviewed-by: Laurent Pinchart Acked-by: Alex Deucher Acked-by: Rodrigo Vivi Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-5-ville.syrjala@linux.intel.com --- include/drm/drm_gem_framebuffer_helper.h | 3 +++ include/drm/drm_mode_config.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/drm/drm_gem_framebuffer_helper.h b/include/drm/drm_gem_framebuffer_helper.h index d302521f3dd4..4fdf9d3d1863 100644 --- a/include/drm/drm_gem_framebuffer_helper.h +++ b/include/drm/drm_gem_framebuffer_helper.h @@ -8,6 +8,7 @@ struct drm_afbc_framebuffer; struct drm_device; struct drm_fb_helper_surface_size; struct drm_file; +struct drm_format_info; struct drm_framebuffer; struct drm_framebuffer_funcs; struct drm_gem_object; @@ -32,9 +33,11 @@ drm_gem_fb_create_with_funcs(struct drm_device *dev, struct drm_file *file, const struct drm_framebuffer_funcs *funcs); struct drm_framebuffer * drm_gem_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); struct drm_framebuffer * drm_gem_fb_create_with_dirty(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); int drm_gem_fb_vmap(struct drm_framebuffer *fb, struct iosys_map *map, diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index e971e1b8a850..2e848b816218 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -82,6 +82,7 @@ struct drm_mode_config_funcs { */ struct drm_framebuffer *(*fb_create)(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); /** -- cgit v1.2.3 From a34cc7bf1034280904f9683e260f9d9e9fd4b84f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:08 +0300 Subject: drm: Allow the caller to pass in the format info to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Soon all drivers should have the format info already available in the places where they call drm_helper_mode_fill_fb_struct(). Allow it to be passed along into drm_helper_mode_fill_fb_struct() instead of doing yet another redundant lookup. Start by always passing in NULL and still doing the extra lookup. The actual changes to avoid the lookup will follow. Done with cocci (with some manual fixups): @@ identifier dev, fb, mode_cmd; expression get_format_info; @@ void drm_helper_mode_fill_fb_struct(struct drm_device *dev, struct drm_framebuffer *fb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { ... - fb->format = get_format_info; + fb->format = info ?: get_format_info; ... } @@ identifier dev, fb, mode_cmd; @@ void drm_helper_mode_fill_fb_struct(struct drm_device *dev, struct drm_framebuffer *fb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); @@ expression dev, fb, mode_cmd; @@ drm_helper_mode_fill_fb_struct(dev, fb + ,NULL ,mode_cmd); Cc: Alex Deucher Cc: Liviu Dudau Cc: Russell King Cc: Inki Dae Cc: Seung-Woo Kim Cc: Kyungmin Park Cc: Patrik Jakobsson Cc: Rob Clark Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Sean Paul Cc: Marijn Suijten Cc: Lyude Paul Cc: Danilo Krummrich Cc: Tomi Valkeinen Cc: Thierry Reding Cc: Mikko Perttunen Cc: Gerd Hoffmann Cc: Dmitry Osipenko Cc: Gurchetan Singh Cc: Chia-I Wu Cc: Zack Rusin Cc: Broadcom internal kernel review list Cc: amd-gfx@lists.freedesktop.org Cc: linux-arm-msm@vger.kernel.org Cc: freedreno@lists.freedesktop.org Cc: nouveau@lists.freedesktop.org Cc: linux-tegra@vger.kernel.org Cc: virtualization@lists.linux.dev Reviewed-by: Thomas Zimmermann Reviewed-by: Laurent Pinchart Reviewed-by: Dmitry Baryshkov Reviewed-by: Liviu Dudau Acked-by: Alex Deucher Acked-by: Rodrigo Vivi Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-6-ville.syrjala@linux.intel.com --- include/drm/drm_modeset_helper.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_modeset_helper.h b/include/drm/drm_modeset_helper.h index 995fd981cab0..7e3d4c5a7f66 100644 --- a/include/drm/drm_modeset_helper.h +++ b/include/drm/drm_modeset_helper.h @@ -26,6 +26,7 @@ struct drm_crtc; struct drm_crtc_funcs; struct drm_device; +struct drm_format_info; struct drm_framebuffer; struct drm_mode_fb_cmd2; @@ -33,6 +34,7 @@ void drm_helper_move_panel_connectors_to_head(struct drm_device *); void drm_helper_mode_fill_fb_struct(struct drm_device *dev, struct drm_framebuffer *fb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); int drm_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, -- cgit v1.2.3 From 04a5889cf75aa5b59bd1e13c33eccaf49f3f9d81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:10 +0300 Subject: drm/gem: Pass along the format info from .fb_create() to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass along the format info from .fb_create() to eliminate the redundant drm_get_format_info() calls from the gem fb code. v2: Fix kernel docs (Laurent) Cc: Dave Airlie Cc: Gerd Hoffmann Cc: Sandy Huang Cc: "Heiko Stübner" Cc: Andy Yan Cc: Oleksandr Andrushchenko Cc: virtualization@lists.linux.dev Cc: spice-devel@lists.freedesktop.org Cc: Laurent Pinchart Reviewed-by: Thomas Zimmermann Reviewed-by: Laurent Pinchart Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-8-ville.syrjala@linux.intel.com --- include/drm/drm_gem_framebuffer_helper.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_gem_framebuffer_helper.h b/include/drm/drm_gem_framebuffer_helper.h index 4fdf9d3d1863..971d266ab1ba 100644 --- a/include/drm/drm_gem_framebuffer_helper.h +++ b/include/drm/drm_gem_framebuffer_helper.h @@ -25,10 +25,12 @@ int drm_gem_fb_create_handle(struct drm_framebuffer *fb, struct drm_file *file, int drm_gem_fb_init_with_funcs(struct drm_device *dev, struct drm_framebuffer *fb, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, const struct drm_framebuffer_funcs *funcs); struct drm_framebuffer * drm_gem_fb_create_with_funcs(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, const struct drm_framebuffer_funcs *funcs); struct drm_framebuffer * -- cgit v1.2.3 From 283da9e3a9a43e07188f038fc278140a73e781cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:11 +0300 Subject: drm/gem/afbc: Eliminate redundant drm_get_format_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass along the format info from .fb_create() to aliminate the redundant drm_get_format_info() calls from the afbc code. Cc: Sandy Huang Cc: "Heiko Stübner" Cc: Andy Yan Reviewed-by: Thomas Zimmermann Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-9-ville.syrjala@linux.intel.com --- include/drm/drm_gem_framebuffer_helper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_gem_framebuffer_helper.h b/include/drm/drm_gem_framebuffer_helper.h index 971d266ab1ba..24f1fd40d553 100644 --- a/include/drm/drm_gem_framebuffer_helper.h +++ b/include/drm/drm_gem_framebuffer_helper.h @@ -52,6 +52,7 @@ void drm_gem_fb_end_cpu_access(struct drm_framebuffer *fb, enum dma_data_directi (((modifier) & AFBC_VENDOR_AND_TYPE_MASK) == DRM_FORMAT_MOD_ARM_AFBC(0)) int drm_gem_fb_afbc_init(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_afbc_framebuffer *afbc_fb); -- cgit v1.2.3 From 857d18f23ab17284d1b6de6f61f4e74958596376 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jul 2025 16:49:25 -0700 Subject: cleanup: Introduce ACQUIRE() and ACQUIRE_ERR() for conditional locks scoped_cond_guard(), automatic cleanup for conditional locks, has a couple pain points: * It causes existing straight-line code to be re-indented into a new bracketed scope. While this can be mitigated by a new helper function to contain the scope, that is not always a comfortable conversion. * The return code from the conditional lock is tossed in favor of a scheme to pass a 'return err;' statement to the macro. Other attempts to clean this up, to behave more like guard() [1], got hung up trying to both establish and evaluate the conditional lock in one statement. ACQUIRE() solves this by reflecting the result of the condition in the automatic variable established by the lock CLASS(). The result is separately retrieved with the ACQUIRE_ERR() helper, effectively a PTR_ERR() operation. Link: http://lore.kernel.org/all/Z1LBnX9TpZLR5Dkf@gmail.com [1] Link: http://patch.msgid.link/20250512105026.GP4439@noisy.programming.kicks-ass.net Link: http://patch.msgid.link/20250512185817.GA1808@noisy.programming.kicks-ass.net Cc: Ingo Molnar Cc: Linus Torvalds Cc: David Lechner Cc: Fabio M. De Francesco Signed-off-by: Peter Zijlstra (Intel) [djbw: wrap Peter's proposal with changelog and comments] Co-developed-by: Dan Williams Signed-off-by: Dan Williams Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250711234932.671292-2-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- include/linux/cleanup.h | 95 +++++++++++++++++++++++++++++++++++++++++-------- include/linux/mutex.h | 2 +- include/linux/rwsem.h | 2 +- 3 files changed, 83 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 7093e1d08af0..4eb83dd71cfe 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -3,6 +3,8 @@ #define _LINUX_CLEANUP_H #include +#include +#include /** * DOC: scope-based cleanup helpers @@ -61,9 +63,21 @@ * Observe the lock is held for the remainder of the "if ()" block not * the remainder of "func()". * - * Now, when a function uses both __free() and guard(), or multiple - * instances of __free(), the LIFO order of variable definition order - * matters. GCC documentation says: + * The ACQUIRE() macro can be used in all places that guard() can be + * used and additionally support conditional locks + * + * + * DEFINE_GUARD_COND(pci_dev, _try, pci_dev_trylock(_T)) + * ... + * ACQUIRE(pci_dev_try, lock)(dev); + * rc = ACQUIRE_ERR(pci_dev_try, &lock); + * if (rc) + * return rc; + * // @lock is held + * + * Now, when a function uses both __free() and guard()/ACQUIRE(), or + * multiple instances of __free(), the LIFO order of variable definition + * order matters. GCC documentation says: * * "When multiple variables in the same scope have cleanup attributes, * at exit from the scope their associated cleanup functions are run in @@ -305,14 +319,46 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ * acquire fails. * * Only for conditional locks. + * + * ACQUIRE(name, var): + * a named instance of the (guard) class, suitable for conditional + * locks when paired with ACQUIRE_ERR(). + * + * ACQUIRE_ERR(name, &var): + * a helper that is effectively a PTR_ERR() conversion of the guard + * pointer. Returns 0 when the lock was acquired and a negative + * error code otherwise. */ #define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond) \ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond -#define __DEFINE_GUARD_LOCK_PTR(_name, _exp) \ - static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ - { return (void *)(__force unsigned long)*(_exp); } +#define __GUARD_IS_ERR(_ptr) \ + ({ \ + unsigned long _rc = (__force unsigned long)(_ptr); \ + unlikely((_rc - 1) >= -MAX_ERRNO - 1); \ + }) + +#define __DEFINE_GUARD_LOCK_PTR(_name, _exp) \ + static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \ + { \ + void *_ptr = (void *)(__force unsigned long)*(_exp); \ + if (IS_ERR(_ptr)) { \ + _ptr = NULL; \ + } \ + return _ptr; \ + } \ + static inline int class_##_name##_lock_err(class_##_name##_t *_T) \ + { \ + long _rc = (__force unsigned long)*(_exp); \ + if (!_rc) { \ + _rc = -EBUSY; \ + } \ + if (!IS_ERR_VALUE(_rc)) { \ + _rc = 0; \ + } \ + return _rc; \ + } #define DEFINE_CLASS_IS_GUARD(_name) \ __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ @@ -323,23 +369,37 @@ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond __DEFINE_GUARD_LOCK_PTR(_name, _T) #define DEFINE_GUARD(_name, _type, _lock, _unlock) \ - DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ + DEFINE_CLASS(_name, _type, if (!__GUARD_IS_ERR(_T)) { _unlock; }, ({ _lock; _T; }), _type _T); \ DEFINE_CLASS_IS_GUARD(_name) -#define DEFINE_GUARD_COND(_name, _ext, _condlock) \ +#define DEFINE_GUARD_COND_4(_name, _ext, _lock, _cond) \ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ EXTEND_CLASS(_name, _ext, \ - ({ void *_t = _T; if (_T && !(_condlock)) _t = NULL; _t; }), \ + ({ void *_t = _T; int _RET = (_lock); if (_T && !(_cond)) _t = ERR_PTR(_RET); _t; }), \ class_##_name##_t _T) \ static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ - { return class_##_name##_lock_ptr(_T); } + { return class_##_name##_lock_ptr(_T); } \ + static inline int class_##_name##_ext##_lock_err(class_##_name##_t *_T) \ + { return class_##_name##_lock_err(_T); } + +/* + * Default binary condition; success on 'true'. + */ +#define DEFINE_GUARD_COND_3(_name, _ext, _lock) \ + DEFINE_GUARD_COND_4(_name, _ext, _lock, _RET) + +#define DEFINE_GUARD_COND(X...) CONCATENATE(DEFINE_GUARD_COND_, COUNT_ARGS(X))(X) #define guard(_name) \ CLASS(_name, __UNIQUE_ID(guard)) #define __guard_ptr(_name) class_##_name##_lock_ptr +#define __guard_err(_name) class_##_name##_lock_err #define __is_cond_ptr(_name) class_##_name##_is_conditional +#define ACQUIRE(_name, _var) CLASS(_name, _var) +#define ACQUIRE_ERR(_name, _var) __guard_err(_name)(_var) + /* * Helper macro for scoped_guard(). * @@ -401,7 +461,7 @@ typedef struct { \ \ static inline void class_##_name##_destructor(class_##_name##_t *_T) \ { \ - if (_T->lock) { _unlock; } \ + if (!__GUARD_IS_ERR(_T->lock)) { _unlock; } \ } \ \ __DEFINE_GUARD_LOCK_PTR(_name, &_T->lock) @@ -433,15 +493,22 @@ __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ __DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \ __DEFINE_LOCK_GUARD_0(_name, _lock) -#define DEFINE_LOCK_GUARD_1_COND(_name, _ext, _condlock) \ +#define DEFINE_LOCK_GUARD_1_COND_4(_name, _ext, _lock, _cond) \ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ EXTEND_CLASS(_name, _ext, \ ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\ - if (_T->lock && !(_condlock)) _T->lock = NULL; \ + int _RET = (_lock); \ + if (_T->lock && !(_cond)) _T->lock = ERR_PTR(_RET);\ _t; }), \ typeof_member(class_##_name##_t, lock) l) \ static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ - { return class_##_name##_lock_ptr(_T); } + { return class_##_name##_lock_ptr(_T); } \ + static inline int class_##_name##_ext##_lock_err(class_##_name##_t *_T) \ + { return class_##_name##_lock_err(_T); } + +#define DEFINE_LOCK_GUARD_1_COND_3(_name, _ext, _lock) \ + DEFINE_LOCK_GUARD_1_COND_4(_name, _ext, _lock, _RET) +#define DEFINE_LOCK_GUARD_1_COND(X...) CONCATENATE(DEFINE_LOCK_GUARD_1_COND_, COUNT_ARGS(X))(X) #endif /* _LINUX_CLEANUP_H */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index a039fa8c1780..9d5d7ed5c101 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -224,7 +224,7 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T)) -DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0) +DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T), _RET == 0) extern unsigned long mutex_get_owner(struct mutex *lock); diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index c8b543d428b0..c810deb88d13 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -240,7 +240,7 @@ extern void up_write(struct rw_semaphore *sem); DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T)) DEFINE_GUARD_COND(rwsem_read, _try, down_read_trylock(_T)) -DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T) == 0) +DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T), _RET == 0) DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) DEFINE_GUARD_COND(rwsem_write, _try, down_write_trylock(_T)) -- cgit v1.2.3 From d03fcf50ba56f4479685b951506422eeca230853 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 11 Jul 2025 16:49:32 -0700 Subject: cxl: Convert to ACQUIRE() for conditional rwsem locking Use ACQUIRE() to cleanup conditional locking paths in the CXL driver The ACQUIRE() macro and its associated ACQUIRE_ERR() helpers, like scoped_cond_guard(), arrange for scoped-based conditional locking. Unlike scoped_cond_guard(), these macros arrange for an ERR_PTR() to be retrieved representing the state of the conditional lock. The goal of this conversion is to complete the removal of all explicit unlock calls in the subsystem. I.e. the methods to acquire a lock are solely via guard(), scoped_guard() (for limited cases), or ACQUIRE(). All unlock is implicit / scope-based. In order to make sure all lock sites are converted, the existing rwsem's are consolidated and renamed in 'struct cxl_rwsem'. While that makes the patch noisier it gives a clean cut-off between old-world (explicit unlock allowed), and new world (explicit unlock deleted). Cc: David Lechner Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Ingo Molnar Cc: Fabio M. De Francesco Cc: Davidlohr Bueso Cc: Jonathan Cameron Cc: Dave Jiang Cc: Alison Schofield Cc: Vishal Verma Cc: Ira Weiny Cc: Shiju Jose Acked-by: Peter Zijlstra (Intel) Signed-off-by: Dan Williams Reviewed-by: Jonathan Cameron Reviewed-by: Fabio M. De Francesco Reviewed-by: Dave Jiang Tested-by: Shiju Jose Link: https://patch.msgid.link/20250711234932.671292-9-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- include/linux/rwsem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index c810deb88d13..cbafdc12e743 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -244,6 +244,7 @@ DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T), _RET == 0) DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) DEFINE_GUARD_COND(rwsem_write, _try, down_write_trylock(_T)) +DEFINE_GUARD_COND(rwsem_write, _kill, down_write_killable(_T), _RET == 0) /* * downgrade write lock to read lock -- cgit v1.2.3 From 09813cde376d9d8f30eaf761534532101a0a7755 Mon Sep 17 00:00:00 2001 From: Hiago De Franco Date: Sun, 29 Jun 2025 14:25:10 -0300 Subject: pmdomain: core: introduce dev_pm_genpd_is_on() This helper function returns the current power status of a given generic power domain. As example, remoteproc/imx_rproc.c can now use this function to check the power status of the remote core to properly set "attached" or "offline" modes. Suggested-by: Ulf Hansson Reviewed-by: Bjorn Andersson Reviewed-by: Peng Fan Signed-off-by: Hiago De Franco Link: https://lore.kernel.org/r/20250629172512.14857-2-hiagofranco@gmail.com Signed-off-by: Ulf Hansson --- include/linux/pm_domain.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 99556589f45e..b9d3c7d5c4f8 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -315,6 +315,7 @@ void dev_pm_genpd_synced_poweroff(struct device *dev); int dev_pm_genpd_set_hwmode(struct device *dev, bool enable); bool dev_pm_genpd_get_hwmode(struct device *dev); int dev_pm_genpd_rpm_always_on(struct device *dev, bool on); +bool dev_pm_genpd_is_on(struct device *dev); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -407,6 +408,11 @@ static inline int dev_pm_genpd_rpm_always_on(struct device *dev, bool on) return -EOPNOTSUPP; } +static inline bool dev_pm_genpd_is_on(struct device *dev) +{ + return false; +} + #define simple_qos_governor (*(struct dev_power_governor *)(NULL)) #define pm_domain_always_on_gov (*(struct dev_power_governor *)(NULL)) #endif -- cgit v1.2.3 From c1f1fda141373d7253b4c1497043b0ef85f534ce Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Mon, 14 Jul 2025 19:42:12 +0800 Subject: ACPI: APEI: handle synchronous exceptions in task work The memory uncorrected error could be signaled by asynchronous interrupt (specifically, SPI in arm64 platform), e.g. when an error is detected by a background scrubber, or signaled by synchronous exception (specifically, data abort exception in arm64 platform), e.g. when a CPU tries to access a poisoned cache line. Currently, both synchronous and asynchronous errors use memory_failure_queue() to schedule memory_failure() to exectute in a kworker context. As a result, when a user-space process is accessing a poisoned data, a data abort is taken and the memory_failure() is executed in the kworker context, which: - will send wrong si_code by SIGBUS signal in early_kill mode, and - can not kill the user-space in some cases resulting a synchronous error infinite loop Issue 1: send wrong si_code in early_kill mode Since commit a70297d22132 ("ACPI: APEI: set memory failure flags as MF_ACTION_REQUIRED on synchronous events")', the flag MF_ACTION_REQUIRED could be used to determine whether a synchronous exception occurs on ARM64 platform. When a synchronous exception is detected, the kernel is expected to terminate the current process which has accessed a poisoned page. This is done by sending a SIGBUS signal with error code BUS_MCEERR_AR, indicating an action-required machine check error on read. However, when kill_proc() is called to terminate the processes who has the poisoned page mapped, it sends the incorrect SIGBUS error code BUS_MCEERR_AO because the context in which it operates is not the one where the error was triggered. To reproduce this problem: #sysctl -w vm.memory_failure_early_kill=1 vm.memory_failure_early_kill = 1 # STEP2: inject an UCE error and consume it to trigger a synchronous error #einj_mem_uc single 0: single vaddr = 0xffffb0d75400 paddr = 4092d55b400 injecting ... triggering ... signal 7 code 5 addr 0xffffb0d75000 page not present Test passed The si_code (code 5) from einj_mem_uc indicates that it is BUS_MCEERR_AO error and it is not factually correct. After this change: # STEP1: enable early kill mode #sysctl -w vm.memory_failure_early_kill=1 vm.memory_failure_early_kill = 1 # STEP2: inject an UCE error and consume it to trigger a synchronous error #einj_mem_uc single 0: single vaddr = 0xffffb0d75400 paddr = 4092d55b400 injecting ... triggering ... signal 7 code 4 addr 0xffffb0d75000 page not present Test passed The si_code (code 4) from einj_mem_uc indicates that it is a BUS_MCEERR_AR error as expected. Issue 2: a synchronous error infinite loop If a user-space process, e.g. devmem, accesses a poisoned page for which the HWPoison flag is set, kill_accessing_process() is called to send SIGBUS to current processs with error info. Since the memory_failure() is executed in the kworker context, it will just do nothing but return EFAULT. So, devmem will access the posioned page and trigger an exception again, resulting in a synchronous error infinite loop. Such exception loop may cause platform firmware to exceed some threshold and reboot when Linux could have recovered from this error. To reproduce this problem: # STEP 1: inject an UCE error, and kernel will set HWPosion flag for related page #einj_mem_uc single 0: single vaddr = 0xffffb0d75400 paddr = 4092d55b400 injecting ... triggering ... signal 7 code 4 addr 0xffffb0d75000 page not present Test passed # STEP 2: access the same page and it will trigger a synchronous error infinite loop devmem 0x4092d55b400 To fix above two issues, queue memory_failure() as a task_work so that it runs in the context of the process that is actually consuming the poisoned data. Signed-off-by: Shuai Xue Tested-by: Ma Wupeng Reviewed-by: Kefeng Wang Reviewed-by: Xiaofei Tan Reviewed-by: Baolin Wang Reviewed-by: Jarkko Sakkinen Reviewed-by: Jonathan Cameron Reviewed-by: Jane Chu Reviewed-by: Yazen Ghannam Reviewed-by: Hanjun Guo Link: https://patch.msgid.link/20250714114212.31660-3-xueshuai@linux.alibaba.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/acpi/ghes.h | 3 --- include/linux/mm.h | 1 - 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index be1dd4c1a917..ebd21b05fe6e 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -35,9 +35,6 @@ struct ghes_estatus_node { struct llist_node llnode; struct acpi_hest_generic *generic; struct ghes *ghes; - - int task_work_cpu; - struct callback_head task_work; }; struct ghes_estatus_cache { diff --git a/include/linux/mm.h b/include/linux/mm.h index 0ef2ba0c667a..02dffff2508b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3896,7 +3896,6 @@ enum mf_flags { int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index, unsigned long count, int mf_flags); extern int memory_failure(unsigned long pfn, int flags); -extern void memory_failure_queue_kick(int cpu); extern int unpoison_memory(unsigned long pfn); extern atomic_long_t num_poisoned_pages __read_mostly; extern int soft_offline_page(unsigned long pfn, int flags); -- cgit v1.2.3 From dfef8d87a031ac1a46dde3de804e0fcf3c3a6afd Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Mon, 14 Jul 2025 22:27:43 +0200 Subject: Bluetooth: hci_core: fix typos in macros The provided macro parameter is named 'dev' (rather than 'hdev', which may be a variable on the stack where the macro is used). Fixes: a9a830a676a9 ("Bluetooth: hci_event: Fix sending HCI_OP_READ_ENC_KEY_SIZE") Fixes: 6126ffabba6b ("Bluetooth: Introduce HCI_CONN_FLAG_DEVICE_PRIVACY device flag") Signed-off-by: Christian Eggers Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 0da011fc8146..052c91613bb9 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1940,11 +1940,11 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define ll_privacy_capable(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY) #define privacy_mode_capable(dev) (ll_privacy_capable(dev) && \ - (hdev->commands[39] & 0x04)) + ((dev)->commands[39] & 0x04)) #define read_key_size_capable(dev) \ ((dev)->commands[20] & 0x10 && \ - !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks)) + !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &(dev)->quirks)) #define read_voice_setting_capable(dev) \ ((dev)->commands[9] & 0x04 && \ -- cgit v1.2.3 From cdee6a4416b2a57c89082929cc60e2275bb32a3a Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Mon, 14 Jul 2025 22:27:44 +0200 Subject: Bluetooth: hci_core: add missing braces when using macro parameters Macro parameters should always be put into braces when accessing it. Fixes: 4fc9857ab8c6 ("Bluetooth: hci_sync: Add check simultaneous roles support") Signed-off-by: Christian Eggers Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 052c91613bb9..367ca43f45d1 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -829,20 +829,20 @@ extern struct mutex hci_cb_list_lock; #define hci_dev_test_and_clear_flag(hdev, nr) test_and_clear_bit((nr), (hdev)->dev_flags) #define hci_dev_test_and_change_flag(hdev, nr) test_and_change_bit((nr), (hdev)->dev_flags) -#define hci_dev_clear_volatile_flags(hdev) \ - do { \ - hci_dev_clear_flag(hdev, HCI_LE_SCAN); \ - hci_dev_clear_flag(hdev, HCI_LE_ADV); \ - hci_dev_clear_flag(hdev, HCI_LL_RPA_RESOLUTION);\ - hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ); \ - hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT); \ +#define hci_dev_clear_volatile_flags(hdev) \ + do { \ + hci_dev_clear_flag((hdev), HCI_LE_SCAN); \ + hci_dev_clear_flag((hdev), HCI_LE_ADV); \ + hci_dev_clear_flag((hdev), HCI_LL_RPA_RESOLUTION); \ + hci_dev_clear_flag((hdev), HCI_PERIODIC_INQ); \ + hci_dev_clear_flag((hdev), HCI_QUALITY_REPORT); \ } while (0) #define hci_dev_le_state_simultaneous(hdev) \ - (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) && \ - (hdev->le_states[4] & 0x08) && /* Central */ \ - (hdev->le_states[4] & 0x40) && /* Peripheral */ \ - (hdev->le_states[3] & 0x10)) /* Simultaneous */ + (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &(hdev)->quirks) && \ + ((hdev)->le_states[4] & 0x08) && /* Central */ \ + ((hdev)->le_states[4] & 0x40) && /* Peripheral */ \ + ((hdev)->le_states[3] & 0x10)) /* Simultaneous */ /* ----- HCI interface to upper protocols ----- */ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr); -- cgit v1.2.3 From 6851a0c228fc040dce8e4c393004209e7372e0a3 Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Mon, 14 Jul 2025 22:27:45 +0200 Subject: Bluetooth: hci_dev: replace 'quirks' integer by 'quirk_flags' bitmap The 'quirks' member already ran out of bits on some platforms some time ago. Replace the integer member by a bitmap in order to have enough bits in future. Replace raw bit operations by accessor macros. Fixes: ff26b2dd6568 ("Bluetooth: Add quirk for broken READ_VOICE_SETTING") Fixes: 127881334eaa ("Bluetooth: Add quirk for broken READ_PAGE_SCAN_TYPE") Suggested-by: Pauli Virtanen Tested-by: Ivan Pravdin Signed-off-by: Kiran K Signed-off-by: Christian Eggers Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 2 ++ include/net/bluetooth/hci_core.h | 28 ++++++++++++++++------------ 2 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 82cbd54443ac..c79901f2dc2a 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -377,6 +377,8 @@ enum { * This quirk must be set before hci_register_dev is called. */ HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE, + + __HCI_NUM_QUIRKS, }; /* HCI device flags */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 367ca43f45d1..f79f59e67114 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -464,7 +464,7 @@ struct hci_dev { unsigned int auto_accept_delay; - unsigned long quirks; + DECLARE_BITMAP(quirk_flags, __HCI_NUM_QUIRKS); atomic_t cmd_cnt; unsigned int acl_cnt; @@ -656,6 +656,10 @@ struct hci_dev { u8 (*classify_pkt_type)(struct hci_dev *hdev, struct sk_buff *skb); }; +#define hci_set_quirk(hdev, nr) set_bit((nr), (hdev)->quirk_flags) +#define hci_clear_quirk(hdev, nr) clear_bit((nr), (hdev)->quirk_flags) +#define hci_test_quirk(hdev, nr) test_bit((nr), (hdev)->quirk_flags) + #define HCI_PHY_HANDLE(handle) (handle & 0xff) enum conn_reasons { @@ -839,7 +843,7 @@ extern struct mutex hci_cb_list_lock; } while (0) #define hci_dev_le_state_simultaneous(hdev) \ - (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &(hdev)->quirks) && \ + (!hci_test_quirk((hdev), HCI_QUIRK_BROKEN_LE_STATES) && \ ((hdev)->le_states[4] & 0x08) && /* Central */ \ ((hdev)->le_states[4] & 0x40) && /* Peripheral */ \ ((hdev)->le_states[3] & 0x10)) /* Simultaneous */ @@ -1931,8 +1935,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn); ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_2M)) #define le_coded_capable(dev) (((dev)->le_features[1] & HCI_LE_PHY_CODED) && \ - !test_bit(HCI_QUIRK_BROKEN_LE_CODED, \ - &(dev)->quirks)) + !hci_test_quirk((dev), \ + HCI_QUIRK_BROKEN_LE_CODED)) #define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED)) @@ -1944,27 +1948,27 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define read_key_size_capable(dev) \ ((dev)->commands[20] & 0x10 && \ - !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE)) #define read_voice_setting_capable(dev) \ ((dev)->commands[9] & 0x04 && \ - !test_bit(HCI_QUIRK_BROKEN_READ_VOICE_SETTING, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_READ_VOICE_SETTING)) /* Use enhanced synchronous connection if command is supported and its quirk * has not been set. */ #define enhanced_sync_conn_capable(dev) \ (((dev)->commands[29] & 0x08) && \ - !test_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN)) /* Use ext scanning if set ext scan param and ext scan enable is supported */ #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \ ((dev)->commands[37] & 0x40) && \ - !test_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_EXT_SCAN)) /* Use ext create connection if command is supported */ #define use_ext_conn(dev) (((dev)->commands[37] & 0x80) && \ - !test_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_EXT_CREATE_CONN)) /* Extended advertising support */ #define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV)) @@ -1979,8 +1983,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn); */ #define use_enhanced_conn_complete(dev) ((ll_privacy_capable(dev) || \ ext_adv_capable(dev)) && \ - !test_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, \ - &(dev)->quirks)) + !hci_test_quirk((dev), \ + HCI_QUIRK_BROKEN_EXT_CREATE_CONN)) /* Periodic advertising support */ #define per_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_PERIODIC_ADV)) @@ -1997,7 +2001,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define sync_recv_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_SYNC_RECEIVER) #define mws_transport_config_capable(dev) (((dev)->commands[30] & 0x08) && \ - (!test_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &(dev)->quirks))) + (!hci_test_quirk((dev), HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG))) /* ----- HCI protocols ----- */ #define HCI_PROTO_DEFER 0x01 -- cgit v1.2.3 From 55d42f6169760d052330f3c949c02e37867b87d8 Mon Sep 17 00:00:00 2001 From: Pratap Nirujogi Date: Mon, 23 Jun 2025 18:44:50 -0400 Subject: drm/amd/amdgpu: Add helper functions for isp buffers Accessing amdgpu internal data structures "struct amdgpu_device" and "struct amdgpu_bo" in ISP V4L2 driver to alloc/free GART buffers is not recommended. Add new amdgpu_isp helper functions that takes opaque params from ISP V4L2 driver and calls the amdgpu internal functions amdgpu_bo_create_isp_user() and amdgpu_bo_create_kernel() to alloc/free GART buffers. Reviewed-by: Mario Limonciello Signed-off-by: Pratap Nirujogi Signed-off-by: Alex Deucher --- include/drm/amd/isp.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 include/drm/amd/isp.h (limited to 'include') diff --git a/include/drm/amd/isp.h b/include/drm/amd/isp.h new file mode 100644 index 000000000000..ec868288abf2 --- /dev/null +++ b/include/drm/amd/isp.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ + +#ifndef __ISP_H__ +#define __ISP_H__ + +#include + +struct device; + +struct isp_platform_data { + void *adev; + u32 asic_type; + resource_size_t base_rmmio_size; +}; + +int isp_user_buffer_alloc(struct device *dev, void *dmabuf, + void **buf_obj, u64 *buf_addr); + +void isp_user_buffer_free(void *buf_obj); + +int isp_kernel_buffer_alloc(struct device *dev, u64 size, + void **buf_obj, u64 *gpu_addr, void **cpu_addr); + +void isp_kernel_buffer_free(void **buf_obj, u64 *gpu_addr, void **cpu_addr); + +#endif -- cgit v1.2.3 From 78d0a27ae0e2e70b22895f4b388cc0ab88e3c6ca Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Wed, 25 Jun 2025 15:29:45 +0800 Subject: drm/amdgpu: Add user queue instance count in HW IP info This change exposes the number of available user queue instances for each hardware IP type (GFX, COMPUTE, SDMA) through the drm_amdgpu_info_hw_ip interface. Key changes: 1. Added userq_num_instance field to drm_amdgpu_info_hw_ip structure 2. Implemented counting of available HQD slots using: - mes.gfx_hqd_mask for GFX queues - mes.compute_hqd_mask for COMPUTE queues - mes.sdma_hqd_mask for SDMA queues 3. Only counts available instances when user queues are enabled (!disable_uq) v2: using the adev->mes.gfx_hqd_mask[]/compute_hqd_mask[]/sdma_hqd_mask[] masks to determine the number of queue slots available for each engine type (Alex) v3: rename userq_num_instance to userq_num_hqds (Alex) Suggested-by: Alex Deucher Reviewed-by: Alex Deucher Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 45c4fa13499c..66c4a03ac9f9 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1493,6 +1493,8 @@ struct drm_amdgpu_info_hw_ip { __u32 available_rings; /** version info: bits 23:16 major, 15:8 minor, 7:0 revision */ __u32 ip_discovery_version; + /* Userq available hqds */ + __u32 userq_num_hqds; }; /* GFX metadata BO sizes and alignment info (in bytes) */ -- cgit v1.2.3 From 9ffab039bcb0bbfade0e659552d2fb912347a871 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Fri, 4 Jul 2025 15:17:43 +0800 Subject: drm/amdgpu: Replace HQD terminology with slots naming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The term "HQD" is CP-specific and doesn't accurately describe the queue resources for other IP blocks like SDMA, VCN, or VPE. This change: 1. Renames `num_hqds` to `num_slots` in amdgpu_kms.c to better reflect the generic nature of the resource counting 2. Updates the UAPI struct member from `userq_num_hqds` to `userq_num_slots` 3. Maintains the same functionality while using more appropriate terminology Signed-off-by: Jesse Zhang Reviewed-by: Marek Olšák Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 66c4a03ac9f9..bdedbaccf776 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1493,8 +1493,8 @@ struct drm_amdgpu_info_hw_ip { __u32 available_rings; /** version info: bits 23:16 major, 15:8 minor, 7:0 revision */ __u32 ip_discovery_version; - /* Userq available hqds */ - __u32 userq_num_hqds; + /* Userq available slots */ + __u32 userq_num_slots; }; /* GFX metadata BO sizes and alignment info (in bytes) */ -- cgit v1.2.3 From 19d18fdfc79217c86802271c9ce5b4ed174628cc Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Wed, 16 Jul 2025 21:46:53 +0800 Subject: bpf: Add struct bpf_token_info The 'commit 35f96de04127 ("bpf: Introduce BPF token object")' added BPF token as a new kind of BPF kernel object. And BPF_OBJ_GET_INFO_BY_FD already used to get BPF object info, so we can also get token info with this cmd. One usage scenario, when program runs failed with token, because of the permission failure, we can report what BPF token is allowing with this API for debugging. Acked-by: Andrii Nakryiko Signed-off-by: Tao Chen Link: https://lore.kernel.org/r/20250716134654.1162635-1-chen.dylane@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 11 +++++++++++ include/uapi/linux/bpf.h | 8 ++++++++ 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bc887831eaa5..f9cd2164ed23 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2354,6 +2354,7 @@ extern const struct super_operations bpf_super_ops; extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; extern const struct file_operations bpf_iter_fops; +extern const struct file_operations bpf_token_fops; #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ extern const struct bpf_prog_ops _name ## _prog_ops; \ @@ -2551,6 +2552,9 @@ void bpf_token_inc(struct bpf_token *token); void bpf_token_put(struct bpf_token *token); int bpf_token_create(union bpf_attr *attr); struct bpf_token *bpf_token_get_from_fd(u32 ufd); +int bpf_token_get_info_by_fd(struct bpf_token *token, + const union bpf_attr *attr, + union bpf_attr __user *uattr); bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type); @@ -2949,6 +2953,13 @@ static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd) return ERR_PTR(-EOPNOTSUPP); } +static inline int bpf_token_get_info_by_fd(struct bpf_token *token, + const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + return -EOPNOTSUPP; +} + static inline void __dev_flush(struct list_head *flush_list) { } diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0670e15a6100..233de8677382 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -450,6 +450,7 @@ union bpf_iter_link_info { * * **struct bpf_map_info** * * **struct bpf_btf_info** * * **struct bpf_link_info** + * * **struct bpf_token_info** * * Return * Returns zero on success. On error, -1 is returned and *errno* @@ -6803,6 +6804,13 @@ struct bpf_link_info { }; } __attribute__((aligned(8))); +struct bpf_token_info { + __u64 allowed_cmds; + __u64 allowed_maps; + __u64 allowed_progs; + __u64 allowed_attachs; +} __attribute__((aligned(8))); + /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed * by user and intended to be used by socket (e.g. to bind to, depends on * attach type). -- cgit v1.2.3 From 23972da96e1eee7f10c8ef641d56202ab9af8ba7 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 30 Jun 2025 14:12:02 +0200 Subject: firmware: qcom: scm: remove unused arguments from SHM bridge routines qcom_scm_shm_bridge_create() and qcom_scm_shm_bridge_delete() take struct device as argument but don't use it. Remove it from these functions' prototypes. Reviewed-by: Konrad Dybcio Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20250630-qcom-scm-race-v2-1-fa3851c98611@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/firmware/qcom/qcom_scm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index 983e1591bbba..82b1b8c50ca3 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -149,10 +149,10 @@ bool qcom_scm_lmh_dcvsh_available(void); int qcom_scm_gpu_init_regs(u32 gpu_req); int qcom_scm_shm_bridge_enable(void); -int qcom_scm_shm_bridge_create(struct device *dev, u64 pfn_and_ns_perm_flags, +int qcom_scm_shm_bridge_create(u64 pfn_and_ns_perm_flags, u64 ipfn_and_s_perm_flags, u64 size_and_flags, u64 ns_vmids, u64 *handle); -int qcom_scm_shm_bridge_delete(struct device *dev, u64 handle); +int qcom_scm_shm_bridge_delete(u64 handle); #ifdef CONFIG_QCOM_QSEECOM -- cgit v1.2.3 From dc3f4e75c54c19bad9a70419afae00ce6baf3ebf Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 30 Jun 2025 14:12:03 +0200 Subject: firmware: qcom: scm: take struct device as argument in SHM bridge enable qcom_scm_shm_bridge_enable() is used early in the SCM initialization routine. It makes an SCM call and so expects the internal __scm pointer in the SCM driver to be assigned. For this reason the tzmem memory pool is allocated *after* this pointer is assigned. However, this can lead to a crash if another consumer of the SCM API makes a call using the memory pool between the assignment of the __scm pointer and the initialization of the tzmem memory pool. As qcom_scm_shm_bridge_enable() is a special case, not meant to be called by ordinary users, pull it into the local SCM header. Make it take struct device as argument. This is the device that will be used to make the SCM call as opposed to the global __scm pointer. This will allow us to move the tzmem initialization *before* the __scm assignment in the core SCM driver. Signed-off-by: Bartosz Golaszewski Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250630-qcom-scm-race-v2-2-fa3851c98611@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/firmware/qcom/qcom_scm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index 82b1b8c50ca3..0f667bf1d4d9 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -148,7 +148,6 @@ bool qcom_scm_lmh_dcvsh_available(void); int qcom_scm_gpu_init_regs(u32 gpu_req); -int qcom_scm_shm_bridge_enable(void); int qcom_scm_shm_bridge_create(u64 pfn_and_ns_perm_flags, u64 ipfn_and_s_perm_flags, u64 size_and_flags, u64 ns_vmids, u64 *handle); -- cgit v1.2.3 From e53ff5b79fbac35d1fbf2b8c28a5a5dcf125567e Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Wed, 25 Jun 2025 11:11:44 +0200 Subject: dt-bindings: arm: qcom,ids: Add SoC IDs for SM7635 family Add the SoC IDs of the 'volcano' family, namely SM7635, SM6650, SM6650P, QCM6690 and QCS6690. Signed-off-by: Luca Weiss Link: https://lore.kernel.org/r/20250625-sm7635-socinfo-v1-1-be09d5c697b8@fairphone.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/arm/qcom,ids.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h index 897b8135dc12..cb8ce53146f0 100644 --- a/include/dt-bindings/arm/qcom,ids.h +++ b/include/dt-bindings/arm/qcom,ids.h @@ -279,8 +279,13 @@ #define QCOM_ID_QCM8550 604 #define QCOM_ID_SM8750 618 #define QCOM_ID_IPQ5300 624 +#define QCOM_ID_SM7635 636 +#define QCOM_ID_SM6650 640 +#define QCOM_ID_SM6650P 641 #define QCOM_ID_IPQ5321 650 #define QCOM_ID_IPQ5424 651 +#define QCOM_ID_QCM6690 657 +#define QCOM_ID_QCS6690 658 #define QCOM_ID_IPQ5404 671 #define QCOM_ID_QCS9100 667 #define QCOM_ID_QCS8300 674 -- cgit v1.2.3 From 50b749fab108c2354bb6368d95aaec82e3c99912 Mon Sep 17 00:00:00 2001 From: Rakesh Kota Date: Fri, 4 Jul 2025 17:00:36 +0530 Subject: soc: qcom: spmi-pmic: add more PMIC SUBTYPE IDs Add the PMM8650AU and PMM8650AU_PSAIL PMIC SUBTYPE IDs and These PMICs are used by the qcs8300 and qcs9100 platforms. Signed-off-by: Rakesh Kota Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250704113036.1627695-1-rakesh.kota@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- include/soc/qcom/qcom-spmi-pmic.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/soc/qcom/qcom-spmi-pmic.h b/include/soc/qcom/qcom-spmi-pmic.h index df3d3a0af98a..2cf9e2d8cd55 100644 --- a/include/soc/qcom/qcom-spmi-pmic.h +++ b/include/soc/qcom/qcom-spmi-pmic.h @@ -50,6 +50,8 @@ #define PMR735B_SUBTYPE 0x34 #define PM6350_SUBTYPE 0x36 #define PM4125_SUBTYPE 0x37 +#define PMM8650AU_SUBTYPE 0x4e +#define PMM8650AU_PSAIL_SUBTYPE 0x4f #define PMI8998_FAB_ID_SMIC 0x11 #define PMI8998_FAB_ID_GF 0x30 -- cgit v1.2.3 From 314b903c30040632db7edd187cd33003b2aee512 Mon Sep 17 00:00:00 2001 From: George Moussalem Date: Fri, 16 May 2025 16:36:09 +0400 Subject: dt-bindings: clock: qcom: Add CMN PLL support for IPQ5018 SoC The CMN PLL block in the IPQ5018 SoC takes 96 MHZ as the reference input clock. Its output clocks are the XO (24Mhz), sleep (32Khz), and ethernet (50Mhz) clocks. Reviewed-by: Rob Herring (Arm) Signed-off-by: George Moussalem Link: https://lore.kernel.org/r/20250516-ipq5018-cmn-pll-v4-2-389a6b30e504@outlook.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,ipq5018-cmn-pll.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,ipq5018-cmn-pll.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,ipq5018-cmn-pll.h b/include/dt-bindings/clock/qcom,ipq5018-cmn-pll.h new file mode 100644 index 000000000000..586d1c9b33b3 --- /dev/null +++ b/include/dt-bindings/clock/qcom,ipq5018-cmn-pll.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_IPQ5018_CMN_PLL_H +#define _DT_BINDINGS_CLK_QCOM_IPQ5018_CMN_PLL_H + +/* CMN PLL core clock. */ +#define IPQ5018_CMN_PLL_CLK 0 + +/* The output clocks from CMN PLL of IPQ5018. */ +#define IPQ5018_XO_24MHZ_CLK 1 +#define IPQ5018_SLEEP_32KHZ_CLK 2 +#define IPQ5018_ETH_50MHZ_CLK 3 +#endif -- cgit v1.2.3 From 8df29649903c067138180ef89f315b6f166b8732 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Wed, 2 Jul 2025 14:34:22 +0530 Subject: dt-bindings: clock: Add Qualcomm QCS615 Camera clock controller Add DT bindings for the Camera clock on QCS615 platforms. Add the relevant DT include definitions as well. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/20250702-qcs615-mm-v10-clock-controllers-v11-2-9c216e1615ab@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,qcs615-camcc.h | 110 ++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,qcs615-camcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,qcs615-camcc.h b/include/dt-bindings/clock/qcom,qcs615-camcc.h new file mode 100644 index 000000000000..aec57dddc067 --- /dev/null +++ b/include/dt-bindings/clock/qcom,qcs615-camcc.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_CAM_CC_QCS615_H +#define _DT_BINDINGS_CLK_QCOM_CAM_CC_QCS615_H + +/* CAM_CC clocks */ +#define CAM_CC_BPS_AHB_CLK 0 +#define CAM_CC_BPS_AREG_CLK 1 +#define CAM_CC_BPS_AXI_CLK 2 +#define CAM_CC_BPS_CLK 3 +#define CAM_CC_BPS_CLK_SRC 4 +#define CAM_CC_CAMNOC_ATB_CLK 5 +#define CAM_CC_CAMNOC_AXI_CLK 6 +#define CAM_CC_CCI_CLK 7 +#define CAM_CC_CCI_CLK_SRC 8 +#define CAM_CC_CORE_AHB_CLK 9 +#define CAM_CC_CPAS_AHB_CLK 10 +#define CAM_CC_CPHY_RX_CLK_SRC 11 +#define CAM_CC_CSI0PHYTIMER_CLK 12 +#define CAM_CC_CSI0PHYTIMER_CLK_SRC 13 +#define CAM_CC_CSI1PHYTIMER_CLK 14 +#define CAM_CC_CSI1PHYTIMER_CLK_SRC 15 +#define CAM_CC_CSI2PHYTIMER_CLK 16 +#define CAM_CC_CSI2PHYTIMER_CLK_SRC 17 +#define CAM_CC_CSIPHY0_CLK 18 +#define CAM_CC_CSIPHY1_CLK 19 +#define CAM_CC_CSIPHY2_CLK 20 +#define CAM_CC_FAST_AHB_CLK_SRC 21 +#define CAM_CC_ICP_ATB_CLK 22 +#define CAM_CC_ICP_CLK 23 +#define CAM_CC_ICP_CLK_SRC 24 +#define CAM_CC_ICP_CTI_CLK 25 +#define CAM_CC_ICP_TS_CLK 26 +#define CAM_CC_IFE_0_AXI_CLK 27 +#define CAM_CC_IFE_0_CLK 28 +#define CAM_CC_IFE_0_CLK_SRC 29 +#define CAM_CC_IFE_0_CPHY_RX_CLK 30 +#define CAM_CC_IFE_0_CSID_CLK 31 +#define CAM_CC_IFE_0_CSID_CLK_SRC 32 +#define CAM_CC_IFE_0_DSP_CLK 33 +#define CAM_CC_IFE_1_AXI_CLK 34 +#define CAM_CC_IFE_1_CLK 35 +#define CAM_CC_IFE_1_CLK_SRC 36 +#define CAM_CC_IFE_1_CPHY_RX_CLK 37 +#define CAM_CC_IFE_1_CSID_CLK 38 +#define CAM_CC_IFE_1_CSID_CLK_SRC 39 +#define CAM_CC_IFE_1_DSP_CLK 40 +#define CAM_CC_IFE_LITE_CLK 41 +#define CAM_CC_IFE_LITE_CLK_SRC 42 +#define CAM_CC_IFE_LITE_CPHY_RX_CLK 43 +#define CAM_CC_IFE_LITE_CSID_CLK 44 +#define CAM_CC_IFE_LITE_CSID_CLK_SRC 45 +#define CAM_CC_IPE_0_AHB_CLK 46 +#define CAM_CC_IPE_0_AREG_CLK 47 +#define CAM_CC_IPE_0_AXI_CLK 48 +#define CAM_CC_IPE_0_CLK 49 +#define CAM_CC_IPE_0_CLK_SRC 50 +#define CAM_CC_JPEG_CLK 51 +#define CAM_CC_JPEG_CLK_SRC 52 +#define CAM_CC_LRME_CLK 53 +#define CAM_CC_LRME_CLK_SRC 54 +#define CAM_CC_MCLK0_CLK 55 +#define CAM_CC_MCLK0_CLK_SRC 56 +#define CAM_CC_MCLK1_CLK 57 +#define CAM_CC_MCLK1_CLK_SRC 58 +#define CAM_CC_MCLK2_CLK 59 +#define CAM_CC_MCLK2_CLK_SRC 60 +#define CAM_CC_MCLK3_CLK 61 +#define CAM_CC_MCLK3_CLK_SRC 62 +#define CAM_CC_PLL0 63 +#define CAM_CC_PLL1 64 +#define CAM_CC_PLL2 65 +#define CAM_CC_PLL2_OUT_AUX2 66 +#define CAM_CC_PLL3 67 +#define CAM_CC_SLOW_AHB_CLK_SRC 68 +#define CAM_CC_SOC_AHB_CLK 69 +#define CAM_CC_SYS_TMR_CLK 70 + +/* CAM_CC power domains */ +#define BPS_GDSC 0 +#define IFE_0_GDSC 1 +#define IFE_1_GDSC 2 +#define IPE_0_GDSC 3 +#define TITAN_TOP_GDSC 4 + +/* CAM_CC resets */ +#define CAM_CC_BPS_BCR 0 +#define CAM_CC_CAMNOC_BCR 1 +#define CAM_CC_CCI_BCR 2 +#define CAM_CC_CPAS_BCR 3 +#define CAM_CC_CSI0PHY_BCR 4 +#define CAM_CC_CSI1PHY_BCR 5 +#define CAM_CC_CSI2PHY_BCR 6 +#define CAM_CC_ICP_BCR 7 +#define CAM_CC_IFE_0_BCR 8 +#define CAM_CC_IFE_1_BCR 9 +#define CAM_CC_IFE_LITE_BCR 10 +#define CAM_CC_IPE_0_BCR 11 +#define CAM_CC_JPEG_BCR 12 +#define CAM_CC_LRME_BCR 13 +#define CAM_CC_MCLK0_BCR 14 +#define CAM_CC_MCLK1_BCR 15 +#define CAM_CC_MCLK2_BCR 16 +#define CAM_CC_MCLK3_BCR 17 +#define CAM_CC_TITAN_TOP_BCR 18 + +#endif -- cgit v1.2.3 From 8b1750ea009f2774a3acd6c7bc9e61b5157101d1 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Wed, 2 Jul 2025 14:34:24 +0530 Subject: dt-bindings: clock: Add Qualcomm QCS615 Display clock controller Add DT bindings for the Display clock on QCS615 platforms. Add the relevant DT include definitions as well. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/20250702-qcs615-mm-v10-clock-controllers-v11-4-9c216e1615ab@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,qcs615-dispcc.h | 52 ++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,qcs615-dispcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,qcs615-dispcc.h b/include/dt-bindings/clock/qcom,qcs615-dispcc.h new file mode 100644 index 000000000000..9a29945c5762 --- /dev/null +++ b/include/dt-bindings/clock/qcom,qcs615-dispcc.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_DISP_CC_QCS615_H +#define _DT_BINDINGS_CLK_QCOM_DISP_CC_QCS615_H + +/* DISP_CC clocks */ +#define DISP_CC_MDSS_AHB_CLK 0 +#define DISP_CC_MDSS_AHB_CLK_SRC 1 +#define DISP_CC_MDSS_BYTE0_CLK 2 +#define DISP_CC_MDSS_BYTE0_CLK_SRC 3 +#define DISP_CC_MDSS_BYTE0_DIV_CLK_SRC 4 +#define DISP_CC_MDSS_BYTE0_INTF_CLK 5 +#define DISP_CC_MDSS_DP_AUX_CLK 6 +#define DISP_CC_MDSS_DP_AUX_CLK_SRC 7 +#define DISP_CC_MDSS_DP_CRYPTO_CLK 8 +#define DISP_CC_MDSS_DP_CRYPTO_CLK_SRC 9 +#define DISP_CC_MDSS_DP_LINK_CLK 10 +#define DISP_CC_MDSS_DP_LINK_CLK_SRC 11 +#define DISP_CC_MDSS_DP_LINK_DIV_CLK_SRC 12 +#define DISP_CC_MDSS_DP_LINK_INTF_CLK 13 +#define DISP_CC_MDSS_DP_PIXEL1_CLK 14 +#define DISP_CC_MDSS_DP_PIXEL1_CLK_SRC 15 +#define DISP_CC_MDSS_DP_PIXEL_CLK 16 +#define DISP_CC_MDSS_DP_PIXEL_CLK_SRC 17 +#define DISP_CC_MDSS_ESC0_CLK 18 +#define DISP_CC_MDSS_ESC0_CLK_SRC 19 +#define DISP_CC_MDSS_MDP_CLK 20 +#define DISP_CC_MDSS_MDP_CLK_SRC 21 +#define DISP_CC_MDSS_MDP_LUT_CLK 22 +#define DISP_CC_MDSS_NON_GDSC_AHB_CLK 23 +#define DISP_CC_MDSS_PCLK0_CLK 24 +#define DISP_CC_MDSS_PCLK0_CLK_SRC 25 +#define DISP_CC_MDSS_ROT_CLK 26 +#define DISP_CC_MDSS_ROT_CLK_SRC 27 +#define DISP_CC_MDSS_RSCC_AHB_CLK 28 +#define DISP_CC_MDSS_RSCC_VSYNC_CLK 29 +#define DISP_CC_MDSS_VSYNC_CLK 30 +#define DISP_CC_MDSS_VSYNC_CLK_SRC 31 +#define DISP_CC_PLL0 32 +#define DISP_CC_XO_CLK 33 + +/* DISP_CC power domains */ +#define MDSS_CORE_GDSC 0 + +/* DISP_CC resets */ +#define DISP_CC_MDSS_CORE_BCR 0 +#define DISP_CC_MDSS_RSCC_BCR 1 + +#endif -- cgit v1.2.3 From 3590dfbdd1b3e4ceba0b7daed2a396f644c277c4 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Wed, 2 Jul 2025 14:34:26 +0530 Subject: dt-bindings: clock: Add Qualcomm QCS615 Graphics clock controller Add DT bindings for the Graphics clock on QCS615 platforms. Add the relevant DT include definitions as well. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/20250702-qcs615-mm-v10-clock-controllers-v11-6-9c216e1615ab@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,qcs615-gpucc.h | 39 +++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,qcs615-gpucc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,qcs615-gpucc.h b/include/dt-bindings/clock/qcom,qcs615-gpucc.h new file mode 100644 index 000000000000..6d8394b90d59 --- /dev/null +++ b/include/dt-bindings/clock/qcom,qcs615-gpucc.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GPU_CC_QCS615_H +#define _DT_BINDINGS_CLK_QCOM_GPU_CC_QCS615_H + +/* GPU_CC clocks */ +#define CRC_DIV_PLL0 0 +#define CRC_DIV_PLL1 1 +#define GPU_CC_PLL0 2 +#define GPU_CC_PLL1 3 +#define GPU_CC_CRC_AHB_CLK 4 +#define GPU_CC_CX_GFX3D_CLK 5 +#define GPU_CC_CX_GFX3D_SLV_CLK 6 +#define GPU_CC_CX_GMU_CLK 7 +#define GPU_CC_CX_SNOC_DVM_CLK 8 +#define GPU_CC_CXO_AON_CLK 9 +#define GPU_CC_CXO_CLK 10 +#define GPU_CC_GMU_CLK_SRC 11 +#define GPU_CC_GX_GFX3D_CLK 12 +#define GPU_CC_GX_GFX3D_CLK_SRC 13 +#define GPU_CC_GX_GMU_CLK 14 +#define GPU_CC_HLOS1_VOTE_GPU_SMMU_CLK 15 +#define GPU_CC_SLEEP_CLK 16 + +/* GPU_CC power domains */ +#define CX_GDSC 0 +#define GX_GDSC 1 + +/* GPU_CC resets */ +#define GPU_CC_CX_BCR 0 +#define GPU_CC_GFX3D_AON_BCR 1 +#define GPU_CC_GMU_BCR 2 +#define GPU_CC_GX_BCR 3 +#define GPU_CC_XO_BCR 4 + +#endif -- cgit v1.2.3 From 9c51c66c997cae09c12ec250a9f538c0c23d8930 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Wed, 2 Jul 2025 14:34:28 +0530 Subject: dt-bindings: clock: Add Qualcomm QCS615 Video clock controller Add DT bindings for the Video clock on QCS615 platforms. Add the relevant DT include definitions as well. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/20250702-qcs615-mm-v10-clock-controllers-v11-8-9c216e1615ab@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,qcs615-videocc.h | 30 +++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,qcs615-videocc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,qcs615-videocc.h b/include/dt-bindings/clock/qcom,qcs615-videocc.h new file mode 100644 index 000000000000..0ca3efb21103 --- /dev/null +++ b/include/dt-bindings/clock/qcom,qcs615-videocc.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_VIDEO_CC_QCS615_H +#define _DT_BINDINGS_CLK_QCOM_VIDEO_CC_QCS615_H + +/* VIDEO_CC clocks */ +#define VIDEO_CC_SLEEP_CLK 0 +#define VIDEO_CC_SLEEP_CLK_SRC 1 +#define VIDEO_CC_VCODEC0_AXI_CLK 2 +#define VIDEO_CC_VCODEC0_CORE_CLK 3 +#define VIDEO_CC_VENUS_AHB_CLK 4 +#define VIDEO_CC_VENUS_CLK_SRC 5 +#define VIDEO_CC_VENUS_CTL_AXI_CLK 6 +#define VIDEO_CC_VENUS_CTL_CORE_CLK 7 +#define VIDEO_CC_XO_CLK 8 +#define VIDEO_PLL0 9 + +/* VIDEO_CC power domains */ +#define VCODEC0_GDSC 0 +#define VENUS_GDSC 1 + +/* VIDEO_CC resets */ +#define VIDEO_CC_INTERFACE_BCR 0 +#define VIDEO_CC_VCODEC0_BCR 1 +#define VIDEO_CC_VENUS_BCR 2 + +#endif -- cgit v1.2.3 From d0b706509fb04449add5446e51a494bfeadcac10 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 9 Jul 2025 12:08:56 +0200 Subject: dt-bindings: clock: qcom,x1e80100-gcc: Add missing video resets Add the missing video resets that are needed for the iris video codec. Acked-by: Rob Herring (Arm) Reviewed-by: Bryan O'Donoghue Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20250709-x1e-videocc-v2-4-ad1acf5674b4@linaro.org Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,x1e80100-gcc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,x1e80100-gcc.h b/include/dt-bindings/clock/qcom,x1e80100-gcc.h index 24ba9e2a5cf6..710c340f24a5 100644 --- a/include/dt-bindings/clock/qcom,x1e80100-gcc.h +++ b/include/dt-bindings/clock/qcom,x1e80100-gcc.h @@ -482,4 +482,6 @@ #define GCC_USB_1_PHY_BCR 85 #define GCC_USB_2_PHY_BCR 86 #define GCC_VIDEO_BCR 87 +#define GCC_VIDEO_AXI0_CLK_ARES 88 +#define GCC_VIDEO_AXI1_CLK_ARES 89 #endif -- cgit v1.2.3 From 95ba6820a665c25f372a3cdc9c469bb0a86bf174 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Tue, 15 Jul 2025 09:19:02 +0200 Subject: dt-bindings: clock: qcom: document the Milos Global Clock Controller Add bindings documentation for the Milos (e.g. SM7635) Global Clock Controller. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Luca Weiss Link: https://lore.kernel.org/r/20250715-sm7635-clocks-v3-2-18f9faac4984@fairphone.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,milos-gcc.h | 210 +++++++++++++++++++++++++++++ 1 file changed, 210 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,milos-gcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,milos-gcc.h b/include/dt-bindings/clock/qcom,milos-gcc.h new file mode 100644 index 000000000000..a530ca39e1ef --- /dev/null +++ b/include/dt-bindings/clock/qcom,milos-gcc.h @@ -0,0 +1,210 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2025, Luca Weiss + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GCC_MILOS_H +#define _DT_BINDINGS_CLK_QCOM_GCC_MILOS_H + +/* GCC clocks */ +#define GCC_GPLL0 0 +#define GCC_GPLL0_OUT_EVEN 1 +#define GCC_GPLL2 2 +#define GCC_GPLL4 3 +#define GCC_GPLL6 4 +#define GCC_GPLL7 5 +#define GCC_GPLL9 6 +#define GCC_AGGRE_NOC_PCIE_AXI_CLK 7 +#define GCC_AGGRE_UFS_PHY_AXI_CLK 8 +#define GCC_AGGRE_UFS_PHY_AXI_HW_CTL_CLK 9 +#define GCC_AGGRE_USB3_PRIM_AXI_CLK 10 +#define GCC_BOOT_ROM_AHB_CLK 11 +#define GCC_CAMERA_AHB_CLK 12 +#define GCC_CAMERA_HF_AXI_CLK 13 +#define GCC_CAMERA_HF_XO_CLK 14 +#define GCC_CAMERA_SF_AXI_CLK 15 +#define GCC_CAMERA_SF_XO_CLK 16 +#define GCC_CFG_NOC_PCIE_ANOC_AHB_CLK 17 +#define GCC_CFG_NOC_USB3_PRIM_AXI_CLK 18 +#define GCC_CNOC_PCIE_SF_AXI_CLK 19 +#define GCC_DDRSS_GPU_AXI_CLK 20 +#define GCC_DDRSS_PCIE_SF_QTB_CLK 21 +#define GCC_DISP_AHB_CLK 22 +#define GCC_DISP_GPLL0_DIV_CLK_SRC 23 +#define GCC_DISP_HF_AXI_CLK 24 +#define GCC_DISP_XO_CLK 25 +#define GCC_GP1_CLK 26 +#define GCC_GP1_CLK_SRC 27 +#define GCC_GP2_CLK 28 +#define GCC_GP2_CLK_SRC 29 +#define GCC_GP3_CLK 30 +#define GCC_GP3_CLK_SRC 31 +#define GCC_GPU_CFG_AHB_CLK 32 +#define GCC_GPU_GPLL0_CLK_SRC 33 +#define GCC_GPU_GPLL0_DIV_CLK_SRC 34 +#define GCC_GPU_MEMNOC_GFX_CLK 35 +#define GCC_GPU_SNOC_DVM_GFX_CLK 36 +#define GCC_PCIE_0_AUX_CLK 37 +#define GCC_PCIE_0_AUX_CLK_SRC 38 +#define GCC_PCIE_0_CFG_AHB_CLK 39 +#define GCC_PCIE_0_MSTR_AXI_CLK 40 +#define GCC_PCIE_0_PHY_RCHNG_CLK 41 +#define GCC_PCIE_0_PHY_RCHNG_CLK_SRC 42 +#define GCC_PCIE_0_PIPE_CLK 43 +#define GCC_PCIE_0_PIPE_CLK_SRC 44 +#define GCC_PCIE_0_PIPE_DIV2_CLK 45 +#define GCC_PCIE_0_PIPE_DIV2_CLK_SRC 46 +#define GCC_PCIE_0_SLV_AXI_CLK 47 +#define GCC_PCIE_0_SLV_Q2A_AXI_CLK 48 +#define GCC_PCIE_1_AUX_CLK 49 +#define GCC_PCIE_1_AUX_CLK_SRC 50 +#define GCC_PCIE_1_CFG_AHB_CLK 51 +#define GCC_PCIE_1_MSTR_AXI_CLK 52 +#define GCC_PCIE_1_PHY_RCHNG_CLK 53 +#define GCC_PCIE_1_PHY_RCHNG_CLK_SRC 54 +#define GCC_PCIE_1_PIPE_CLK 55 +#define GCC_PCIE_1_PIPE_CLK_SRC 56 +#define GCC_PCIE_1_PIPE_DIV2_CLK 57 +#define GCC_PCIE_1_PIPE_DIV2_CLK_SRC 58 +#define GCC_PCIE_1_SLV_AXI_CLK 59 +#define GCC_PCIE_1_SLV_Q2A_AXI_CLK 60 +#define GCC_PCIE_RSCC_CFG_AHB_CLK 61 +#define GCC_PCIE_RSCC_XO_CLK 62 +#define GCC_PDM2_CLK 63 +#define GCC_PDM2_CLK_SRC 64 +#define GCC_PDM_AHB_CLK 65 +#define GCC_PDM_XO4_CLK 66 +#define GCC_QMIP_CAMERA_NRT_AHB_CLK 67 +#define GCC_QMIP_CAMERA_RT_AHB_CLK 68 +#define GCC_QMIP_DISP_AHB_CLK 69 +#define GCC_QMIP_GPU_AHB_CLK 70 +#define GCC_QMIP_PCIE_AHB_CLK 71 +#define GCC_QMIP_VIDEO_CV_CPU_AHB_CLK 72 +#define GCC_QMIP_VIDEO_CVP_AHB_CLK 73 +#define GCC_QMIP_VIDEO_V_CPU_AHB_CLK 74 +#define GCC_QMIP_VIDEO_VCODEC_AHB_CLK 75 +#define GCC_QUPV3_WRAP0_CORE_2X_CLK 76 +#define GCC_QUPV3_WRAP0_CORE_CLK 77 +#define GCC_QUPV3_WRAP0_QSPI_REF_CLK 78 +#define GCC_QUPV3_WRAP0_QSPI_REF_CLK_SRC 79 +#define GCC_QUPV3_WRAP0_S0_CLK 80 +#define GCC_QUPV3_WRAP0_S0_CLK_SRC 81 +#define GCC_QUPV3_WRAP0_S1_CLK 82 +#define GCC_QUPV3_WRAP0_S1_CLK_SRC 83 +#define GCC_QUPV3_WRAP0_S2_CLK 84 +#define GCC_QUPV3_WRAP0_S2_CLK_SRC 85 +#define GCC_QUPV3_WRAP0_S3_CLK 86 +#define GCC_QUPV3_WRAP0_S3_CLK_SRC 87 +#define GCC_QUPV3_WRAP0_S4_CLK 88 +#define GCC_QUPV3_WRAP0_S4_CLK_SRC 89 +#define GCC_QUPV3_WRAP0_S5_CLK 90 +#define GCC_QUPV3_WRAP0_S5_CLK_SRC 91 +#define GCC_QUPV3_WRAP0_S6_CLK 92 +#define GCC_QUPV3_WRAP0_S6_CLK_SRC 93 +#define GCC_QUPV3_WRAP1_CORE_2X_CLK 94 +#define GCC_QUPV3_WRAP1_CORE_CLK 95 +#define GCC_QUPV3_WRAP1_QSPI_REF_CLK 96 +#define GCC_QUPV3_WRAP1_QSPI_REF_CLK_SRC 97 +#define GCC_QUPV3_WRAP1_S0_CLK 98 +#define GCC_QUPV3_WRAP1_S0_CLK_SRC 99 +#define GCC_QUPV3_WRAP1_S1_CLK 100 +#define GCC_QUPV3_WRAP1_S1_CLK_SRC 101 +#define GCC_QUPV3_WRAP1_S2_CLK 102 +#define GCC_QUPV3_WRAP1_S2_CLK_SRC 103 +#define GCC_QUPV3_WRAP1_S3_CLK 104 +#define GCC_QUPV3_WRAP1_S3_CLK_SRC 105 +#define GCC_QUPV3_WRAP1_S4_CLK 106 +#define GCC_QUPV3_WRAP1_S4_CLK_SRC 107 +#define GCC_QUPV3_WRAP1_S5_CLK 108 +#define GCC_QUPV3_WRAP1_S5_CLK_SRC 109 +#define GCC_QUPV3_WRAP1_S6_CLK 110 +#define GCC_QUPV3_WRAP1_S6_CLK_SRC 111 +#define GCC_QUPV3_WRAP_0_M_AHB_CLK 112 +#define GCC_QUPV3_WRAP_0_S_AHB_CLK 113 +#define GCC_QUPV3_WRAP_1_M_AHB_CLK 114 +#define GCC_QUPV3_WRAP_1_S_AHB_CLK 115 +#define GCC_SDCC1_AHB_CLK 116 +#define GCC_SDCC1_APPS_CLK 117 +#define GCC_SDCC1_APPS_CLK_SRC 118 +#define GCC_SDCC1_ICE_CORE_CLK 119 +#define GCC_SDCC1_ICE_CORE_CLK_SRC 120 +#define GCC_SDCC2_AHB_CLK 121 +#define GCC_SDCC2_APPS_CLK 122 +#define GCC_SDCC2_APPS_CLK_SRC 123 +#define GCC_UFS_PHY_AHB_CLK 124 +#define GCC_UFS_PHY_AXI_CLK 125 +#define GCC_UFS_PHY_AXI_CLK_SRC 126 +#define GCC_UFS_PHY_AXI_HW_CTL_CLK 127 +#define GCC_UFS_PHY_ICE_CORE_CLK 128 +#define GCC_UFS_PHY_ICE_CORE_CLK_SRC 129 +#define GCC_UFS_PHY_ICE_CORE_HW_CTL_CLK 130 +#define GCC_UFS_PHY_PHY_AUX_CLK 131 +#define GCC_UFS_PHY_PHY_AUX_CLK_SRC 132 +#define GCC_UFS_PHY_PHY_AUX_HW_CTL_CLK 133 +#define GCC_UFS_PHY_RX_SYMBOL_0_CLK 134 +#define GCC_UFS_PHY_RX_SYMBOL_0_CLK_SRC 135 +#define GCC_UFS_PHY_RX_SYMBOL_1_CLK 136 +#define GCC_UFS_PHY_RX_SYMBOL_1_CLK_SRC 137 +#define GCC_UFS_PHY_TX_SYMBOL_0_CLK 138 +#define GCC_UFS_PHY_TX_SYMBOL_0_CLK_SRC 139 +#define GCC_UFS_PHY_UNIPRO_CORE_CLK 140 +#define GCC_UFS_PHY_UNIPRO_CORE_CLK_SRC 141 +#define GCC_UFS_PHY_UNIPRO_CORE_HW_CTL_CLK 142 +#define GCC_USB30_PRIM_ATB_CLK 143 +#define GCC_USB30_PRIM_MASTER_CLK 144 +#define GCC_USB30_PRIM_MASTER_CLK_SRC 145 +#define GCC_USB30_PRIM_MOCK_UTMI_CLK 146 +#define GCC_USB30_PRIM_MOCK_UTMI_CLK_SRC 147 +#define GCC_USB30_PRIM_MOCK_UTMI_POSTDIV_CLK_SRC 148 +#define GCC_USB30_PRIM_SLEEP_CLK 149 +#define GCC_USB3_PRIM_PHY_AUX_CLK 150 +#define GCC_USB3_PRIM_PHY_AUX_CLK_SRC 151 +#define GCC_USB3_PRIM_PHY_COM_AUX_CLK 152 +#define GCC_USB3_PRIM_PHY_PIPE_CLK 153 +#define GCC_USB3_PRIM_PHY_PIPE_CLK_SRC 154 +#define GCC_VIDEO_AHB_CLK 155 +#define GCC_VIDEO_AXI0_CLK 156 +#define GCC_VIDEO_XO_CLK 157 + +/* GCC resets */ +#define GCC_CAMERA_BCR 0 +#define GCC_DISPLAY_BCR 1 +#define GCC_GPU_BCR 2 +#define GCC_PCIE_0_BCR 3 +#define GCC_PCIE_0_LINK_DOWN_BCR 4 +#define GCC_PCIE_0_NOCSR_COM_PHY_BCR 5 +#define GCC_PCIE_0_PHY_BCR 6 +#define GCC_PCIE_0_PHY_NOCSR_COM_PHY_BCR 7 +#define GCC_PCIE_1_BCR 8 +#define GCC_PCIE_1_LINK_DOWN_BCR 9 +#define GCC_PCIE_1_NOCSR_COM_PHY_BCR 10 +#define GCC_PCIE_1_PHY_BCR 11 +#define GCC_PCIE_1_PHY_NOCSR_COM_PHY_BCR 12 +#define GCC_PCIE_RSCC_BCR 13 +#define GCC_PDM_BCR 14 +#define GCC_QUPV3_WRAPPER_0_BCR 15 +#define GCC_QUPV3_WRAPPER_1_BCR 16 +#define GCC_QUSB2PHY_PRIM_BCR 17 +#define GCC_QUSB2PHY_SEC_BCR 18 +#define GCC_SDCC1_BCR 19 +#define GCC_SDCC2_BCR 20 +#define GCC_UFS_PHY_BCR 21 +#define GCC_USB30_PRIM_BCR 22 +#define GCC_USB3_DP_PHY_PRIM_BCR 23 +#define GCC_USB3_PHY_PRIM_BCR 24 +#define GCC_USB3PHY_PHY_PRIM_BCR 25 +#define GCC_VIDEO_AXI0_CLK_ARES 26 +#define GCC_VIDEO_BCR 27 + +/* GCC power domains */ +#define PCIE_0_GDSC 0 +#define PCIE_0_PHY_GDSC 1 +#define PCIE_1_GDSC 2 +#define PCIE_1_PHY_GDSC 3 +#define UFS_PHY_GDSC 4 +#define UFS_MEM_PHY_GDSC 5 +#define USB30_PRIM_GDSC 6 +#define USB3_PHY_GDSC 7 + +#endif -- cgit v1.2.3 From dbb9d53b7197b6b13d0137c0ea45902ef26e2bb4 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Tue, 15 Jul 2025 09:19:04 +0200 Subject: dt-bindings: clock: qcom: document the Milos Camera Clock Controller Add bindings documentation for the Milos (e.g. SM7635) Camera Clock Controller. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Luca Weiss Link: https://lore.kernel.org/r/20250715-sm7635-clocks-v3-4-18f9faac4984@fairphone.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,milos-camcc.h | 131 +++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,milos-camcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,milos-camcc.h b/include/dt-bindings/clock/qcom,milos-camcc.h new file mode 100644 index 000000000000..21925dca9a20 --- /dev/null +++ b/include/dt-bindings/clock/qcom,milos-camcc.h @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2025, Luca Weiss + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_CAM_CC_MILOS_H +#define _DT_BINDINGS_CLK_QCOM_CAM_CC_MILOS_H + +/* CAM_CC clocks */ +#define CAM_CC_PLL0 0 +#define CAM_CC_PLL0_OUT_EVEN 1 +#define CAM_CC_PLL0_OUT_ODD 2 +#define CAM_CC_PLL1 3 +#define CAM_CC_PLL1_OUT_EVEN 4 +#define CAM_CC_PLL2 5 +#define CAM_CC_PLL2_OUT_EVEN 6 +#define CAM_CC_PLL3 7 +#define CAM_CC_PLL3_OUT_EVEN 8 +#define CAM_CC_PLL4 9 +#define CAM_CC_PLL4_OUT_EVEN 10 +#define CAM_CC_PLL5 11 +#define CAM_CC_PLL5_OUT_EVEN 12 +#define CAM_CC_PLL6 13 +#define CAM_CC_PLL6_OUT_EVEN 14 +#define CAM_CC_BPS_AHB_CLK 15 +#define CAM_CC_BPS_AREG_CLK 16 +#define CAM_CC_BPS_CLK 17 +#define CAM_CC_BPS_CLK_SRC 18 +#define CAM_CC_CAMNOC_ATB_CLK 19 +#define CAM_CC_CAMNOC_AXI_CLK_SRC 20 +#define CAM_CC_CAMNOC_AXI_HF_CLK 21 +#define CAM_CC_CAMNOC_AXI_SF_CLK 22 +#define CAM_CC_CAMNOC_NRT_AXI_CLK 23 +#define CAM_CC_CAMNOC_RT_AXI_CLK 24 +#define CAM_CC_CCI_0_CLK 25 +#define CAM_CC_CCI_0_CLK_SRC 26 +#define CAM_CC_CCI_1_CLK 27 +#define CAM_CC_CCI_1_CLK_SRC 28 +#define CAM_CC_CORE_AHB_CLK 29 +#define CAM_CC_CPAS_AHB_CLK 30 +#define CAM_CC_CPHY_RX_CLK_SRC 31 +#define CAM_CC_CRE_AHB_CLK 32 +#define CAM_CC_CRE_CLK 33 +#define CAM_CC_CRE_CLK_SRC 34 +#define CAM_CC_CSI0PHYTIMER_CLK 35 +#define CAM_CC_CSI0PHYTIMER_CLK_SRC 36 +#define CAM_CC_CSI1PHYTIMER_CLK 37 +#define CAM_CC_CSI1PHYTIMER_CLK_SRC 38 +#define CAM_CC_CSI2PHYTIMER_CLK 39 +#define CAM_CC_CSI2PHYTIMER_CLK_SRC 40 +#define CAM_CC_CSI3PHYTIMER_CLK 41 +#define CAM_CC_CSI3PHYTIMER_CLK_SRC 42 +#define CAM_CC_CSIPHY0_CLK 43 +#define CAM_CC_CSIPHY1_CLK 44 +#define CAM_CC_CSIPHY2_CLK 45 +#define CAM_CC_CSIPHY3_CLK 46 +#define CAM_CC_FAST_AHB_CLK_SRC 47 +#define CAM_CC_GDSC_CLK 48 +#define CAM_CC_ICP_ATB_CLK 49 +#define CAM_CC_ICP_CLK 50 +#define CAM_CC_ICP_CLK_SRC 51 +#define CAM_CC_ICP_CTI_CLK 52 +#define CAM_CC_ICP_TS_CLK 53 +#define CAM_CC_MCLK0_CLK 54 +#define CAM_CC_MCLK0_CLK_SRC 55 +#define CAM_CC_MCLK1_CLK 56 +#define CAM_CC_MCLK1_CLK_SRC 57 +#define CAM_CC_MCLK2_CLK 58 +#define CAM_CC_MCLK2_CLK_SRC 59 +#define CAM_CC_MCLK3_CLK 60 +#define CAM_CC_MCLK3_CLK_SRC 61 +#define CAM_CC_MCLK4_CLK 62 +#define CAM_CC_MCLK4_CLK_SRC 63 +#define CAM_CC_OPE_0_AHB_CLK 64 +#define CAM_CC_OPE_0_AREG_CLK 65 +#define CAM_CC_OPE_0_CLK 66 +#define CAM_CC_OPE_0_CLK_SRC 67 +#define CAM_CC_SLEEP_CLK 68 +#define CAM_CC_SLEEP_CLK_SRC 69 +#define CAM_CC_SLOW_AHB_CLK_SRC 70 +#define CAM_CC_SOC_AHB_CLK 71 +#define CAM_CC_SYS_TMR_CLK 72 +#define CAM_CC_TFE_0_AHB_CLK 73 +#define CAM_CC_TFE_0_CLK 74 +#define CAM_CC_TFE_0_CLK_SRC 75 +#define CAM_CC_TFE_0_CPHY_RX_CLK 76 +#define CAM_CC_TFE_0_CSID_CLK 77 +#define CAM_CC_TFE_0_CSID_CLK_SRC 78 +#define CAM_CC_TFE_1_AHB_CLK 79 +#define CAM_CC_TFE_1_CLK 80 +#define CAM_CC_TFE_1_CLK_SRC 81 +#define CAM_CC_TFE_1_CPHY_RX_CLK 82 +#define CAM_CC_TFE_1_CSID_CLK 83 +#define CAM_CC_TFE_1_CSID_CLK_SRC 84 +#define CAM_CC_TFE_2_AHB_CLK 85 +#define CAM_CC_TFE_2_CLK 86 +#define CAM_CC_TFE_2_CLK_SRC 87 +#define CAM_CC_TFE_2_CPHY_RX_CLK 88 +#define CAM_CC_TFE_2_CSID_CLK 89 +#define CAM_CC_TFE_2_CSID_CLK_SRC 90 +#define CAM_CC_TOP_SHIFT_CLK 91 +#define CAM_CC_XO_CLK_SRC 92 + +/* CAM_CC resets */ +#define CAM_CC_BPS_BCR 0 +#define CAM_CC_CAMNOC_BCR 1 +#define CAM_CC_CAMSS_TOP_BCR 2 +#define CAM_CC_CCI_0_BCR 3 +#define CAM_CC_CCI_1_BCR 4 +#define CAM_CC_CPAS_BCR 5 +#define CAM_CC_CRE_BCR 6 +#define CAM_CC_CSI0PHY_BCR 7 +#define CAM_CC_CSI1PHY_BCR 8 +#define CAM_CC_CSI2PHY_BCR 9 +#define CAM_CC_CSI3PHY_BCR 10 +#define CAM_CC_ICP_BCR 11 +#define CAM_CC_MCLK0_BCR 12 +#define CAM_CC_MCLK1_BCR 13 +#define CAM_CC_MCLK2_BCR 14 +#define CAM_CC_MCLK3_BCR 15 +#define CAM_CC_MCLK4_BCR 16 +#define CAM_CC_OPE_0_BCR 17 +#define CAM_CC_TFE_0_BCR 18 +#define CAM_CC_TFE_1_BCR 19 +#define CAM_CC_TFE_2_BCR 20 + +/* CAM_CC power domains */ +#define CAM_CC_CAMSS_TOP_GDSC 0 + +#endif -- cgit v1.2.3 From 63edb206a3a93f523579df7f49f2989aae4e8450 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Tue, 15 Jul 2025 09:19:06 +0200 Subject: dt-bindings: clock: qcom: document the Milos Display Clock Controller Add bindings documentation for the Milos (e.g. SM7635) Display Clock Controller. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Luca Weiss Link: https://lore.kernel.org/r/20250715-sm7635-clocks-v3-6-18f9faac4984@fairphone.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,milos-dispcc.h | 61 +++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,milos-dispcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,milos-dispcc.h b/include/dt-bindings/clock/qcom,milos-dispcc.h new file mode 100644 index 000000000000..c70f23f32f0a --- /dev/null +++ b/include/dt-bindings/clock/qcom,milos-dispcc.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2025, Luca Weiss + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_DISP_CC_MILOS_H +#define _DT_BINDINGS_CLK_QCOM_DISP_CC_MILOS_H + +/* DISP_CC clocks */ +#define DISP_CC_PLL0 0 +#define DISP_CC_MDSS_ACCU_CLK 1 +#define DISP_CC_MDSS_AHB1_CLK 2 +#define DISP_CC_MDSS_AHB_CLK 3 +#define DISP_CC_MDSS_AHB_CLK_SRC 4 +#define DISP_CC_MDSS_BYTE0_CLK 5 +#define DISP_CC_MDSS_BYTE0_CLK_SRC 6 +#define DISP_CC_MDSS_BYTE0_DIV_CLK_SRC 7 +#define DISP_CC_MDSS_BYTE0_INTF_CLK 8 +#define DISP_CC_MDSS_DPTX0_AUX_CLK 9 +#define DISP_CC_MDSS_DPTX0_AUX_CLK_SRC 10 +#define DISP_CC_MDSS_DPTX0_CRYPTO_CLK 11 +#define DISP_CC_MDSS_DPTX0_LINK_CLK 12 +#define DISP_CC_MDSS_DPTX0_LINK_CLK_SRC 13 +#define DISP_CC_MDSS_DPTX0_LINK_DIV_CLK_SRC 14 +#define DISP_CC_MDSS_DPTX0_LINK_INTF_CLK 15 +#define DISP_CC_MDSS_DPTX0_PIXEL0_CLK 16 +#define DISP_CC_MDSS_DPTX0_PIXEL0_CLK_SRC 17 +#define DISP_CC_MDSS_DPTX0_PIXEL1_CLK 18 +#define DISP_CC_MDSS_DPTX0_PIXEL1_CLK_SRC 19 +#define DISP_CC_MDSS_DPTX0_USB_ROUTER_LINK_INTF_CLK 20 +#define DISP_CC_MDSS_ESC0_CLK 21 +#define DISP_CC_MDSS_ESC0_CLK_SRC 22 +#define DISP_CC_MDSS_MDP1_CLK 23 +#define DISP_CC_MDSS_MDP_CLK 24 +#define DISP_CC_MDSS_MDP_CLK_SRC 25 +#define DISP_CC_MDSS_MDP_LUT1_CLK 26 +#define DISP_CC_MDSS_MDP_LUT_CLK 27 +#define DISP_CC_MDSS_NON_GDSC_AHB_CLK 28 +#define DISP_CC_MDSS_PCLK0_CLK 29 +#define DISP_CC_MDSS_PCLK0_CLK_SRC 30 +#define DISP_CC_MDSS_RSCC_AHB_CLK 31 +#define DISP_CC_MDSS_RSCC_VSYNC_CLK 32 +#define DISP_CC_MDSS_VSYNC1_CLK 33 +#define DISP_CC_MDSS_VSYNC_CLK 34 +#define DISP_CC_MDSS_VSYNC_CLK_SRC 35 +#define DISP_CC_SLEEP_CLK 36 +#define DISP_CC_SLEEP_CLK_SRC 37 +#define DISP_CC_XO_CLK 38 +#define DISP_CC_XO_CLK_SRC 39 + +/* DISP_CC resets */ +#define DISP_CC_MDSS_CORE_BCR 0 +#define DISP_CC_MDSS_CORE_INT2_BCR 1 +#define DISP_CC_MDSS_RSCC_BCR 2 + +/* DISP_CC power domains */ +#define DISP_CC_MDSS_CORE_GDSC 0 +#define DISP_CC_MDSS_CORE_INT2_GDSC 1 + +#endif -- cgit v1.2.3 From 7e5368a14b8c295470ab07d2a9ad8ee9bf7187ee Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Tue, 15 Jul 2025 09:19:08 +0200 Subject: dt-bindings: clock: qcom: document the Milos GPU Clock Controller Add bindings documentation for the Milos (e.g. SM7635) Graphics Clock Controller. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Luca Weiss Link: https://lore.kernel.org/r/20250715-sm7635-clocks-v3-8-18f9faac4984@fairphone.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,milos-gpucc.h | 56 ++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,milos-gpucc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,milos-gpucc.h b/include/dt-bindings/clock/qcom,milos-gpucc.h new file mode 100644 index 000000000000..6ff1925d409f --- /dev/null +++ b/include/dt-bindings/clock/qcom,milos-gpucc.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2025, Luca Weiss + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GPU_CC_MILOS_H +#define _DT_BINDINGS_CLK_QCOM_GPU_CC_MILOS_H + +/* GPU_CC clocks */ +#define GPU_CC_PLL0 0 +#define GPU_CC_PLL0_OUT_EVEN 1 +#define GPU_CC_AHB_CLK 2 +#define GPU_CC_CB_CLK 3 +#define GPU_CC_CX_ACCU_SHIFT_CLK 4 +#define GPU_CC_CX_FF_CLK 5 +#define GPU_CC_CX_GMU_CLK 6 +#define GPU_CC_CXO_AON_CLK 7 +#define GPU_CC_CXO_CLK 8 +#define GPU_CC_DEMET_CLK 9 +#define GPU_CC_DEMET_DIV_CLK_SRC 10 +#define GPU_CC_DPM_CLK 11 +#define GPU_CC_FF_CLK_SRC 12 +#define GPU_CC_FREQ_MEASURE_CLK 13 +#define GPU_CC_GMU_CLK_SRC 14 +#define GPU_CC_GX_ACCU_SHIFT_CLK 15 +#define GPU_CC_GX_ACD_AHB_FF_CLK 16 +#define GPU_CC_GX_AHB_FF_CLK 17 +#define GPU_CC_GX_GMU_CLK 18 +#define GPU_CC_GX_RCG_AHB_FF_CLK 19 +#define GPU_CC_HLOS1_VOTE_GPU_SMMU_CLK 20 +#define GPU_CC_HUB_AON_CLK 21 +#define GPU_CC_HUB_CLK_SRC 22 +#define GPU_CC_HUB_CX_INT_CLK 23 +#define GPU_CC_HUB_DIV_CLK_SRC 24 +#define GPU_CC_MEMNOC_GFX_CLK 25 +#define GPU_CC_RSCC_HUB_AON_CLK 26 +#define GPU_CC_RSCC_XO_AON_CLK 27 +#define GPU_CC_SLEEP_CLK 28 +#define GPU_CC_XO_CLK_SRC 29 +#define GPU_CC_XO_DIV_CLK_SRC 30 + +/* GPU_CC resets */ +#define GPU_CC_CB_BCR 0 +#define GPU_CC_CX_BCR 1 +#define GPU_CC_FAST_HUB_BCR 2 +#define GPU_CC_FF_BCR 3 +#define GPU_CC_GMU_BCR 4 +#define GPU_CC_GX_BCR 5 +#define GPU_CC_RBCPR_BCR 6 +#define GPU_CC_XO_BCR 7 + +/* GPU_CC power domains */ +#define GPU_CC_CX_GDSC 0 + +#endif -- cgit v1.2.3 From a4937e9741867865bb307ae9dde6ef393b68540b Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Tue, 15 Jul 2025 09:19:10 +0200 Subject: dt-bindings: clock: qcom: document the Milos Video Clock Controller Add bindings documentation for the Milos (e.g. SM7635) Video Clock Controller. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Luca Weiss Link: https://lore.kernel.org/r/20250715-sm7635-clocks-v3-10-18f9faac4984@fairphone.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,milos-videocc.h | 36 ++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,milos-videocc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,milos-videocc.h b/include/dt-bindings/clock/qcom,milos-videocc.h new file mode 100644 index 000000000000..3544db81ffae --- /dev/null +++ b/include/dt-bindings/clock/qcom,milos-videocc.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2025, Luca Weiss + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_VIDEO_CC_MILOS_H +#define _DT_BINDINGS_CLK_QCOM_VIDEO_CC_MILOS_H + +/* VIDEO_CC clocks */ +#define VIDEO_CC_PLL0 0 +#define VIDEO_CC_AHB_CLK 1 +#define VIDEO_CC_AHB_CLK_SRC 2 +#define VIDEO_CC_MVS0_CLK 3 +#define VIDEO_CC_MVS0_CLK_SRC 4 +#define VIDEO_CC_MVS0_DIV_CLK_SRC 5 +#define VIDEO_CC_MVS0_SHIFT_CLK 6 +#define VIDEO_CC_MVS0C_CLK 7 +#define VIDEO_CC_MVS0C_DIV2_DIV_CLK_SRC 8 +#define VIDEO_CC_MVS0C_SHIFT_CLK 9 +#define VIDEO_CC_SLEEP_CLK 10 +#define VIDEO_CC_SLEEP_CLK_SRC 11 +#define VIDEO_CC_XO_CLK 12 +#define VIDEO_CC_XO_CLK_SRC 13 + +/* VIDEO_CC resets */ +#define VIDEO_CC_INTERFACE_BCR 0 +#define VIDEO_CC_MVS0_BCR 1 +#define VIDEO_CC_MVS0C_CLK_ARES 2 +#define VIDEO_CC_MVS0C_BCR 3 + +/* VIDEO_CC power domains */ +#define VIDEO_CC_MVS0_GDSC 0 +#define VIDEO_CC_MVS0C_GDSC 1 + +#endif -- cgit v1.2.3 From 2d72afb340657f03f7261e9243b44457a9228ac7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 16 Jul 2025 20:39:14 +0200 Subject: netfilter: nf_conntrack: fix crash due to removal of uninitialised entry A crash in conntrack was reported while trying to unlink the conntrack entry from the hash bucket list: [exception RIP: __nf_ct_delete_from_lists+172] [..] #7 [ff539b5a2b043aa0] nf_ct_delete at ffffffffc124d421 [nf_conntrack] #8 [ff539b5a2b043ad0] nf_ct_gc_expired at ffffffffc124d999 [nf_conntrack] #9 [ff539b5a2b043ae0] __nf_conntrack_find_get at ffffffffc124efbc [nf_conntrack] [..] The nf_conn struct is marked as allocated from slab but appears to be in a partially initialised state: ct hlist pointer is garbage; looks like the ct hash value (hence crash). ct->status is equal to IPS_CONFIRMED|IPS_DYING, which is expected ct->timeout is 30000 (=30s), which is unexpected. Everything else looks like normal udp conntrack entry. If we ignore ct->status and pretend its 0, the entry matches those that are newly allocated but not yet inserted into the hash: - ct hlist pointers are overloaded and store/cache the raw tuple hash - ct->timeout matches the relative time expected for a new udp flow rather than the absolute 'jiffies' value. If it were not for the presence of IPS_CONFIRMED, __nf_conntrack_find_get() would have skipped the entry. Theory is that we did hit following race: cpu x cpu y cpu z found entry E found entry E E is expired nf_ct_delete() return E to rcu slab init_conntrack E is re-inited, ct->status set to 0 reply tuplehash hnnode.pprev stores hash value. cpu y found E right before it was deleted on cpu x. E is now re-inited on cpu z. cpu y was preempted before checking for expiry and/or confirm bit. ->refcnt set to 1 E now owned by skb ->timeout set to 30000 If cpu y were to resume now, it would observe E as expired but would skip E due to missing CONFIRMED bit. nf_conntrack_confirm gets called sets: ct->status |= CONFIRMED This is wrong: E is not yet added to hashtable. cpu y resumes, it observes E as expired but CONFIRMED: nf_ct_expired() -> yes (ct->timeout is 30s) confirmed bit set. cpu y will try to delete E from the hashtable: nf_ct_delete() -> set DYING bit __nf_ct_delete_from_lists Even this scenario doesn't guarantee a crash: cpu z still holds the table bucket lock(s) so y blocks: wait for spinlock held by z CONFIRMED is set but there is no guarantee ct will be added to hash: "chaintoolong" or "clash resolution" logic both skip the insert step. reply hnnode.pprev still stores the hash value. unlocks spinlock return NF_DROP In case CPU z does insert the entry into the hashtable, cpu y will unlink E again right away but no crash occurs. Without 'cpu y' race, 'garbage' hlist is of no consequence: ct refcnt remains at 1, eventually skb will be free'd and E gets destroyed via: nf_conntrack_put -> nf_conntrack_destroy -> nf_ct_destroy. To resolve this, move the IPS_CONFIRMED assignment after the table insertion but before the unlock. Pablo points out that the confirm-bit-store could be reordered to happen before hlist add resp. the timeout fixup, so switch to set_bit and before_atomic memory barrier to prevent this. It doesn't matter if other CPUs can observe a newly inserted entry right before the CONFIRMED bit was set: Such event cannot be distinguished from above "E is the old incarnation" case: the entry will be skipped. Also change nf_ct_should_gc() to first check the confirmed bit. The gc sequence is: 1. Check if entry has expired, if not skip to next entry 2. Obtain a reference to the expired entry. 3. Call nf_ct_should_gc() to double-check step 1. nf_ct_should_gc() is thus called only for entries that already failed an expiry check. After this patch, once the confirmed bit check passes ct->timeout has been altered to reflect the absolute 'best before' date instead of a relative time. Step 3 will therefore not remove the entry. Without this change to nf_ct_should_gc() we could still get this sequence: 1. Check if entry has expired. 2. Obtain a reference. 3. Call nf_ct_should_gc() to double-check step 1: 4 - entry is still observed as expired 5 - meanwhile, ct->timeout is corrected to absolute value on other CPU and confirm bit gets set 6 - confirm bit is seen 7 - valid entry is removed again First do check 6), then 4) so the gc expiry check always picks up either confirmed bit unset (entry gets skipped) or expiry re-check failure for re-inited conntrack objects. This change cannot be backported to releases before 5.19. Without commit 8a75a2c17410 ("netfilter: conntrack: remove unconfirmed list") |= IPS_CONFIRMED line cannot be moved without further changes. Cc: Razvan Cojocaru Link: https://lore.kernel.org/netfilter-devel/20250627142758.25664-1-fw@strlen.de/ Link: https://lore.kernel.org/netfilter-devel/4239da15-83ff-4ca4-939d-faef283471bb@gmail.com/ Fixes: 1397af5bfd7d ("netfilter: conntrack: remove the percpu dying list") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 3f02a45773e8..ca26274196b9 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -306,8 +306,19 @@ static inline bool nf_ct_is_expired(const struct nf_conn *ct) /* use after obtaining a reference count */ static inline bool nf_ct_should_gc(const struct nf_conn *ct) { - return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) && - !nf_ct_is_dying(ct); + if (!nf_ct_is_confirmed(ct)) + return false; + + /* load ct->timeout after is_confirmed() test. + * Pairs with __nf_conntrack_confirm() which: + * 1. Increases ct->timeout value + * 2. Inserts ct into rcu hlist + * 3. Sets the confirmed bit + * 4. Unlocks the hlist lock + */ + smp_acquire__after_ctrl_dep(); + + return nf_ct_is_expired(ct) && !nf_ct_is_dying(ct); } #define NF_CT_DAY (86400 * HZ) -- cgit v1.2.3 From ddb8172cdf8854a215ce23ad0f20b2578fa512db Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 8 Jul 2025 16:33:44 +0300 Subject: watchdog: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Note that kernel.h is discouraged to be included as it's written at the top of that file. Signed-off-by: Andy Shevchenko Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250708133646.70384-3-andriy.shevchenko@linux.intel.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- include/linux/watchdog.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 99660197a36c..8c60687a3e55 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -9,14 +9,18 @@ #ifndef _LINUX_WATCHDOG_H #define _LINUX_WATCHDOG_H - #include -#include -#include -#include +#include #include +#include +#include + #include +struct attribute_group; +struct device; +struct module; + struct watchdog_ops; struct watchdog_device; struct watchdog_core_data; -- cgit v1.2.3 From b5cd5f1e50205831cb078f5c52359004eb1cbe74 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Tue, 24 Jun 2025 21:16:33 -0700 Subject: nvme: fix typo in status code constant for self-test in progress Correct a typo error in the NVMe status code constant from NVME_SC_SELT_TEST_IN_PROGRESS to NVME_SC_SELF_TEST_IN_PROGRESS to accurately reflect its meaning. Signed-off-by: Alok Tiwari Reviewed-by: Randy Dunlap Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index b65a1b9f2116..655d194f8e72 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -2155,7 +2155,7 @@ enum { NVME_SC_NS_NOT_ATTACHED = 0x11a, NVME_SC_THIN_PROV_NOT_SUPP = 0x11b, NVME_SC_CTRL_LIST_INVALID = 0x11c, - NVME_SC_SELT_TEST_IN_PROGRESS = 0x11d, + NVME_SC_SELF_TEST_IN_PROGRESS = 0x11d, NVME_SC_BP_WRITE_PROHIBITED = 0x11e, NVME_SC_CTRL_ID_INVALID = 0x11f, NVME_SC_SEC_CTRL_STATE_INVALID = 0x120, -- cgit v1.2.3 From 6381061d82141909c382811978ccdd7566698bca Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 11 Jul 2025 10:52:53 +0000 Subject: ilog2: add max_pow_of_two_factor() Relocate the function max_pow_of_two_factor() to common ilog2.h from the xfs code, as it will be used elsewhere. Also simplify the function, as advised by Mikulas Patocka. Signed-off-by: John Garry Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20250711105258.3135198-2-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/log2.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/log2.h b/include/linux/log2.h index 1366cb688a6d..2eac3fc9303d 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -255,4 +255,18 @@ int __bits_per(unsigned long n) ) : \ __bits_per(n) \ ) + +/** + * max_pow_of_two_factor - return highest power-of-2 factor + * @n: parameter + * + * find highest power-of-2 which is evenly divisible into n. + * 0 is returned for n == 0 or 1. + */ +static inline __attribute__((const)) +unsigned int max_pow_of_two_factor(unsigned int n) +{ + return n & -n; +} + #endif /* _LINUX_LOG2_H */ -- cgit v1.2.3 From 962fb1f651c2cf2083e0c3ef53ba69e3b96d3fbc Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 17 Jul 2025 08:43:42 +0100 Subject: rxrpc: Fix recv-recv race of completed call If a call receives an event (such as incoming data), the call gets placed on the socket's queue and a thread in recvmsg can be awakened to go and process it. Once the thread has picked up the call off of the queue, further events will cause it to be requeued, and once the socket lock is dropped (recvmsg uses call->user_mutex to allow the socket to be used in parallel), a second thread can come in and its recvmsg can pop the call off the socket queue again. In such a case, the first thread will be receiving stuff from the call and the second thread will be blocked on call->user_mutex. The first thread can, at this point, process both the event that it picked call for and the event that the second thread picked the call for and may see the call terminate - in which case the call will be "released", decoupling the call from the user call ID assigned to it (RXRPC_USER_CALL_ID in the control message). The first thread will return okay, but then the second thread will wake up holding the user_mutex and, if it sees that the call has been released by the first thread, it will BUG thusly: kernel BUG at net/rxrpc/recvmsg.c:474! Fix this by just dequeuing the call and ignoring it if it is seen to be already released. We can't tell userspace about it anyway as the user call ID has become stale. Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Reported-by: Junvyyang, Tencent Zhuque Lab Signed-off-by: David Howells Reviewed-by: Jeffrey Altman cc: LePremierHomme cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250717074350.3767366-3-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 378d2dfc7392..e7dcfb1369b6 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -330,12 +330,15 @@ EM(rxrpc_call_put_userid, "PUT user-id ") \ EM(rxrpc_call_see_accept, "SEE accept ") \ EM(rxrpc_call_see_activate_client, "SEE act-clnt") \ + EM(rxrpc_call_see_already_released, "SEE alrdy-rl") \ EM(rxrpc_call_see_connect_failed, "SEE con-fail") \ EM(rxrpc_call_see_connected, "SEE connect ") \ EM(rxrpc_call_see_conn_abort, "SEE conn-abt") \ + EM(rxrpc_call_see_discard, "SEE discard ") \ EM(rxrpc_call_see_disconnected, "SEE disconn ") \ EM(rxrpc_call_see_distribute_error, "SEE dist-err") \ EM(rxrpc_call_see_input, "SEE input ") \ + EM(rxrpc_call_see_recvmsg, "SEE recvmsg ") \ EM(rxrpc_call_see_release, "SEE release ") \ EM(rxrpc_call_see_userid_exists, "SEE u-exists") \ EM(rxrpc_call_see_waiting_call, "SEE q-conn ") \ -- cgit v1.2.3 From 2fd895842d49c23137ae48252dd211e5d6d8a3ed Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 17 Jul 2025 08:43:43 +0100 Subject: rxrpc: Fix notification vs call-release vs recvmsg When a call is released, rxrpc takes the spinlock and removes it from ->recvmsg_q in an effort to prevent racing recvmsg() invocations from seeing the same call. Now, rxrpc_recvmsg() only takes the spinlock when actually removing a call from the queue; it doesn't, however, take it in the lead up to that when it checks to see if the queue is empty. It *does* hold the socket lock, which prevents a recvmsg/recvmsg race - but this doesn't prevent sendmsg from ending the call because sendmsg() drops the socket lock and relies on the call->user_mutex. Fix this by firstly removing the bit in rxrpc_release_call() that dequeues the released call and, instead, rely on recvmsg() to simply discard released calls (done in a preceding fix). Secondly, rxrpc_notify_socket() is abandoned if the call is already marked as released rather than trying to be clever by setting both pointers in call->recvmsg_link to NULL to trick list_empty(). This isn't perfect and can still race, resulting in a released call on the queue, but recvmsg() will now clean that up. Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Signed-off-by: David Howells Reviewed-by: Jeffrey Altman cc: Marc Dionne cc: Junvyyang, Tencent Zhuque Lab cc: LePremierHomme cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250717074350.3767366-4-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index e7dcfb1369b6..de6f6d25767c 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -322,10 +322,10 @@ EM(rxrpc_call_put_kernel, "PUT kernel ") \ EM(rxrpc_call_put_poke, "PUT poke ") \ EM(rxrpc_call_put_recvmsg, "PUT recvmsg ") \ + EM(rxrpc_call_put_release_recvmsg_q, "PUT rls-rcmq") \ EM(rxrpc_call_put_release_sock, "PUT rls-sock") \ EM(rxrpc_call_put_release_sock_tba, "PUT rls-sk-a") \ EM(rxrpc_call_put_sendmsg, "PUT sendmsg ") \ - EM(rxrpc_call_put_unnotify, "PUT unnotify") \ EM(rxrpc_call_put_userid_exists, "PUT u-exists") \ EM(rxrpc_call_put_userid, "PUT user-id ") \ EM(rxrpc_call_see_accept, "SEE accept ") \ @@ -338,6 +338,7 @@ EM(rxrpc_call_see_disconnected, "SEE disconn ") \ EM(rxrpc_call_see_distribute_error, "SEE dist-err") \ EM(rxrpc_call_see_input, "SEE input ") \ + EM(rxrpc_call_see_notify_released, "SEE nfy-rlsd") \ EM(rxrpc_call_see_recvmsg, "SEE recvmsg ") \ EM(rxrpc_call_see_release, "SEE release ") \ EM(rxrpc_call_see_userid_exists, "SEE u-exists") \ -- cgit v1.2.3 From dfe25fbaedfc2a07281ed5ff0442270217d25b31 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Fri, 4 Jul 2025 11:08:04 -0700 Subject: cgroup: llist: avoid memory tears for llist_node Before the commit 36df6e3dbd7e ("cgroup: make css_rstat_updated nmi safe"), the struct llist_node is expected to be private to the one inserting the node to the lockless list or the one removing the node from the lockless list. After the mentioned commit, the llist_node in the rstat code is per-cpu shared between the stacked contexts i.e. process, softirq, hardirq & nmi. It is possible the compiler may tear the loads or stores of llist_node. Let's avoid that. KCSAN reported the following race: Reported by Kernel Concurrency Sanitizer on: CPU: 60 UID: 0 PID: 5425 ... 6.16.0-rc3-next-20250626 #1 NONE Tainted: [E]=UNSIGNED_MODULE Hardware name: ... ================================================================== ================================================================== BUG: KCSAN: data-race in css_rstat_flush / css_rstat_updated write to 0xffffe8fffe1c85f0 of 8 bytes by task 1061 on cpu 1: css_rstat_flush+0x1b8/0xeb0 __mem_cgroup_flush_stats+0x184/0x190 flush_memcg_stats_dwork+0x22/0x50 process_one_work+0x335/0x630 worker_thread+0x5f1/0x8a0 kthread+0x197/0x340 ret_from_fork+0xd3/0x110 ret_from_fork_asm+0x11/0x20 read to 0xffffe8fffe1c85f0 of 8 bytes by task 3551 on cpu 15: css_rstat_updated+0x81/0x180 mod_memcg_lruvec_state+0x113/0x2d0 __mod_lruvec_state+0x3d/0x50 lru_add+0x21e/0x3f0 folio_batch_move_lru+0x80/0x1b0 __folio_batch_add_and_move+0xd7/0x160 folio_add_lru_vma+0x42/0x50 do_anonymous_page+0x892/0xe90 __handle_mm_fault+0xfaa/0x1520 handle_mm_fault+0xdc/0x350 do_user_addr_fault+0x1dc/0x650 exc_page_fault+0x5c/0x110 asm_exc_page_fault+0x22/0x30 value changed: 0xffffe8fffe18e0d0 -> 0xffffe8fffe1c85f0 $ ./scripts/faddr2line vmlinux css_rstat_flush+0x1b8/0xeb0 css_rstat_flush+0x1b8/0xeb0: init_llist_node at include/linux/llist.h:86 (inlined by) llist_del_first_init at include/linux/llist.h:308 (inlined by) css_process_update_tree at kernel/cgroup/rstat.c:148 (inlined by) css_rstat_updated_list at kernel/cgroup/rstat.c:258 (inlined by) css_rstat_flush at kernel/cgroup/rstat.c:389 $ ./scripts/faddr2line vmlinux css_rstat_updated+0x81/0x180 css_rstat_updated+0x81/0x180: css_rstat_updated at kernel/cgroup/rstat.c:90 (discriminator 1) These are expected race and a simple READ_ONCE/WRITE_ONCE resolves these reports. However let's add comments to explain the race and the need for memory barriers if stronger guarantees are needed. More specifically the rstat updater and the flusher can race and cause a scenario where the stats updater skips adding the css to the lockless list but the flusher might not see those updates done by the skipped updater. This is benign race and the subsequent flusher will flush those stats and at the moment there aren't any rstat users which are not fine with this kind of race. However some future user might want more stricter guarantee, so let's add appropriate comments to ease the job of future users. Signed-off-by: Shakeel Butt Reviewed-by: Paul E. McKenney Fixes: 36df6e3dbd7e ("cgroup: make css_rstat_updated nmi safe") Signed-off-by: Tejun Heo --- include/linux/llist.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/llist.h b/include/linux/llist.h index 27b17f64bcee..607b2360c938 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -83,7 +83,7 @@ static inline void init_llist_head(struct llist_head *list) */ static inline void init_llist_node(struct llist_node *node) { - node->next = node; + WRITE_ONCE(node->next, node); } /** @@ -97,7 +97,7 @@ static inline void init_llist_node(struct llist_node *node) */ static inline bool llist_on_list(const struct llist_node *node) { - return node->next != node; + return READ_ONCE(node->next) != node; } /** @@ -220,7 +220,7 @@ static inline bool llist_empty(const struct llist_head *head) static inline struct llist_node *llist_next(struct llist_node *node) { - return node->next; + return READ_ONCE(node->next); } /** -- cgit v1.2.3 From fc6f89dc707838564abbb8e22dad8e4d75c7fa26 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 8 Jul 2025 15:47:29 -0700 Subject: stop_machine: Improve kernel-doc function-header comments Add more detail to the kernel-doc function-header comments for stop_machine(), stop_machine_cpuslocked(), and stop_core_cpuslocked(). Signed-off-by: Paul E. McKenney --- include/linux/stop_machine.h | 64 ++++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 3132262a404d..72820503514c 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -88,55 +88,73 @@ static inline void print_stop_info(const char *log_lvl, struct task_struct *task #endif /* CONFIG_SMP */ /* - * stop_machine "Bogolock": stop the entire machine, disable - * interrupts. This is a very heavy lock, which is equivalent to - * grabbing every spinlock (and more). So the "read" side to such a - * lock is anything which disables preemption. + * stop_machine "Bogolock": stop the entire machine, disable interrupts. + * This is a very heavy lock, which is equivalent to grabbing every raw + * spinlock (and more). So the "read" side to such a lock is anything + * which disables preemption. */ #if defined(CONFIG_SMP) || defined(CONFIG_HOTPLUG_CPU) /** * stop_machine: freeze the machine on all CPUs and run this function * @fn: the function to run - * @data: the data ptr for the @fn() - * @cpus: the cpus to run the @fn() on (NULL = any online cpu) + * @data: the data ptr to pass to @fn() + * @cpus: the cpus to run @fn() on (NULL = run on each online CPU) * - * Description: This causes a thread to be scheduled on every cpu, - * each of which disables interrupts. The result is that no one is - * holding a spinlock or inside any other preempt-disabled region when - * @fn() runs. + * Description: This causes a thread to be scheduled on every CPU, which + * will run with interrupts disabled. Each CPU specified by @cpus will + * run @fn. While @fn is executing, there will no other CPUs holding + * a raw spinlock or running within any other type of preempt-disabled + * region of code. * - * This can be thought of as a very heavy write lock, equivalent to - * grabbing every spinlock in the kernel. + * When @cpus specifies only a single CPU, this can be thought of as + * a reader-writer lock where readers disable preemption (for example, + * by holding a raw spinlock) and where the insanely heavy writers run + * @fn while also preventing any other CPU from doing any useful work. + * These writers can also be thought of as having implicitly grabbed every + * raw spinlock in the kernel. * - * Protects against CPU hotplug. + * When @fn is a no-op, this can be thought of as an RCU implementation + * where readers again disable preemption and writers use stop_machine() + * in place of synchronize_rcu(), albeit with orders of magnitude more + * disruption than even that of synchronize_rcu_expedited(). + * + * Although only one stop_machine() operation can proceed at a time, + * the possibility of blocking in cpus_read_lock() means that the caller + * cannot usefully rely on this serialization. + * + * Return: 0 if all invocations of @fn return zero. Otherwise, the + * value returned by an arbitrarily chosen member of the set of calls to + * @fn that returned non-zero. */ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); /** * stop_machine_cpuslocked: freeze the machine on all CPUs and run this function * @fn: the function to run - * @data: the data ptr for the @fn() - * @cpus: the cpus to run the @fn() on (NULL = any online cpu) + * @data: the data ptr to pass to @fn() + * @cpus: the cpus to run @fn() on (NULL = run on each online CPU) + * + * Same as above. Avoids nested calls to cpus_read_lock(). * - * Same as above. Must be called from with in a cpus_read_lock() protected - * region. Avoids nested calls to cpus_read_lock(). + * Context: Must be called from within a cpus_read_lock() protected region. */ int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); /** * stop_core_cpuslocked: - stop all threads on just one core * @cpu: any cpu in the targeted core - * @fn: the function to run - * @data: the data ptr for @fn() + * @fn: the function to run on each CPU in the core containing @cpu + * @data: the data ptr to pass to @fn() * - * Same as above, but instead of every CPU, only the logical CPUs of a - * single core are affected. + * Same as above, but instead of every CPU, only the logical CPUs of the + * single core containing @cpu are affected. * * Context: Must be called from within a cpus_read_lock() protected region. * - * Return: 0 if all executions of @fn returned 0, any non zero return - * value if any returned non zero. + * Return: 0 if all invocations of @fn return zero. Otherwise, the + * value returned by an arbitrarily chosen member of the set of calls to + * @fn that returned non-zero. */ int stop_core_cpuslocked(unsigned int cpu, cpu_stop_fn_t fn, void *data); -- cgit v1.2.3 From 76720eed7d18baf51c0f31fe8a3784702f50e3fc Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 17 Jul 2025 12:38:04 -0500 Subject: PCI: Add pci_is_display() to check if device is a display controller Several places in the kernel do class shifting to match whether a PCI device is display class. Add pci_is_display() for those places to use. Signed-off-by: Mario Limonciello Signed-off-by: Bjorn Helgaas Reviewed-by: Daniel Dadap Reviewed-by: Simona Vetter Link: https://patch.msgid.link/20250717173812.3633478-2-superm1@kernel.org --- include/linux/pci.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 05e68f35f392..4fff6405a830 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -744,6 +744,21 @@ static inline bool pci_is_vga(struct pci_dev *pdev) return false; } +/** + * pci_is_display - check if the PCI device is a display controller + * @pdev: PCI device + * + * Determine whether the given PCI device corresponds to a display + * controller. Display controllers are typically used for graphical output + * and are identified based on their class code. + * + * Return: true if the PCI device is a display controller, false otherwise. + */ +static inline bool pci_is_display(struct pci_dev *pdev) +{ + return (pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY; +} + #define for_each_pci_bridge(dev, bus) \ list_for_each_entry(dev, &bus->devices, bus_list) \ if (!pci_is_bridge(dev)) {} else -- cgit v1.2.3 From c0ae03588bbb95378758fe80e7436a9b4cfc71f6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 15 Jul 2025 17:03:21 -0700 Subject: ethtool: rss: initial RSS_SET (indirection table handling) Add initial support for RSS_SET, for now only operations on the indirection table are supported. Unlike the ioctl don't check if at least one parameter is being changed. This is how other ethtool-nl ops behave, so pick the ethtool-nl consistency vs copying ioctl behavior. There are two special cases here: 1) resetting the table to defaults; 2) support for tables of different size. For (1) I use an empty Netlink attribute (array of size 0). (2) may require some background. AFAICT a lot of modern devices allow allocating RSS tables of different sizes. mlx5 can upsize its tables, bnxt has some "table size calculation", and Intel folks asked about RSS table sizing in context of resource allocation in the past. The ethtool IOCTL API has a concept of table size, but right now the user is expected to provide a table exactly the size the device requests. Some drivers may change the table size at runtime (in response to queue count changes) but the user is not in control of this. What's not great is that all RSS contexts share the same table size. For example a device with 128 queues enabled, 16 RSS contexts 8 queues in each will likely have 256 entry tables for each of the 16 contexts, while 32 would be more than enough given each context only has 8 queues. To address this the Netlink API should avoid enforcing table size at the uAPI level, and should allow the user to express the min table size they expect. To fully solve (2) we will need more driver plumbing but at the uAPI level this patch allows the user to specify a table size smaller than what the device advertises. The device table size must be a multiple of the user requested table size. We then replicate the user-provided table to fill the full device size table. This addresses the "allow the user to express the min table size" objective, while not enforcing any fixed size. From Netlink perspective .get_rxfh_indir_size() is now de facto the "max" table size supported by the device. We may choose to support table replication in ethtool, too, when we actually plumb this thru the device APIs. Initially I was considering moving full pattern generation to the kernel (which queues to use, at which frequency and what min sequence length). I don't think this complexity would buy us much and most if not all devices have pow-2 table sizes, which simplifies the replication a lot. Reviewed-by: Gal Pressman Link: https://patch.msgid.link/20250716000331.1378807-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink_generated.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 96027e26ffba..130bdf5c3516 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -840,6 +840,7 @@ enum { ETHTOOL_MSG_PHY_GET, ETHTOOL_MSG_TSCONFIG_GET, ETHTOOL_MSG_TSCONFIG_SET, + ETHTOOL_MSG_RSS_SET, __ETHTOOL_MSG_USER_CNT, ETHTOOL_MSG_USER_MAX = (__ETHTOOL_MSG_USER_CNT - 1) -- cgit v1.2.3 From e804bd83c1fd7e1f03899c948812ebc207ac5a7e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Jul 2025 22:08:10 +0000 Subject: neighbour: Split pneigh_lookup(). pneigh_lookup() has ASSERT_RTNL() in the middle of the function, which is confusing. When called with the last argument, creat, 0, pneigh_lookup() literally looks up a proxy neighbour entry. This is the case of the reader path as the fast path and RTM_GETNEIGH. pneigh_lookup(), however, creates a pneigh_entry when called with creat 1 from RTM_NEWNEIGH and SIOCSARP, which require RTNL. Let's split pneigh_lookup() into two functions. We will convert all the reader paths to RCU, and read_lock_bh(&tbl->lock) in the new pneigh_lookup() will be dropped. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250716221221.442239-6-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 7e865b14749d..7f3d57da5689 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -376,10 +376,11 @@ unsigned long neigh_rand_reach_time(unsigned long base); void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb); struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, struct net *net, - const void *key, struct net_device *dev, - int creat); + const void *key, struct net_device *dev); struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev); +struct pneigh_entry *pneigh_create(struct neigh_table *tbl, struct net *net, + const void *key, struct net_device *dev); int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev); -- cgit v1.2.3 From d63382aea70aa4ecb516126e00930bc8ab5e55ef Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Jul 2025 22:08:11 +0000 Subject: neighbour: Annotate neigh_table.phash_buckets and pneigh_entry.next with __rcu. The next patch will free pneigh_entry with call_rcu(). Then, we need to annotate neigh_table.phash_buckets[] and pneigh_entry.next with __rcu. To make the next patch cleaner, let's annotate the fields in advance. Currently, all accesses to the fields are under the neigh table lock, so rcu_dereference_protected() is used with 1 for now, but most of them (except in pneigh_delete() and pneigh_ifdown_and_unlock()) will be replaced with rcu_dereference() and rcu_dereference_check(). Note that pneigh_ifdown_and_unlock() changes pneigh_entry.next to a local list, which is illegal because the RCU iterator could be moved to another list. This part will be fixed in the next patch. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250716221221.442239-7-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 7f3d57da5689..1ddc44a04200 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -176,7 +176,7 @@ struct neigh_ops { }; struct pneigh_entry { - struct pneigh_entry *next; + struct pneigh_entry __rcu *next; possible_net_t net; struct net_device *dev; netdevice_tracker dev_tracker; @@ -236,7 +236,7 @@ struct neigh_table { unsigned long last_rand; struct neigh_statistics __percpu *stats; struct neigh_hash_table __rcu *nht; - struct pneigh_entry **phash_buckets; + struct pneigh_entry __rcu **phash_buckets; }; static inline int neigh_parms_family(struct neigh_parms *p) -- cgit v1.2.3 From d539d8fbd8fcf64a1492c51f5ee99aaa8a8dc9ab Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Jul 2025 22:08:12 +0000 Subject: neighbour: Free pneigh_entry after RCU grace period. We will convert RTM_GETNEIGH to RCU. neigh_get() looks up pneigh_entry by pneigh_lookup() and passes it to pneigh_fill_info(). Then, we must ensure that the entry is alive till pneigh_fill_info() completes, but read_lock_bh(&tbl->lock) in pneigh_lookup() does not guarantee that. Also, we will convert all readers of tbl->phash_buckets[] to RCU. Let's use call_rcu() to free pneigh_entry and update phash_buckets[] and ->next by rcu_assign_pointer(). pneigh_ifdown_and_unlock() uses list_head to avoid overwriting ->next and moving RCU iterators to another list. pndisc_destructor() (only IPv6 ndisc uses this) uses a mutex, so it is not delayed to call_rcu(), where we cannot sleep. This is fine because the mcast code works with RCU and ipv6_dev_mc_dec() frees mcast objects after RCU grace period. While at it, we change the return type of pneigh_ifdown_and_unlock() to void. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250716221221.442239-8-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 1ddc44a04200..6d7f9aa53a7a 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -180,6 +180,10 @@ struct pneigh_entry { possible_net_t net; struct net_device *dev; netdevice_tracker dev_tracker; + union { + struct list_head free_node; + struct rcu_head rcu; + }; u32 flags; u8 protocol; bool permanent; -- cgit v1.2.3 From dd103c9a53752d3754a3182ec8dd97885680cfe2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Jul 2025 22:08:17 +0000 Subject: neighbour: Remove __pneigh_lookup(). __pneigh_lookup() is the lockless version of pneigh_lookup(), but its only caller pndisc_is_router() holds the table lock and reads pneigh_netry.flags. This is because accessing pneigh_entry after pneigh_lookup() was illegal unless the caller holds RTNL or the table lock. Now, pneigh_entry is guaranteed to be alive during the RCU critical section. Let's call pneigh_lookup() and use READ_ONCE() for n->flags in pndisc_is_router() and remove __pneigh_lookup(). Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250716221221.442239-13-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 6d7f9aa53a7a..f8c7261cd4eb 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -381,8 +381,6 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb); struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev); -struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, struct net *net, - const void *key, struct net_device *dev); struct pneigh_entry *pneigh_create(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev); int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *key, -- cgit v1.2.3 From 13a936bb99fb6385dc8620d24d7111e514448371 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Jul 2025 22:08:19 +0000 Subject: neighbour: Protect tbl->phash_buckets[] with a dedicated mutex. tbl->phash_buckets[] is only modified in the slow path by pneigh_create() and pneigh_delete() under the table lock. Both of them are called under RTNL, so no extra lock is needed, but we will remove RTNL from the paths. pneigh_create() looks up a pneigh_entry, and this part can be lockless, but it would complicate the logic like 1. lookup 2. allocate pengih_entry for GFP_KERNEL 3. lookup again but under lock 4. if found, return it after freeing the allocated memory 5. else, return the new one Instead, let's add a per-table mutex and run lookup and allocation under it. Note that updating pneigh_entry part in neigh_add() is still protected by RTNL and will be moved to pneigh_create() in the next patch. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250716221221.442239-15-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index f8c7261cd4eb..f333f9ebc425 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -240,6 +240,7 @@ struct neigh_table { unsigned long last_rand; struct neigh_statistics __percpu *stats; struct neigh_hash_table __rcu *nht; + struct mutex phash_lock; struct pneigh_entry __rcu **phash_buckets; }; -- cgit v1.2.3 From dc2a27e524ac13e7a599bc693934ed81f868dc2d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Jul 2025 22:08:20 +0000 Subject: neighbour: Update pneigh_entry in pneigh_create(). neigh_add() updates pneigh_entry() found or created by pneigh_create(). This update is serialised by RTNL, but we will remove it. Let's move the update part to pneigh_create() and make it return errno instead of a pointer of pneigh_entry. Now, the pneigh code is RTNL free. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250716221221.442239-16-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index f333f9ebc425..4a30bd458c5a 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -382,8 +382,9 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb); struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev); -struct pneigh_entry *pneigh_create(struct neigh_table *tbl, struct net *net, - const void *key, struct net_device *dev); +int pneigh_create(struct neigh_table *tbl, struct net *net, const void *key, + struct net_device *dev, u32 flags, u8 protocol, + bool permanent); int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev); -- cgit v1.2.3 From 2d8ae9a4f1bc04a118e3d438ac50dd49281b34fd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 11 Jul 2025 11:55:14 +0300 Subject: string: Group str_has_prefix() and strstarts() The two str_has_prefix() and strstarts() are about the same with a slight difference on what they return. Group them in the header. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250711085514.1294428-1-andriy.shevchenko@linux.intel.com Signed-off-by: Kees Cook --- include/linux/string.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index 01621ad0f598..fdd3442c6bcb 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -345,16 +345,6 @@ extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, int ptr_to_hashval(const void *ptr, unsigned long *hashval_out); -/** - * strstarts - does @str start with @prefix? - * @str: string to examine - * @prefix: prefix to look for. - */ -static inline bool strstarts(const char *str, const char *prefix) -{ - return strncmp(str, prefix, strlen(prefix)) == 0; -} - size_t memweight(const void *ptr, size_t bytes); /** @@ -562,4 +552,14 @@ static __always_inline size_t str_has_prefix(const char *str, const char *prefix return strncmp(str, prefix, len) == 0 ? len : 0; } +/** + * strstarts - does @str start with @prefix? + * @str: string to examine + * @prefix: prefix to look for. + */ +static inline bool strstarts(const char *str, const char *prefix) +{ + return strncmp(str, prefix, strlen(prefix)) == 0; +} + #endif /* _LINUX_STRING_H_ */ -- cgit v1.2.3 From b2df55a98672f4be076ff69d0f0d0b1fc81f2044 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 17 Jul 2025 09:30:36 -0700 Subject: cleanup: Fix documentation build error for ACQUIRE updates Stephen reports: Documentation/core-api/cleanup:7: include/linux/cleanup.h:73: ERROR: Unexpected indentation. [docutils] Documentation/core-api/cleanup:7: include/linux/cleanup.h:74: WARNING: Block quote ends without a blank line; unexpected unindent. [docutils] Which points out that the ACQUIRE() example in cleanup.h missed the "::" suffix to mark the following text as a code-block. Fixes: 857d18f23ab1 ("cleanup: Introduce ACQUIRE() and ACQUIRE_ERR() for conditional locks") Reported-by: Stephen Rothwell Closes: http://lore.kernel.org/20250717173354.34375751@canb.auug.org.au Signed-off-by: Dan Williams Acked-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://patch.msgid.link/20250717163036.1275791-1-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- include/linux/cleanup.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 4eb83dd71cfe..0fb796db4811 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -64,8 +64,7 @@ * the remainder of "func()". * * The ACQUIRE() macro can be used in all places that guard() can be - * used and additionally support conditional locks - * + * used and additionally support conditional locks:: * * DEFINE_GUARD_COND(pci_dev, _try, pci_dev_trylock(_T)) * ... -- cgit v1.2.3 From 4e301d858af17ae2ce56886296e5458c5a08219a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 7 Jun 2025 13:53:03 +0200 Subject: fs: constify file ptr in backing_file accessor helpers Add internal helper backing_file_set_user_path() for the only two cases that need to modify backing_file fields. Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/20250607115304.2521155-2-amir73il@gmail.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1d9586a78041..8116b1080457 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2864,7 +2864,7 @@ struct file *dentry_open_nonotify(const struct path *path, int flags, const struct cred *cred); struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); -struct path *backing_file_user_path(struct file *f); +struct path *backing_file_user_path(const struct file *f); /* * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file @@ -2876,14 +2876,14 @@ struct path *backing_file_user_path(struct file *f); * by fstat() on that same fd. */ /* Get the path to display in /proc//maps */ -static inline const struct path *file_user_path(struct file *f) +static inline const struct path *file_user_path(const struct file *f) { if (unlikely(f->f_mode & FMODE_BACKING)) return backing_file_user_path(f); return &f->f_path; } /* Get the inode whose inode number to display in /proc//maps */ -static inline const struct inode *file_user_inode(struct file *f) +static inline const struct inode *file_user_inode(const struct file *f) { if (unlikely(f->f_mode & FMODE_BACKING)) return d_inode(backing_file_user_path(f)->dentry); -- cgit v1.2.3 From 962ddc5a7a4b04c007bba0f3e7298cda13c62efd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 8 Jul 2025 17:59:54 -0700 Subject: crypto: acomp - Fix CFI failure due to type punning To avoid a crash when control flow integrity is enabled, make the workspace ("stream") free function use a consistent type, and call it through a function pointer that has that same type. Fixes: 42d9f6c77479 ("crypto: acomp - Move scomp stream allocation code into acomp") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- include/crypto/internal/acompress.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index ffffd88bbbad..2d97440028ff 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -63,10 +63,7 @@ struct crypto_acomp_stream { struct crypto_acomp_streams { /* These must come first because of struct scomp_alg. */ void *(*alloc_ctx)(void); - union { - void (*free_ctx)(void *); - void (*cfree_ctx)(const void *); - }; + void (*free_ctx)(void *); struct crypto_acomp_stream __percpu *streams; struct work_struct stream_work; -- cgit v1.2.3 From c470ffa6f48619e8ea2442206b31b7965f8826a5 Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Fri, 11 Jul 2025 21:29:31 +0300 Subject: crypto: engine - remove request batching support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove request batching support from crypto_engine, as there are no drivers using this feature and it doesn't really work that well. Instead of doing batching based on backlog, a more optimal approach would be for the user to handle the batching (similar to how IPsec can hook into GSO to get 64K of data each time or how block encryption can use unit sizes much greater than 4K). Suggested-by: Herbert Xu Signed-off-by: Ovidiu Panait Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- include/crypto/engine.h | 1 - include/crypto/internal/engine.h | 4 ---- 2 files changed, 5 deletions(-) (limited to 'include') diff --git a/include/crypto/engine.h b/include/crypto/engine.h index 545dbefe3e13..2e60344437da 100644 --- a/include/crypto/engine.h +++ b/include/crypto/engine.h @@ -76,7 +76,6 @@ int crypto_engine_stop(struct crypto_engine *engine); struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt); struct crypto_engine *crypto_engine_alloc_init_and_set(struct device *dev, bool retry_support, - int (*cbk_do_batch)(struct crypto_engine *engine), bool rt, int qlen); void crypto_engine_exit(struct crypto_engine *engine); diff --git a/include/crypto/internal/engine.h b/include/crypto/internal/engine.h index b6a4ea2240fc..8da1a13619c9 100644 --- a/include/crypto/internal/engine.h +++ b/include/crypto/internal/engine.h @@ -37,8 +37,6 @@ struct device; * @unprepare_crypt_hardware: there are currently no more requests on the * queue so the subsystem notifies the driver that it may relax the * hardware by issuing this call - * @do_batch_requests: execute a batch of requests. Depends on multiple - * requests support. * @kworker: kthread worker struct for request pump * @pump_requests: work struct for scheduling work to the request pump * @priv_data: the engine private data @@ -60,8 +58,6 @@ struct crypto_engine { int (*prepare_crypt_hardware)(struct crypto_engine *engine); int (*unprepare_crypt_hardware)(struct crypto_engine *engine); - int (*do_batch_requests)(struct crypto_engine *engine); - struct kthread_worker *kworker; struct kthread_work pump_requests; -- cgit v1.2.3 From 5eb32430df783e212ffed8d35cc494a8941cda0a Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Fri, 11 Jul 2025 21:29:32 +0300 Subject: crypto: engine - remove {prepare,unprepare}_crypt_hardware callbacks The {prepare,unprepare}_crypt_hardware callbacks were added back in 2016 by commit 735d37b5424b ("crypto: engine - Introduce the block request crypto engine framework"), but they were never implemented by any driver. Remove them as they are unused. Since the 'engine->idling' and 'was_busy' flags are no longer needed, remove them as well. Signed-off-by: Ovidiu Panait Signed-off-by: Herbert Xu --- include/crypto/internal/engine.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/engine.h b/include/crypto/internal/engine.h index 8da1a13619c9..f19ef376833f 100644 --- a/include/crypto/internal/engine.h +++ b/include/crypto/internal/engine.h @@ -21,7 +21,6 @@ struct device; /* * struct crypto_engine - crypto hardware engine * @name: the engine name - * @idling: the engine is entering idle state * @busy: request pump is busy * @running: the engine is on working * @retry_support: indication that the hardware allows re-execution @@ -31,12 +30,6 @@ struct device; * @list: link with the global crypto engine list * @queue_lock: spinlock to synchronise access to request queue * @queue: the crypto queue of the engine - * @prepare_crypt_hardware: a request will soon arrive from the queue - * so the subsystem requests the driver to prepare the hardware - * by issuing this call - * @unprepare_crypt_hardware: there are currently no more requests on the - * queue so the subsystem notifies the driver that it may relax the - * hardware by issuing this call * @kworker: kthread worker struct for request pump * @pump_requests: work struct for scheduling work to the request pump * @priv_data: the engine private data @@ -44,7 +37,6 @@ struct device; */ struct crypto_engine { char name[ENGINE_NAME_LEN]; - bool idling; bool busy; bool running; @@ -56,9 +48,6 @@ struct crypto_engine { struct crypto_queue queue; struct device *dev; - int (*prepare_crypt_hardware)(struct crypto_engine *engine); - int (*unprepare_crypt_hardware)(struct crypto_engine *engine); - struct kthread_worker *kworker; struct kthread_work pump_requests; -- cgit v1.2.3 From 9b7fc3f14576c268f62fe0b882fac5e61239b659 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 1 Jul 2025 10:58:04 +0200 Subject: vdso: Introduce aux_clock_resolution_ns() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the constant resolution to a shared header, so the vDSO can use it and return it without going through a syscall. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250701-vdso-auxclock-v1-10-df7d9f87b9b8@linutronix.de --- include/vdso/auxclock.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/vdso/auxclock.h (limited to 'include') diff --git a/include/vdso/auxclock.h b/include/vdso/auxclock.h new file mode 100644 index 000000000000..6d6e74cbc400 --- /dev/null +++ b/include/vdso/auxclock.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _VDSO_AUXCLOCK_H +#define _VDSO_AUXCLOCK_H + +#include +#include + +static __always_inline u64 aux_clock_resolution_ns(void) +{ + return 1; +} + +#endif /* _VDSO_AUXCLOCK_H */ -- cgit v1.2.3 From 380b84e168e57c54d0a9e053a5558fddc43f0c1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 1 Jul 2025 10:58:05 +0200 Subject: vdso/vsyscall: Update auxiliary clock data in the datapage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose the auxiliary clock data so it can be read from the vDSO. Architectures not using the generic vDSO time framework, namely SPARC64, are not supported. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250701-vdso-auxclock-v1-11-df7d9f87b9b8@linutronix.de --- include/linux/timekeeper_internal.h | 6 ++++++ include/vdso/datapage.h | 3 +++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index ca79938b62f3..c27aac67cb3f 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -190,4 +190,10 @@ static inline void update_vsyscall_tz(void) } #endif +#if defined(CONFIG_GENERIC_GETTIMEOFDAY) && defined(CONFIG_POSIX_AUX_CLOCKS) +extern void vdso_time_update_aux(struct timekeeper *tk); +#else +static inline void vdso_time_update_aux(struct timekeeper *tk) { } +#endif + #endif /* _LINUX_TIMEKEEPER_INTERNAL_H */ diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 1864e76e8f69..f4c96d9ce674 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -38,6 +38,7 @@ struct vdso_arch_data { #endif #define VDSO_BASES (CLOCK_TAI + 1) +#define VDSO_BASE_AUX 0 #define VDSO_HRES (BIT(CLOCK_REALTIME) | \ BIT(CLOCK_MONOTONIC) | \ BIT(CLOCK_BOOTTIME) | \ @@ -117,6 +118,7 @@ struct vdso_clock { * @arch_data: architecture specific data (optional, defaults * to an empty struct) * @clock_data: clocksource related data (array) + * @aux_clock_data: auxiliary clocksource related data (array) * @tz_minuteswest: minutes west of Greenwich * @tz_dsttime: type of DST correction * @hrtimer_res: hrtimer resolution @@ -133,6 +135,7 @@ struct vdso_time_data { struct arch_vdso_time_data arch_data; struct vdso_clock clock_data[CS_BASES]; + struct vdso_clock aux_clock_data[MAX_AUX_CLOCKS]; s32 tz_minuteswest; s32 tz_dsttime; -- cgit v1.2.3 From cd3557a7618bf5c1935e9f66b58a329f1f1f4b27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 1 Jul 2025 10:58:06 +0200 Subject: vdso/gettimeofday: Add support for auxiliary clocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose the auxiliary clocks through the vDSO. Architectures not using the generic vDSO time framework, namely SPARC64, are not supported. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250701-vdso-auxclock-v1-12-df7d9f87b9b8@linutronix.de --- include/vdso/datapage.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index f4c96d9ce674..02533038640e 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -5,6 +5,7 @@ #ifndef __ASSEMBLY__ #include +#include #include #include #include @@ -46,6 +47,7 @@ struct vdso_arch_data { #define VDSO_COARSE (BIT(CLOCK_REALTIME_COARSE) | \ BIT(CLOCK_MONOTONIC_COARSE)) #define VDSO_RAW (BIT(CLOCK_MONOTONIC_RAW)) +#define VDSO_AUX __GENMASK(CLOCK_AUX_LAST, CLOCK_AUX) #define CS_HRES_COARSE 0 #define CS_RAW 1 -- cgit v1.2.3 From 78e50d88998a4a634eeda3e0d136cb8b9c9bc9d8 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 12 Jul 2025 18:53:07 -0300 Subject: wifi: brcmfmac: Add support for the SDIO 43751 device Add the SDIO ID and firmware matching for the 43751 device. Based on the previous work from Marc Gonzalez . Tested on an i.MX6DL board connected to an AP6398SV chip with the brcmfmac43752-sdio.bin firmware taken from: https://source.puri.sm/Librem5/firmware-brcm43752-nonfree Signed-off-by: Fabio Estevam Acked-by: Arend van Spriel > Link: https://patch.msgid.link/20250712215307.1310802-1-festevam@gmail.com Signed-off-by: Johannes Berg --- include/linux/mmc/sdio_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 7cddfdac2f57..fe3d6d98f8da 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -76,6 +76,7 @@ #define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6 #define SDIO_DEVICE_ID_BROADCOM_43439 0xa9af #define SDIO_DEVICE_ID_BROADCOM_43455 0xa9bf +#define SDIO_DEVICE_ID_BROADCOM_43751 0xaae7 #define SDIO_DEVICE_ID_BROADCOM_CYPRESS_43752 0xaae8 #define SDIO_VENDOR_ID_CYPRESS 0x04b4 -- cgit v1.2.3 From 6624a0af82a6e3a4d3609264ef591a8fa3467139 Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Thu, 17 Jul 2025 17:42:02 +1000 Subject: wifi: cfg80211: support configuring an S1G short beaconing BSS S1G short beacons are an optional frame type used in an S1G BSS that contain a limited set of elements. While they are optional, they are a fundamental part of S1G that enables significant power saving. Expose 2 additional netlink attributes, NL80211_ATTR_S1G_LONG_BEACON_PERIOD which denotes the number of beacon intervals between each long beacon and NL80211_ATTR_S1G_SHORT_BEACON which is a nested attribute containing the short beacon tail and head. We split them as the long beacon period cannot be updated, and is only used when initialisng the interface, whereas the short beacon data can be used to both initialise and update the templates. This follows how things such as the beacon interval and DTIM period currently operate. During the initialisation path, we ensure we have the long beacon period if the short beacon data is being passed down, whereas the update path will simply update the template if its sent down. The short beacon data is validated using the same routines for regular beacons as they support correctly parsing the short beacon format while ensuring the frame is well-formed. Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20250717074205.312577-2-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 23 +++++++++++++++++++++++ include/uapi/linux/nl80211.h | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 17f2a665dce6..44a1055a81ba 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1423,6 +1423,23 @@ struct cfg80211_unsol_bcast_probe_resp { const u8 *tmpl; }; +/** + * struct cfg80211_s1g_short_beacon - S1G short beacon data. + * + * @update: Set to true if the feature configuration should be updated. + * @short_head: Short beacon head. + * @short_tail: Short beacon tail. + * @short_head_len: Short beacon head len. + * @short_tail_len: Short beacon tail len. + */ +struct cfg80211_s1g_short_beacon { + bool update; + const u8 *short_head; + const u8 *short_tail; + size_t short_head_len; + size_t short_tail_len; +}; + /** * struct cfg80211_ap_settings - AP configuration * @@ -1463,6 +1480,8 @@ struct cfg80211_unsol_bcast_probe_resp { * @fils_discovery: FILS discovery transmission parameters * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters * @mbssid_config: AP settings for multiple bssid + * @s1g_long_beacon_period: S1G long beacon period + * @s1g_short_beacon: S1G short beacon data */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -1496,6 +1515,8 @@ struct cfg80211_ap_settings { struct cfg80211_fils_discovery fils_discovery; struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; struct cfg80211_mbssid_config mbssid_config; + u8 s1g_long_beacon_period; + struct cfg80211_s1g_short_beacon s1g_short_beacon; }; @@ -1507,11 +1528,13 @@ struct cfg80211_ap_settings { * @beacon: beacon data * @fils_discovery: FILS discovery transmission parameters * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters + * @s1g_short_beacon: S1G short beacon data */ struct cfg80211_ap_update { struct cfg80211_beacon_data beacon; struct cfg80211_fils_discovery fils_discovery; struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; + struct cfg80211_s1g_short_beacon s1g_short_beacon; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 39460334dafb..d1a14f2892d9 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2915,6 +2915,19 @@ enum nl80211_commands { * applicable to that specific radio only. If the radio id is greater * thank the number of radios, error denoting invalid value is returned. * + * @NL80211_ATTR_S1G_LONG_BEACON_PERIOD: (u8) Integer attribute that represents + * the number of beacon intervals between each long beacon transmission + * for an S1G BSS with short beaconing enabled. This is a required + * attribute for initialising an S1G short beaconing BSS. When updating + * the short beacon data, this is not required. It has a minimum value of + * 2 (i.e 2 beacon intervals). + * + * @NL80211_ATTR_S1G_SHORT_BEACON: Nested attribute containing the short beacon + * head and tail used to set or update the short beacon templates. When + * bringing up a new interface, %NL80211_ATTR_S1G_LONG_BEACON_PERIOD is + * required alongside this attribute. Refer to + * @enum nl80211_s1g_short_beacon_attrs for the attribute definitions. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3474,6 +3487,9 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_RADIO_INDEX, + NL80211_ATTR_S1G_LONG_BEACON_PERIOD, + NL80211_ATTR_S1G_SHORT_BEACON, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -8148,4 +8164,27 @@ enum nl80211_wiphy_radio_freq_range { NL80211_WIPHY_RADIO_FREQ_ATTR_MAX = __NL80211_WIPHY_RADIO_FREQ_ATTR_LAST - 1, }; +/** + * enum nl80211_s1g_short_beacon_attrs - S1G short beacon data + * + * @__NL80211_S1G_SHORT_BEACON_ATTR_INVALID: Invalid + * + * @NL80211_S1G_SHORT_BEACON_ATTR_HEAD: Short beacon head (binary). + * @NL80211_S1G_SHORT_BEACON_ATTR_TAIL: Short beacon tail (binary). + * + * @__NL80211_S1G_SHORT_BEACON_ATTR_LAST: Internal + * @NL80211_S1G_SHORT_BEACON_ATTR_MAX: Highest attribute + */ +enum nl80211_s1g_short_beacon_attrs { + __NL80211_S1G_SHORT_BEACON_ATTR_INVALID, + + NL80211_S1G_SHORT_BEACON_ATTR_HEAD, + NL80211_S1G_SHORT_BEACON_ATTR_TAIL, + + /* keep last */ + __NL80211_S1G_SHORT_BEACON_ATTR_LAST, + NL80211_S1G_SHORT_BEACON_ATTR_MAX = + __NL80211_S1G_SHORT_BEACON_ATTR_LAST - 1 +}; + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From bbf93a06d73505591db3a93797f44b9c44555d9b Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Thu, 17 Jul 2025 17:42:03 +1000 Subject: wifi: mac80211: support initialising an S1G short beaconing BSS Introduce the ability to parse the short beacon data and long beacon period. The long beacon period represents the number of beacon intervals between each long beacon transmission. Additionally, as a BSS cannot change its configuration such that short beaconing is dynamically disabled/enabled without tearing down the interface - we ensure we have an existing short beacon before performing the update. Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20250717074205.312577-3-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 577fd6a8c372..a2dbaad2f6d3 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -758,6 +758,8 @@ struct ieee80211_parsed_tpe { * be updated to 1, even if bss_param_ch_cnt didn't change. This allows * the link to know that it heard the latest value from its own beacon * (as opposed to hearing its value from another link's beacon). + * @s1g_long_beacon_period: number of beacon intervals between each long + * beacon transmission. */ struct ieee80211_bss_conf { struct ieee80211_vif *vif; @@ -857,6 +859,8 @@ struct ieee80211_bss_conf { u8 bss_param_ch_cnt; u8 bss_param_ch_cnt_link_id; + + u8 s1g_long_beacon_period; }; /** -- cgit v1.2.3 From 1f4f8166110f037f15a89c2203ff887b98a8393a Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Thu, 17 Jul 2025 11:18:14 +0100 Subject: cxl/events: Update Common Event Record to CXL spec rev 3.2 CXL spec 3.2 section 8.2.10.2.1 Table 8-55, Common Event Record format defined new fields LD-ID and Head ID. LD-ID: ID of logical device from where the event originated, which is valid only if LD-ID valid flag is set to 1. CXL spec 3.2 Section 2.4 describes, a Type 3 Multi-Logical Device (MLD) can partition its resources into up to 16 isolated Logical Devices. Each Logical Device is identified by a Logical Device Identifier (LD-ID) in CXL.mem and CXL.io protocols. LD-ID is a 16-bit Logical Device identifier applicable for CXL.io and CXL.mem requests and responses. CXL.mem supports only the lower 4 bits of LD-ID and therefore can support up to 16 unique LD-ID values over the link. Requests and responses forwarded over an MLD Port are tagged with LD-ID. Head ID: ID of the device head, from where the event originated, which is valid only if head valid flag is set to 1. Add updates for the above spec changes in the CXL events record and CXL common trace event implementation. Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Signed-off-by: Shiju Jose Link: https://patch.msgid.link/20250717101817.2104-2-shiju.jose@huawei.com Signed-off-by: Dave Jiang --- include/cxl/event.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/cxl/event.h b/include/cxl/event.h index f9ae1796da85..f4cb8568566b 100644 --- a/include/cxl/event.h +++ b/include/cxl/event.h @@ -19,7 +19,9 @@ struct cxl_event_record_hdr { __le64 timestamp; u8 maint_op_class; u8 maint_op_sub_class; - u8 reserved[14]; + __le16 ld_id; + u8 head_id; + u8 reserved[11]; } __packed; struct cxl_event_media_hdr { -- cgit v1.2.3 From f10f46a0ee53420f707195fe33b7c235a1c0e48a Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Thu, 17 Jul 2025 11:18:17 +0100 Subject: cxl/events: Trace Memory Sparing Event Record CXL rev 3.2 section 8.2.10.2.1.4 Table 8-60 defines the Memory Sparing Event Record. Determine if the event read is memory sparing record and if so trace the record. Memory device shall produce a memory sparing event record 1. After completion of a PPR maintenance operation if the memory sparing event record enable bit is set (Field: sPPR/hPPR Operation Mode in Table 8-128/Table 8-131). 2. In response to a query request by the host (see section 8.2.10.7.1.4) to determine the availability of sparing resources. The device shall report the resource availability by producing the Memory Sparing Event Record (see Table 8-60) in which the channel, rank, nibble mask, bank group, bank, row, column, sub-channel fields are a copy of the values specified in the request. If the controller does not support reporting whether a resource is available, and a perform maintenance operation for memory sparing is issued with query resources set to 1, the controller shall return invalid input. Example trace log for produce memory sparing event record on completion of a soft PPR operation, cxl_memory_sparing: memdev=mem1 host=0000:0f:00.0 serial=3 log=Informational : time=55045163029 uuid=e71f3a40-2d29-4092-8a39-4d1c966c7c65 len=128 flags='0x1' handle=1 related_handle=0 maint_op_class=2 maint_op_sub_class=1 ld_id=0 head_id=0 : flags='' result=0 validity_flags='CHANNEL|RANK|NIBBLE|BANK GROUP|BANK|ROW|COLUMN' spare resource avail=1 channel=2 rank=5 nibble_mask=a59c bank_group=2 bank=4 row=13 column=23 sub_channel=0 comp_id=00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 comp_id_pldm_valid_flags='' pldm_entity_id=0x00 pldm_resource_id=0x00 Note: For memory sparing event record, fields 'maintenance operation class' and 'maintenance operation subclass' are defined twice, first in the common event record (Table 8-55) and second in the memory sparing event record (Table 8-60). Thus those in the sparing event record coded as reserved, to be removed when the spec is updated. Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Signed-off-by: Shiju Jose Link: https://patch.msgid.link/20250717101817.2104-5-shiju.jose@huawei.com Signed-off-by: Dave Jiang --- include/cxl/event.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include') diff --git a/include/cxl/event.h b/include/cxl/event.h index f4cb8568566b..6fd90f9cc203 100644 --- a/include/cxl/event.h +++ b/include/cxl/event.h @@ -110,11 +110,43 @@ struct cxl_event_mem_module { u8 reserved[0x2a]; } __packed; +/* + * Memory Sparing Event Record - MSER + * CXL rev 3.2 section 8.2.10.2.1.4; Table 8-60 + */ +struct cxl_event_mem_sparing { + struct cxl_event_record_hdr hdr; + /* + * The fields maintenance operation class and maintenance operation + * subclass defined in the Memory Sparing Event Record are the + * duplication of the same in the common event record. Thus defined + * as reserved and to be removed after the spec correction. + */ + u8 rsv1; + u8 rsv2; + u8 flags; + u8 result; + __le16 validity_flags; + u8 reserved1[6]; + __le16 res_avail; + u8 channel; + u8 rank; + u8 nibble_mask[3]; + u8 bank_group; + u8 bank; + u8 row[3]; + __le16 column; + u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; + u8 sub_channel; + u8 reserved2[0x25]; +} __packed; + union cxl_event { struct cxl_event_generic generic; struct cxl_event_gen_media gen_media; struct cxl_event_dram dram; struct cxl_event_mem_module mem_module; + struct cxl_event_mem_sparing mem_sparing; /* dram & gen_media event header */ struct cxl_event_media_hdr media_hdr; } __packed; @@ -133,6 +165,7 @@ enum cxl_event_type { CXL_CPER_EVENT_GEN_MEDIA, CXL_CPER_EVENT_DRAM, CXL_CPER_EVENT_MEM_MODULE, + CXL_CPER_EVENT_MEM_SPARING, }; #define CPER_CXL_DEVICE_ID_VALID BIT(0) -- cgit v1.2.3 From 733c43f1df34f9185b945e6f12ac00c8556c6dfe Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Tue, 8 Jul 2025 14:22:10 -0600 Subject: io_uring/cmd: introduce IORING_URING_CMD_REISSUE flag Add a flag IORING_URING_CMD_REISSUE that ->uring_cmd() implementations can use to tell whether this is the first or subsequent issue of the uring_cmd. This will allow ->uring_cmd() implementations to store information in the io_uring_cmd's pdu across issues. Signed-off-by: Caleb Sander Mateos Acked-by: David Sterba Link: https://lore.kernel.org/r/20250708202212.2851548-3-csander@purestorage.com Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 53408124c1e5..29892f54e0ac 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -8,6 +8,8 @@ /* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */ #define IORING_URING_CMD_CANCELABLE (1U << 30) +/* io_uring_cmd is being issued again */ +#define IORING_URING_CMD_REISSUE (1U << 31) struct io_uring_cmd { struct file *file; -- cgit v1.2.3 From 2e6dbb25ea15844c8b617260d635731c37c85ac9 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Tue, 8 Jul 2025 14:22:12 -0600 Subject: io_uring/cmd: remove struct io_uring_cmd_data There are no more users of struct io_uring_cmd_data and its op_data field. Remove it to shave 8 bytes from struct io_async_cmd and eliminate a store and load for every uring_cmd. Signed-off-by: Caleb Sander Mateos Acked-by: David Sterba Link: https://lore.kernel.org/r/20250708202212.2851548-5-csander@purestorage.com Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 29892f54e0ac..cfa6d0c0c322 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -21,10 +21,6 @@ struct io_uring_cmd { u8 pdu[32]; /* available inline for free use */ }; -struct io_uring_cmd_data { - void *op_data; -}; - static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) { return sqe->cmd; @@ -137,11 +133,6 @@ static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd return cmd_to_io_kiocb(cmd)->tctx->task; } -static inline struct io_uring_cmd_data *io_uring_cmd_get_async_data(struct io_uring_cmd *cmd) -{ - return cmd_to_io_kiocb(cmd)->async_data; -} - /* * Return uring_cmd's context reference as its context handle for driver to * track per-context resource, such as registered kernel IO buffer -- cgit v1.2.3 From 850f14f5b91986e586b66565c9c75bdd4c834571 Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Wed, 16 Jul 2025 15:03:45 +0800 Subject: iommufd: Destroy vdevice on idevice destroy Destroy iommufd_vdevice (vdev) on iommufd_idevice (idev) destruction so that vdev can't outlive idev. idev represents the physical device bound to iommufd, while the vdev represents the virtual instance of the physical device in the VM. The lifecycle of the vdev should not be longer than idev. This doesn't cause real problem on existing use cases cause vdev doesn't impact the physical device, only provides virtualization information. But to extend vdev for Confidential Computing (CC), there are needs to do secure configuration for the vdev, e.g. TSM Bind/Unbind. These configurations should be rolled back on idev destroy, or the external driver (VFIO) functionality may be impact. The idev is created by external driver so its destruction can't fail. The idev implements pre_destroy() op to actively remove its associated vdev before destroying itself. There are 3 cases on idev pre_destroy(): 1. vdev is already destroyed by userspace. No extra handling needed. 2. vdev is still alive. Use iommufd_object_tombstone_user() to destroy vdev and tombstone the vdev ID. 3. vdev is being destroyed by userspace. The vdev ID is already freed, but vdev destroy handler is not completed. This requires multi-threads syncing - vdev holds idev's short term users reference until vdev destruction completes, idev leverages existing wait_shortterm mechanism for syncing. idev should also block any new reference to it after pre_destroy(), or the following wait shortterm would timeout. Introduce a 'destroying' flag, set it to true on idev pre_destroy(). Any attempt to reference idev should honor this flag under the protection of idev->igroup->lock. Link: https://patch.msgid.link/r/20250716070349.1807226-5-yilun.xu@linux.intel.com Originally-by: Nicolin Chen Suggested-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Co-developed-by: "Aneesh Kumar K.V (Arm)" Signed-off-by: "Aneesh Kumar K.V (Arm)" Tested-by: Nicolin Chen Signed-off-by: Xu Yilun Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 1 + include/uapi/linux/iommufd.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index e3a0cd47384d..b88911026bc4 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -108,6 +108,7 @@ struct iommufd_viommu { struct iommufd_vdevice { struct iommufd_object obj; struct iommufd_viommu *viommu; + struct iommufd_device *idev; struct device *dev; /* diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 554aacf89ea7..c218c89e0e2e 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -1070,6 +1070,11 @@ struct iommu_viommu_alloc { * * Allocate a virtual device instance (for a physical device) against a vIOMMU. * This instance holds the device's information (related to its vIOMMU) in a VM. + * User should use IOMMU_DESTROY to destroy the virtual device before + * destroying the physical device (by closing vfio_cdev fd). Otherwise the + * virtual device would be forcibly destroyed on physical device destruction, + * its vdevice_id would be permanently leaked (unremovable & unreusable) until + * iommu fd closed. */ struct iommu_vdevice_alloc { __u32 size; -- cgit v1.2.3 From 651f733675c4a26e59dd34522917eace20c557c0 Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Wed, 16 Jul 2025 15:03:46 +0800 Subject: iommufd/vdevice: Remove struct device reference from struct vdevice Remove struct device *dev from struct vdevice. The dev pointer is the Plan B for vdevice to reference the physical device. As now vdev->idev is added without refcounting concern, just use vdev->idev->dev when needed. To avoid exposing struct iommufd_device in the public header, export a iommufd_vdevice_to_device() helper. Link: https://patch.msgid.link/r/20250716070349.1807226-6-yilun.xu@linux.intel.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Nicolin Chen Co-developed-by: Nicolin Chen Signed-off-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Xu Yilun Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index b88911026bc4..810e4d8ac912 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -109,7 +109,6 @@ struct iommufd_vdevice { struct iommufd_object obj; struct iommufd_viommu *viommu; struct iommufd_device *idev; - struct device *dev; /* * Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID of @@ -261,6 +260,7 @@ int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner, unsigned long *offset); void _iommufd_destroy_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner, unsigned long offset); +struct device *iommufd_vdevice_to_device(struct iommufd_vdevice *vdev); struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id); int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, @@ -295,6 +295,12 @@ static inline void _iommufd_destroy_mmap(struct iommufd_ctx *ictx, { } +static inline struct device * +iommufd_vdevice_to_device(struct iommufd_vdevice *vdev) +{ + return NULL; +} + static inline struct device * iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) { -- cgit v1.2.3 From ab6bc44159d8f0c4ee757e0ce041fa9033e0ead8 Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Wed, 16 Jul 2025 15:03:49 +0800 Subject: iommufd: Rename some shortterm-related identifiers Rename the shortterm-related identifiers to wait-related. The usage of shortterm_users refcount is now beyond its name. It is also used for references which live longer than an ioctl execution. E.g. vdev holds idev's shortterm_users refcount on vdev allocation, releases it during idev's pre_destroy(). Rename the refcount as wait_cnt, since it is always used to sync the referencing & the destruction of the object by waiting for it to go to zero. List all changed identifiers: iommufd_object::shortterm_users -> iommufd_object::wait_cnt REMOVE_WAIT_SHORTTERM -> REMOVE_WAIT iommufd_object_dec_wait_shortterm() -> iommufd_object_dec_wait() zerod_shortterm -> zerod_wait_cnt No functional change intended. Link: https://patch.msgid.link/r/20250716070349.1807226-9-yilun.xu@linux.intel.com Suggested-by: Kevin Tian Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Xu Yilun Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 810e4d8ac912..6e7efe83bc5d 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -46,7 +46,13 @@ enum iommufd_object_type { /* Base struct for all objects with a userspace ID handle. */ struct iommufd_object { - refcount_t shortterm_users; + /* + * Destroy will sleep and wait for wait_cnt to go to zero. This allows + * concurrent users of the ID to reliably avoid causing a spurious + * destroy failure. Incrementing this count should either be short + * lived or be revoked and blocked during pre_destroy(). + */ + refcount_t wait_cnt; refcount_t users; enum iommufd_object_type type; unsigned int id; -- cgit v1.2.3 From a6f190630d070173897a7e98a30188b7638ba0a1 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 16 Jul 2025 18:26:53 +0200 Subject: net: track pfmemalloc drops via SKB_DROP_REASON_PFMEMALLOC Add a new SKB drop reason (SKB_DROP_REASON_PFMEMALLOC) to track packets dropped due to memory pressure. In production environments, we've observed memory exhaustion reported by memory layer stack traces, but these drops were not properly tracked in the SKB drop reason infrastructure. While most network code paths now properly report pfmemalloc drops, some protocol-specific socket implementations still use sk_filter() without drop reason tracking: - Bluetooth L2CAP sockets - CAIF sockets - IUCV sockets - Netlink sockets - SCTP sockets - Unix domain sockets These remaining cases represent less common paths and could be converted in a follow-up patch if needed. The current implementation provides significantly improved observability into memory pressure events in the network stack, especially for key protocols like TCP and UDP, helping to diagnose problems in production environments. Reported-by: Matt Fleming Signed-off-by: Jesper Dangaard Brouer Link: https://patch.msgid.link/175268316579.2407873.11634752355644843509.stgit@firesoul Signed-off-by: Jakub Kicinski --- include/linux/filter.h | 14 ++++++++++++-- include/net/dropreason-core.h | 6 ++++++ include/net/tcp.h | 2 +- 3 files changed, 19 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index f5cf4d35d83e..4e82332afe03 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1073,10 +1073,20 @@ bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) return set_memory_rox((unsigned long)hdr, hdr->size >> PAGE_SHIFT); } -int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); +int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap, + enum skb_drop_reason *reason); + static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { - return sk_filter_trim_cap(sk, skb, 1); + enum skb_drop_reason ignore_reason; + + return sk_filter_trim_cap(sk, skb, 1, &ignore_reason); +} + +static inline int sk_filter_reason(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason *reason) +{ + return sk_filter_trim_cap(sk, skb, 1, reason); } struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err); diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 229bb1826f2a..e19184dd1b0f 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -125,6 +125,7 @@ FN(CAN_RX_INVALID_FRAME) \ FN(CANFD_RX_INVALID_FRAME) \ FN(CANXL_RX_INVALID_FRAME) \ + FN(PFMEMALLOC) \ FNe(MAX) /** @@ -598,6 +599,11 @@ enum skb_drop_reason { * non conform CAN-XL frame (or device is unable to receive CAN frames) */ SKB_DROP_REASON_CANXL_RX_INVALID_FRAME, + /** + * @SKB_DROP_REASON_PFMEMALLOC: packet allocated from memory reserve + * reached a path or socket not eligible for use of memory reserves + */ + SKB_DROP_REASON_PFMEMALLOC, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen diff --git a/include/net/tcp.h b/include/net/tcp.h index bc08de49805c..b3815d104340 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1559,7 +1559,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason); -int tcp_filter(struct sock *sk, struct sk_buff *skb); +int tcp_filter(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason); void tcp_set_state(struct sock *sk, int state); void tcp_done(struct sock *sk); int tcp_abort(struct sock *sk, int err); -- cgit v1.2.3 From ffea1168346120df9417fcafd8f3a1c93033ae34 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 17 Jul 2025 10:23:27 -0700 Subject: net: s/dev_get_port_parent_id/netif_get_port_parent_id/ Commit cc34acd577f1 ("docs: net: document new locking reality") introduced netif_ vs dev_ function semantics: the former expects locked netdev, the latter takes care of the locking. We don't strictly follow this semantics on either side, but there are more dev_xxx handlers now that don't fit. Rename them to netif_xxx where appropriate. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250717172333.1288349-2-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e49d8c98d284..c6ba4ea66039 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4223,8 +4223,8 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, int dev_set_mac_address_user(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); -int dev_get_port_parent_id(struct net_device *dev, - struct netdev_phys_item_id *ppid, bool recurse); +int netif_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); -- cgit v1.2.3 From af1d017377c1c1931bfb898e719ab712cf79f944 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 17 Jul 2025 10:23:28 -0700 Subject: net: s/dev_get_mac_address/netif_get_mac_address/ Commit cc34acd577f1 ("docs: net: document new locking reality") introduced netif_ vs dev_ function semantics: the former expects locked netdev, the latter takes care of the locking. We don't strictly follow this semantics on either side, but there are more dev_xxx handlers now that don't fit. Rename them to netif_xxx where appropriate. netif_get_mac_address is used only by tun/tap, so move it into NETDEV_INTERNAL namespace. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250717172333.1288349-3-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c6ba4ea66039..b3a48934b4cb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4222,7 +4222,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); int dev_set_mac_address_user(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); -int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); +int netif_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int netif_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); -- cgit v1.2.3 From 0413a34ef678c3e2f0fafb4e113e810a05197030 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 17 Jul 2025 10:23:29 -0700 Subject: net: s/dev_pre_changeaddr_notify/netif_pre_changeaddr_notify/ Commit cc34acd577f1 ("docs: net: document new locking reality") introduced netif_ vs dev_ function semantics: the former expects locked netdev, the latter takes care of the locking. We don't strictly follow this semantics on either side, but there are more dev_xxx handlers now that don't fit. Rename them to netif_xxx where appropriate. netif_pre_changeaddr_notify is used only by ipvlan/bond, so move it into NETDEV_INTERNAL namespace. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250717172333.1288349-4-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b3a48934b4cb..55c5cd9d1929 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4214,8 +4214,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, int __dev_set_mtu(struct net_device *, int); int netif_set_mtu(struct net_device *dev, int new_mtu); int dev_set_mtu(struct net_device *, int); -int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, - struct netlink_ext_ack *extack); +int netif_pre_changeaddr_notify(struct net_device *dev, const char *addr, + struct netlink_ext_ack *extack); int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); int dev_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, -- cgit v1.2.3 From 303a8487a657c357ca6abc06a4045f72cdae90d5 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 17 Jul 2025 10:23:30 -0700 Subject: net: s/__dev_set_mtu/__netif_set_mtu/ Commit cc34acd577f1 ("docs: net: document new locking reality") introduced netif_ vs dev_ function semantics: the former expects locked netdev, the latter takes care of the locking. We don't strictly follow this semantics on either side, but there are more dev_xxx handlers now that don't fit. Rename them to netif_xxx where appropriate. __netif_set_mtu is used only by bond, so move it into NETDEV_INTERNAL namespace. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250717172333.1288349-5-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 55c5cd9d1929..8978fbfbd644 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4211,7 +4211,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, struct netlink_ext_ack *extack); int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat); -int __dev_set_mtu(struct net_device *, int); +int __netif_set_mtu(struct net_device *dev, int new_mtu); int netif_set_mtu(struct net_device *dev, int new_mtu); int dev_set_mtu(struct net_device *, int); int netif_pre_changeaddr_notify(struct net_device *dev, const char *addr, -- cgit v1.2.3 From 93893a57efd431b9b4e72359bc8a8428681ca688 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 17 Jul 2025 10:23:31 -0700 Subject: net: s/dev_get_flags/netif_get_flags/ Commit cc34acd577f1 ("docs: net: document new locking reality") introduced netif_ vs dev_ function semantics: the former expects locked netdev, the latter takes care of the locking. We don't strictly follow this semantics on either side, but there are more dev_xxx handlers now that don't fit. Rename them to netif_xxx where appropriate. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250717172333.1288349-6-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8978fbfbd644..8370cd0f8f6b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4196,7 +4196,7 @@ int generic_hwtstamp_set_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg, struct netlink_ext_ack *extack); int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); -unsigned int dev_get_flags(const struct net_device *); +unsigned int netif_get_flags(const struct net_device *dev); int __dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); int netif_change_flags(struct net_device *dev, unsigned int flags, -- cgit v1.2.3 From 5d4d84618e1aa2c9531afa3a6323f56e1db4dcf7 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 17 Jul 2025 10:23:32 -0700 Subject: net: s/dev_set_threaded/netif_set_threaded/ Commit cc34acd577f1 ("docs: net: document new locking reality") introduced netif_ vs dev_ function semantics: the former expects locked netdev, the latter takes care of the locking. We don't strictly follow this semantics on either side, but there are more dev_xxx handlers now that don't fit. Rename them to netif_xxx where appropriate. Note that one dev_set_threaded call still remains in mt76 for debugfs file. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250717172333.1288349-7-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8370cd0f8f6b..7929ddfd4433 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -589,6 +589,7 @@ static inline bool napi_complete(struct napi_struct *n) return napi_complete_done(n, 0); } +int netif_set_threaded(struct net_device *dev, bool threaded); int dev_set_threaded(struct net_device *dev, bool threaded); void napi_disable(struct napi_struct *n); -- cgit v1.2.3 From 88d3cec28274f9c15355835466c0c694e313680e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 17 Jul 2025 10:23:33 -0700 Subject: net: s/dev_close_many/netif_close_many/ Commit cc34acd577f1 ("docs: net: document new locking reality") introduced netif_ vs dev_ function semantics: the former expects locked netdev, the latter takes care of the locking. We don't strictly follow this semantics on either side, but there are more dev_xxx handlers now that don't fit. Rename them to netif_xxx where appropriate. netif_close_many is used only by vlan/dsa and one mtk driver, so move it into NETDEV_INTERNAL namespace. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250717172333.1288349-8-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7929ddfd4433..5aee8d3895f4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3343,7 +3343,7 @@ int netif_open(struct net_device *dev, struct netlink_ext_ack *extack); int dev_open(struct net_device *dev, struct netlink_ext_ack *extack); void netif_close(struct net_device *dev); void dev_close(struct net_device *dev); -void dev_close_many(struct list_head *head, bool unlink); +void netif_close_many(struct list_head *head, bool unlink); void netif_disable_lro(struct net_device *dev); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); -- cgit v1.2.3 From 954c0d74129948eed5c8f4a6898d3d5b344c8b18 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 17 Jul 2025 11:55:41 -0700 Subject: srcu: Add guards for SRCU-fast readers This adds the usual scoped_guard(srcu_fast, &my_srcu) and guard(srcu_fast)(&my_srcu). Suggested-by: Steven Rostedt Signed-off-by: Paul E. McKenney Cc: Mathieu Desnoyers Cc: Sebastian Andrzej Siewior Reviewed-by: Steven Rostedt (Google) Signed-off-by: Neeraj Upadhyay (AMD) --- include/linux/srcu.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index cf711a0f440b..f179700fecaf 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -481,4 +481,9 @@ DEFINE_LOCK_GUARD_1(srcu, struct srcu_struct, srcu_read_unlock(_T->lock, _T->idx), int idx) +DEFINE_LOCK_GUARD_1(srcu_fast, struct srcu_struct, + _T->scp = srcu_read_lock_fast(_T->lock), + srcu_read_unlock_fast(_T->lock, _T->scp), + struct srcu_ctr __percpu *scp) + #endif -- cgit v1.2.3 From d2b5be741a5045272b9d711908eab017632ac022 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 5 Jul 2025 10:49:58 -0700 Subject: mm/damon/sysfs: use DAMON core API damon_is_running() DAMON core implements a static function to see if a given DAMON context is running. DAMON sysfs interface is implementing the same one on its own. Make the core function non-static and reuse it from the DAMON sysfs interface. Link: https://lkml.kernel.org/r/20250705175000.56259-5-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index bb58e36f019e..e1fea3119538 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -934,6 +934,7 @@ static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive); int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); +bool damon_is_running(struct damon_ctx *ctx); int damon_call(struct damon_ctx *ctx, struct damon_call_control *control); int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control); -- cgit v1.2.3 From fdc5001b002eaca9989b5f3563245662fd1e4d40 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 4 Jun 2025 12:51:11 +0300 Subject: mm/vmstat: make MEMCG select VM_EVENT_COUNTERS The vmstat_text array contains labels for counters displayed in /proc/vmstat. It is important to keep the labels in sync with the counters. There is a BUILD_BUG_ON() check in vmstat_start() that ensures the size of the vmstat_text is not smaller than VM_EVENT_COUNTERS. This helps to catch cases where a new counter is added but the label is not. However, it does not help if a counter is removed but the label remains. It would be nice to make the BUILD_BUG_ON() check more strict to catch such cases. However, when compiling with MEMCG enabled but VM_EVENT_COUNTERS disabled, the vmstat_text array is larger than NR_VMSTAT_ITEMS. This issue arises because some elements of the vmstat_text array are present when either MEMCG or VM_EVENT_COUNTERS is enabled, but NR_VMSTAT_ITEMS only accounts for these elements if VM_EVENT_COUNTERS is enabled. Instead of adjusting the NR_VMSTAT_ITEMS definition to account for MEMCG, make MEMCG select VM_EVENT_COUNTERS. VM_EVENT_COUNTERS is enabled in most configurations anyway. Link: https://lkml.kernel.org/r/20250604095111.533783-1-kirill.shutemov@linux.intel.com Fixes: ebc5d83d0443 ("mm/memcontrol: use vmstat names for printing statistics") Signed-off-by: Kirill A. Shutemov Reported-by: Randy Dunlap Acked-by: Vlastimil Babka Tested-by: Randy Dunlap Acked-by: Randy Dunlap Acked-by: Shakeel Butt Cc: Konstantin Khlebnikov Cc: David Hildenbrand Cc: Johannes Weiner Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Mike Rapoport Cc: Muchun Song Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/vmstat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index b2ccb6845595..c287998908bf 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -507,7 +507,7 @@ static inline const char *lru_list_name(enum lru_list lru) return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" } -#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) +#if defined(CONFIG_VM_EVENT_COUNTERS) static inline const char *vm_event_name(enum vm_event_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + @@ -516,7 +516,7 @@ static inline const char *vm_event_name(enum vm_event_item item) NR_VM_STAT_ITEMS + item]; } -#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ +#endif /* CONFIG_VM_EVENT_COUNTERS */ #ifdef CONFIG_MEMCG -- cgit v1.2.3 From 8356a5a3b078ca89c526dd6d71e9a76fec571c37 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 25 Jun 2025 17:51:52 +0200 Subject: mm, vmstat: remove the NR_WRITEBACK_TEMP node_stat_item counter The only user of the counter (FUSE) was removed in commit 0c58a97f919c ("fuse: remove tmp folio for writebacks and internal rb tree") so follow the established pattern of removing the counter and hardcoding 0 in meminfo output, as done recently with NR_BOUNCE. Update documentation for procfs, including for the value for Bounce that was missed when removing its counter. Also remove the mention of NR_WRITEBACK_TEMP implications from a comment in wb_position_ratio(). The rest of the comment there about fuse setting bdi->max_ratio to 1% is still correct. [vbabka@suse.cz: v2] Link: https://lkml.kernel.org/r/5a848e15-6a57-4ecb-a015-d4f358b8a5d3@suse.cz Link: https://lkml.kernel.org/r/20250625-nr_writeback_removal-v1-1-7f2a0df70faa@suse.cz Signed-off-by: Vlastimil Babka Cc: Brendan Jackman Cc: Danilo Krummrich Cc: Greg Kroah-Hartman Cc: Jan Kara Cc: Jeff Layton Cc: Jeffle Xu Cc: Jens Axboe Cc: Joanne Koong Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kirill A. Shuemov Cc: Matthew Wilcox (Oracle) Cc: Maxim Patlasov Cc: Michal Hocko Cc: Miklos Szeredi Cc: Suren Baghdasaryan Cc: Tejun Heo Cc: Zach O'Keefe Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1d1bb2b7f40d..0c5da9141983 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -206,7 +206,6 @@ enum node_stat_item { NR_FILE_PAGES, NR_FILE_DIRTY, NR_WRITEBACK, - NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ NR_SHMEM_THPS, NR_SHMEM_PMDMAPPED, -- cgit v1.2.3 From a2c24eae5a15f79673eba2913d87d658a04830cf Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 8 Jul 2025 19:59:31 -0500 Subject: mm/damon: add struct damos_migrate_dests Patch series "mm/damon/vaddr: Allow interleaving in migrate_{hot,cold} actions", v4. A recent patchset automatically sets the interleave weight for each node according to the node's maximum bandwidth [1]. In another thread, the patch set's author, Joshua Hahn, wondered if/how thes weights should be changed if the bandwidth utilization of the system changes [2]. This patch set adds the mechanism for dynamically changing how application data is interleaved across nodes while leaving the policy of what the interleave weights should be to userspace. It does this by having the migrate_{hot,cold} operating schemes interleave application data according to the list of migration nodes and weights passed in via the DAMON sysfs interface. This functionality can be used to dynamically adjust how folios are interleaved by having a userspace process adjust those weights. If no specific destination nodes or weights are provided, the migrate_{hot,cold} actions will only migrate folios to damos->target_nid as before. The algorithm used to interleave the folios is similar to the one used for the weighted interleave mempolicy [3]. It uses the offset from which a folio is mapped into a VMA to determine the node the folio should be placed in. This method is convenient because for a given set of interleave weights, a folio has only one valid node it can be placed in, limitng the amount of unnecessary data movement. However, finding out how a folio is mapped inside of a VMA requires a costly rmap walk when using a paddr scheme. As such, we have decided that this functionality makes more sense as a vaddr scheme [4]. To this end, this patch set also adds vaddr versions of the migrate_{hot,cold}. Motivation ========== There have been prior discussions about how changing the interleave weights in response to the system's bandwidth utilization can be beneficial [2]. However, currently the interleave weights only are applied when data is allocated. Migrating already allocated pages according to the dynamically changing weights will better help balance the bandwidth utilization across nodes. As a toy example, imagine some application that uses 75% of the local bandwidth. Assuming sufficient capacity, when running alone, we want to keep that application's data in local memory. However, if a second instance of that application begins, using the same amount of bandwidth, it would be best to interleave the data of both processes to alleviate the bandwidth pressure from the local node. Likewise, when one of the processes ends, the data should be moves back to local memory. We imagine there would be a userspace application that would monitor system performance characteristics, such as bandwidth utilization or memory access latency, and uses that information to tune the interleave weights. Others seem to have come to a similar conclusion in previous discussions [5]. We are currently working on a userspace program that does this, but it is not quite ready to be published yet. After the userspace application tunes the interleave weights, there must be some mechanism that actually migrates pages to be consistent with those weights. This patchset is what provides this mechanism. We believe DAMON is the correct venue for the interleaving mechanism for a few reasons. First, we noticed that we don't have to migrate all of the application's pages to improve performance. we just need to migrate the frequently accessed pages. DAMON's existing hotness traching is very useful for this. Second, DAMON's quota system can be used to ensure we are not using too much bandwidth for migrations. Finally, as Ying pointed out [6], a complete solution must also handle when a memory node is at capacity. The existing migrate_cold action can be used in conjunction with the functionality added in this patch set to provide that complete solution. Functionality Test ================== Below is an example of this new functionality in use to confirm that these patches behave as intended. In this example, the user starts an application, alloc_data, which allocates 1GB using the default memory policy (i.e. allocate to local memory) then sleeps. Afterwards, we start DAMON to interleave the data at a 1:1 ratio. Using numastat, we show that DAMON has migrated the application's data to match the new interleave ratio. For this example, I modified the userspace damo tool [8] to write to the migration_dest sysfs files. I plan to upstream these changes when these patches are merged. $ # Allocate the data initially $ ./alloc_data 1G & [1] 6587 $ numastat -c -p alloc_data Per-node process memory usage (in MBs) for PID 6587 (alloc_data) Node 0 Node 1 Total ------ ------ ----- Huge 0 0 0 Heap 0 0 0 Stack 0 0 0 Private 1027 0 1027 ------- ------ ------ ----- Total 1027 0 1027 $ # Start DAMON to interleave data at a 1:1 ratio $ cat ./interleave_vaddr.yaml kdamonds: - contexts: - ops: vaddr addr_unit: null targets: - pid: 6587 regions: [] intervals: sample_us: 500 ms aggr_us: 5 s ops_update_us: 20 s intervals_goal: access_bp: 0 % aggrs: '0' min_sample_us: 0 ns max_sample_us: 0 ns nr_regions: min: '20' max: '50' schemes: - action: migrate_hot dests: - nid: 0 weight: 1 - nid: 1 weight: 1 access_pattern: sz_bytes: min: 0 B max: max nr_accesses: min: 0 % max: 100 % age: min: 0 ns max: max $ sudo ./damo/damo interleave_vaddr.yaml $ # Verify that DAMON has migrated data to match the 1:1 ratio $ numastat -c -p alloc_data Per-node process memory usage (in MBs) for PID 6587 (alloc_data) Node 0 Node 1 Total ------ ------ ----- Huge 0 0 0 Heap 0 0 0 Stack 0 0 0 Private 514 514 1027 ------- ------ ------ ----- Total 514 514 1027 Performance Test ================ Below is a simple example showing that interleaving application data using these patches can improve application performance. To do this, we run a bandwidth intensive embedding reduction application [7]. This workload is useful for this test because it reports the time it takes each iteration to run and each iteration reuses the same allocation, allowing us to see the benefits of the migration. We evaluate this on a 128 core/256 thread AMD CPU with 72GB/s of local DDR bandwidth and 26 GB/s of CXL bandwidth. Before we start the workload, the system bandwidth utilization is low, so we start with the interleave weights of 1:0, i.e. allocating all data to local memory. When the workload beings, it saturates the local bandwidth, making the page placement suboptimal. To alleviate this, we modify the interleave weights, triggering DAMON to migrate the workload's data. We use the same interleave_vaddr.yaml file to setup DAMON, except we configure it to begin with a 1:0 interleave ratio, and attach it to the shell and its children processes. $ sudo ./damo/damo start interleave_vaddr.yaml --include_child_tasks & $ /eval_baseline -d amazon_All -c 255 -r 100 Eval Phase 3: Running Baseline... REPEAT # 0 Baseline Total time : 7323.54 ms REPEAT # 1 Baseline Total time : 7624.56 ms REPEAT # 2 Baseline Total time : 7619.61 ms REPEAT # 3 Baseline Total time : 7617.12 ms REPEAT # 4 Baseline Total time : 7638.64 ms REPEAT # 5 Baseline Total time : 7611.27 ms REPEAT # 6 Baseline Total time : 7629.32 ms REPEAT # 7 Baseline Total time : 7695.63 ms # Interleave weights set to 3:1 REPEAT # 8 Baseline Total time : 7077.5 ms REPEAT # 9 Baseline Total time : 5633.23 ms REPEAT # 10 Baseline Total time : 5644.6 ms REPEAT # 11 Baseline Total time : 5627.66 ms REPEAT # 12 Baseline Total time : 5629.76 ms REPEAT # 13 Baseline Total time : 5633.05 ms REPEAT # 14 Baseline Total time : 5641.24 ms REPEAT # 15 Baseline Total time : 5631.18 ms REPEAT # 16 Baseline Total time : 5631.33 ms Updating the interleave weights and having DAMON migrate the workload data according to the weights resulted in an approximarely 25% speedup. Patches Sequence ================ Patches 1-7 extend the DAMON API to specify multiple destination nodes and weights for the migrate_{hot,cold} actions. These patches are from SJ'S RFC [8]. Patches 8-10 add a vaddr implementation of the migrate_{hot,cold} schemes. Patch 11 modifies the vaddr migrate_{hot,cold} schemes to interleave data according to the weights provided by damos->migrate_dest. Patches 12-13 allow the vaddr migrate_{hot,cold} implementation to filter out folios like the paddr version. This patch (of 13): Introduce a new struct, namely damos_migrate_dests, for specifying multiple DAMOS' migration destination nodes and their weights. Link: https://lkml.kernel.org/r/20250709005952.17776-1-bijan311@gmail.com Link: https://lkml.kernel.org/r/20250709005952.17776-2-bijan311@gmail.com Link: https://lore.kernel.org/linux-mm/20250520141236.2987309-1-joshua.hahnjy@gmail.com/ [1] Link: https://lore.kernel.org/linux-mm/20250313155705.1943522-1-joshua.hahnjy@gmail.com/ [2] Link: https://elixir.bootlin.com/linux/v6.15.4/source/mm/mempolicy.c#L2015 [3] Link: https://lore.kernel.org/damon/20250624223310.55786-1-sj@kernel.org/ [4] Link: https://lore.kernel.org/linux-mm/20250314151137.892379-1-joshua.hahnjy@gmail.com/ [5] Link: https://lore.kernel.org/linux-mm/87frjfx6u4.fsf@DESKTOP-5N7EMDA/ [6] Link: https://github.com/SNU-ARC/MERCI [7] Link: https://lore.kernel.org/damon/20250702051558.54138-1-sj@kernel.org/ [8] Signed-off-by: SeongJae Park Signed-off-by: Bijan Tabatabai Reviewed-by: SeongJae Park Cc: Jonathan Corbet Cc: Ravi Shankar Jonnalagadda Signed-off-by: Andrew Morton --- include/linux/damon.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index e1fea3119538..07cee590ff09 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -447,6 +447,22 @@ struct damos_access_pattern { unsigned int max_age_region; }; +/** + * struct damos_migrate_dests - Migration destination nodes and their weights. + * @node_id_arr: Array of migration destination node ids. + * @weight_arr: Array of migration weights for @node_id_arr. + * @nr_dests: Length of the @node_id_arr and @weight_arr arrays. + * + * @node_id_arr is an array of the ids of migration destination nodes. + * @weight_arr is an array of the weights for those. The weights in + * @weight_arr are for nodes in @node_id_arr of same array index. + */ +struct damos_migrate_dests { + unsigned int *node_id_arr; + unsigned int *weight_arr; + size_t nr_dests; +}; + /** * struct damos - Represents a Data Access Monitoring-based Operation Scheme. * @pattern: Access pattern of target regions. -- cgit v1.2.3 From aabc85ee33c883243f2c506a5d88963f2456faa6 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 8 Jul 2025 19:59:32 -0500 Subject: mm/damon/core: add damos->migrate_dests field Add a new field to 'struct damos', namely migrate_dests, to allow DAMON API callers specify multiple migration destination nodes and their weights. Also update 'struct damos' creation and destruction functions accordingly to initialize the new field and free up the API caller-allocated buffers on those, respectively. Link: https://lkml.kernel.org/r/20250709005952.17776-3-bijan311@gmail.com Signed-off-by: SeongJae Park Signed-off-by: Bijan Tabatabai Reviewed-by: SeongJae Park Cc: Jonathan Corbet Cc: Ravi Shankar Jonnalagadda Signed-off-by: Andrew Morton --- include/linux/damon.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 07cee590ff09..1f425d830bb9 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -470,6 +470,7 @@ struct damos_migrate_dests { * @apply_interval_us: The time between applying the @action. * @quota: Control the aggressiveness of this scheme. * @wmarks: Watermarks for automated (in)activation of this scheme. + * @migrate_dests: Destination nodes if @action is "migrate_{hot,cold}". * @target_nid: Destination node if @action is "migrate_{hot,cold}". * @filters: Additional set of &struct damos_filter for &action. * @ops_filters: ops layer handling &struct damos_filter objects list. @@ -488,9 +489,12 @@ struct damos_migrate_dests { * monitoring context are inactive, DAMON stops monitoring either, and just * repeatedly checks the watermarks. * + * @migrate_dests specifies multiple migration target nodes with different + * weights for migrate_hot or migrate_cold actions. @target_nid is ignored if + * this is set. + * * @target_nid is used to set the migration target node for migrate_hot or - * migrate_cold actions, which means it's only meaningful when @action is either - * "migrate_hot" or "migrate_cold". + * migrate_cold actions, and @migrate_dests is unset. * * Before applying the &action to a memory region, &struct damon_operations * implementation could check pages of the region and skip &action to respect @@ -533,7 +537,10 @@ struct damos { struct damos_quota quota; struct damos_watermarks wmarks; union { - int target_nid; + struct { + int target_nid; + struct damos_migrate_dests migrate_dests; + }; }; struct list_head filters; struct list_head ops_filters; -- cgit v1.2.3 From e05d3a6014fd4ae224b44b89fbeacfaa4ace0a8e Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Wed, 9 Jul 2025 12:40:18 -0700 Subject: mm: remove unmap_and_put_page() There are no callers of unmap_and_put_page() left. Remove it. Link: https://lkml.kernel.org/r/20250709194017.927978-6-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Acked-by: David Hildenbrand Cc: Jordan Rome Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/highmem.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index a30526cc53a7..6234f316468c 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -682,10 +682,4 @@ static inline void folio_release_kmap(struct folio *folio, void *addr) kunmap_local(addr); folio_put(folio); } - -static inline void unmap_and_put_page(struct page *page, void *addr) -{ - folio_release_kmap(page_folio(page), addr); -} - #endif /* _LINUX_HIGHMEM_H */ -- cgit v1.2.3 From b980077899ea49cc747afe003e01ca303b00d463 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 23 Jun 2025 11:58:51 -0700 Subject: mm: introduce per-node proactive reclaim interface This adds support for allowing proactive reclaim in general on a NUMA system. A per-node interface extends support for beyond a memcg-specific interface, respecting the current semantics of memory.reclaim: respecting aging LRU and not supporting artificially triggering eviction on nodes belonging to non-bottom tiers. This patch allows userspace to do: echo "512M swappiness=10" > /sys/devices/system/node/nodeX/reclaim One of the premises for this is to semantically align as best as possible with memory.reclaim. During a brief time memcg did support nodemask until 55ab834a86a9 (Revert "mm: add nodes= arg to memory.reclaim"), for which semantics around reclaim (eviction) vs demotion were not clear, rendering charging expectations to be broken. With this approach: 1. Users who do not use memcg can benefit from proactive reclaim. The memcg interface is not NUMA aware and there are usecases that are focusing on NUMA balancing rather than workload memory footprint. 2. Proactive reclaim on top tiers will trigger demotion, for which memory is still byte-addressable. Reclaiming on the bottom nodes will trigger evicting to swap (the traditional sense of reclaim). This follows the semantics of what is today part of the aging process on tiered memory, mirroring what every other form of reclaim does (reactive and memcg proactive reclaim). Furthermore per-node proactive reclaim is not as susceptible to the memcg charging problem mentioned above. 3. Unlike the nodes= arg, this interface avoids confusing semantics, such as what exactly the user wants when mixing top-tier and low-tier nodes in the nodemask. Further per-node interface is less exposed to "free up memory in my container" usecases, where eviction is intended. 4. Users that *really* want to free up memory can use proactive reclaim on nodes knowingly to be on the bottom tiers to force eviction in a natural way - higher access latencies are still better than swap. If compelled, while no guarantees and perhaps not worth the effort, users could also also potentially follow a ladder-like approach to eventually free up the memory. Alternatively, perhaps an 'evict' option could be added to the parameters for both memory.reclaim and per-node interfaces to force this action unconditionally. [akpm@linux-foundation.org: user_proactive_reclaim(): return -EBUSY on PGDAT_RECLAIM_LOCKED contention, per Roman] [dave@stgolabs.net: memcg && node is also a bogus case, per Shakeel] Link: https://lkml.kernel.org/r/20250717235604.2atyx2aobwowpge3@offworld Link: https://lkml.kernel.org/r/20250623185851.830632-5-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Acked-by: Shakeel Butt Acked-by: Roman Gushchin Cc: Johannes Weiner Cc: Michal Hocko Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/swap.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index a49be950c485..95c6061fa1dc 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -431,6 +431,22 @@ extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; long remove_mapping(struct address_space *mapping, struct folio *folio); +#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) +extern int reclaim_register_node(struct node *node); +extern void reclaim_unregister_node(struct node *node); + +#else + +static inline int reclaim_register_node(struct node *node) +{ + return 0; +} + +static inline void reclaim_unregister_node(struct node *node) +{ +} +#endif /* CONFIG_SYSFS && CONFIG_NUMA */ + #ifdef CONFIG_NUMA extern int sysctl_min_unmapped_ratio; extern int sysctl_min_slab_ratio; -- cgit v1.2.3 From 004ded6bee11b8ed463cdc54b89a4390f4b64f6d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 12 Jul 2025 12:50:03 -0700 Subject: mm/damon: accept parallel damon_call() requests Patch series "mm/damon: remove damon_callback". damon_callback was the only way for communicating with DAMON for contexts running on its worker thread. The interface is flexible and simple. But as DAMON evolves with more features, damon_callback has become somewhat too old. With runtime parameters update, for example, its lack of synchronization support was found to be inconvenient. Arguably it is also not easy to use correctly since the callers should understand when each callback is called, and implication of the return values from the callbacks. To replace it, damon_call() and damos_walk() are introduced. And those replaced a few damon_callback use cases. Some use cases of damon_callback such as parallel or repetitive DAMON internal data reading and additional cleanups cannot simply be replaced by damon_call() and damos_walk(), though. To allow those replaceable, extend damon_call() for parallel and/or repeated callbacks and modify the core/ops layers for additional resources cleanup. With the updates, replace the remaining damon_callback usages and finally say goodbye to damon_callback. This patch (of 14): Calling damon_call() while it is serving for another parallel thread immediately fails with -EBUSY. The caller should call it again, later. Each caller implementing such retry logic would be redundant. Accept parallel damon_call() requests and do the wait instead of the caller. Link: https://lkml.kernel.org/r/20250712195016.151108-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250712195016.151108-2-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 1f425d830bb9..562c7876ba88 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -673,6 +673,8 @@ struct damon_call_control { struct completion completion; /* informs if the kdamond canceled @fn infocation */ bool canceled; + /* List head for siblings. */ + struct list_head list; }; /** @@ -798,8 +800,9 @@ struct damon_ctx { /* for scheme quotas prioritization */ unsigned long *regions_score_histogram; - struct damon_call_control *call_control; - struct mutex call_control_lock; + /* lists of &struct damon_call_control */ + struct list_head call_controls; + struct mutex call_controls_lock; struct damos_walk_control *walk_control; struct mutex walk_control_lock; -- cgit v1.2.3 From 43df7676e5508895c33b846d37cff9cf3b52674c Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 12 Jul 2025 12:50:04 -0700 Subject: mm/damon/core: introduce repeat mode damon_call() damon_call() can be useful for reading or writing DAMON internal data for one time. A common pattern of DAMON core usage from DAMON modules is doing such reads and writes repeatedly, for example, to periodically update the DAMOS stats. To do that with damon_call(), callers should call damon_call() repeatedly, with their own delay loop. Each caller doing that is repetitive. Introduce a repeat mode damon_call(). Callers can use the mode by setting a new field in damon_call_control. If the mode is turned on, damon_call() returns success immediately, and DAMON repeats invoking the callback function inside the kdamond main loop. Link: https://lkml.kernel.org/r/20250712195016.151108-3-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 562c7876ba88..b83987275ff9 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -659,6 +659,7 @@ struct damon_callback { * * @fn: Function to be called back. * @data: Data that will be passed to @fn. + * @repeat: Repeat invocations. * @return_code: Return code from @fn invocation. * * Control damon_call(), which requests specific kdamond to invoke a given @@ -667,6 +668,7 @@ struct damon_callback { struct damon_call_control { int (*fn)(void *data); void *data; + bool repeat; int return_code; /* private: internal use only */ /* informs if the kdamond finished handling of the request */ -- cgit v1.2.3 From 7114bc5e01cf393e1fdc97e10399eb9451b6af45 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 12 Jul 2025 12:50:11 -0700 Subject: mm/damon/core: add cleanup_target() ops callback Some DAMON operation sets may need additional cleanup per target. For example, [f]vaddr need to put pids of each target. Each user and core logic is doing that redundantly. Add another DAMON ops callback that will be used for doing such cleanups in operations set layer. [sj@kernel.org: add kernel-doc comment for damon_operations->cleanup_target] Link: https://lkml.kernel.org/r/20250715185239.89152-2-sj@kernel.org [sj@kernel.org: remove damon_ctx->callback kernel-doc comment] Link: https://lkml.kernel.org/r/20250715185239.89152-3-sj@kernel.org Link: https://lkml.kernel.org/r/20250712195016.151108-10-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index b83987275ff9..8c765e36623a 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -576,6 +576,7 @@ enum damon_ops_id { * @get_scheme_score: Get the score of a region for a scheme. * @apply_scheme: Apply a DAMON-based operation scheme. * @target_valid: Determine if the target is valid. + * @cleanup_target: Clean up each target before deallocation. * @cleanup: Clean up the context. * * DAMON can be extended for various address spaces and usages. For this, @@ -608,6 +609,7 @@ enum damon_ops_id { * filters (&struct damos_filter) that handled by itself. * @target_valid should check whether the target is still valid for the * monitoring. + * @cleanup_target is called before the target will be deallocated. * @cleanup is called from @kdamond just before its termination. */ struct damon_operations { @@ -623,6 +625,7 @@ struct damon_operations { struct damon_target *t, struct damon_region *r, struct damos *scheme, unsigned long *sz_filter_passed); bool (*target_valid)(struct damon_target *t); + void (*cleanup_target)(struct damon_target *t); void (*cleanup)(struct damon_ctx *context); }; @@ -771,7 +774,6 @@ struct damon_attrs { * Accesses to other fields must be protected by themselves. * * @ops: Set of monitoring operations for given use cases. - * @callback: Set of callbacks for monitoring events notifications. * * @adaptive_targets: Head of monitoring targets (&damon_target) list. * @schemes: Head of schemes (&damos) list. @@ -933,7 +935,7 @@ struct damon_target *damon_new_target(void); void damon_add_target(struct damon_ctx *ctx, struct damon_target *t); bool damon_targets_empty(struct damon_ctx *ctx); void damon_free_target(struct damon_target *t); -void damon_destroy_target(struct damon_target *t); +void damon_destroy_target(struct damon_target *t, struct damon_ctx *ctx); unsigned int damon_nr_regions(struct damon_target *t); struct damon_ctx *damon_new_ctx(void); -- cgit v1.2.3 From 5add26c0a18636e8e9fe409d4591c8a36e1bf695 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 12 Jul 2025 12:50:16 -0700 Subject: mm/damon/core: remove damon_callback All damon_callback usages are replicated by damon_call() and damos_walk(). Time to say goodbye. Remove damon_callback. Link: https://lkml.kernel.org/r/20250712195016.151108-15-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 8c765e36623a..f13664c62ddd 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -629,34 +629,6 @@ struct damon_operations { void (*cleanup)(struct damon_ctx *context); }; -/** - * struct damon_callback - Monitoring events notification callbacks. - * - * @after_wmarks_check: Called after each schemes' watermarks check. - * @after_aggregation: Called after each aggregation. - * @before_terminate: Called before terminating the monitoring. - * - * The monitoring thread (&damon_ctx.kdamond) calls @before_terminate just - * before finishing the monitoring. - * - * The monitoring thread calls @after_wmarks_check after each DAMON-based - * operation schemes' watermarks check. If users need to make changes to the - * attributes of the monitoring context while it's deactivated due to the - * watermarks, this is the good place to do. - * - * The monitoring thread calls @after_aggregation for each of the aggregation - * intervals. Therefore, users can safely access the monitoring results - * without additional protection. For the reason, users are recommended to use - * these callback for the accesses to the results. - * - * If any callback returns non-zero, monitoring stops. - */ -struct damon_callback { - int (*after_wmarks_check)(struct damon_ctx *context); - int (*after_aggregation)(struct damon_ctx *context); - void (*before_terminate)(struct damon_ctx *context); -}; - /* * struct damon_call_control - Control damon_call(). * @@ -727,7 +699,7 @@ struct damon_intervals_goal { * ``mmap()`` calls from the application, in case of virtual memory monitoring) * and applies the changes for each @ops_update_interval. All time intervals * are in micro-seconds. Please refer to &struct damon_operations and &struct - * damon_callback for more detail. + * damon_call_control for more detail. */ struct damon_attrs { unsigned long sample_interval; @@ -816,7 +788,6 @@ struct damon_ctx { struct mutex kdamond_lock; struct damon_operations ops; - struct damon_callback callback; struct list_head adaptive_targets; struct list_head schemes; -- cgit v1.2.3 From 9989db9f230542cfc097be3291b9457173371eb1 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 11 Jul 2025 10:59:10 -0400 Subject: mm/page_owner: convert set_page_owner_migrate_reason() to folios Both callers of set_page_owner_migrate_reason() use folios. Convert the function to take a folio directly and move the &folio->page conversion inside __set_page_owner_migrate_reason(). Link: https://lkml.kernel.org/r/20250711145910.90135-1-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Oscar Salvador Cc: Muchun Song Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/page_owner.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index debdc25f08b9..3328357f6dba 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -14,7 +14,7 @@ extern void __set_page_owner(struct page *page, extern void __split_page_owner(struct page *page, int old_order, int new_order); extern void __folio_copy_owner(struct folio *newfolio, struct folio *old); -extern void __set_page_owner_migrate_reason(struct page *page, int reason); +extern void __folio_set_owner_migrate_reason(struct folio *folio, int reason); extern void __dump_page_owner(const struct page *page); extern void pagetypeinfo_showmixedcount_print(struct seq_file *m, pg_data_t *pgdat, struct zone *zone); @@ -43,10 +43,10 @@ static inline void folio_copy_owner(struct folio *newfolio, struct folio *old) if (static_branch_unlikely(&page_owner_inited)) __folio_copy_owner(newfolio, old); } -static inline void set_page_owner_migrate_reason(struct page *page, int reason) +static inline void folio_set_owner_migrate_reason(struct folio *folio, int reason) { if (static_branch_unlikely(&page_owner_inited)) - __set_page_owner_migrate_reason(page, reason); + __folio_set_owner_migrate_reason(folio, reason); } static inline void dump_page_owner(const struct page *page) { @@ -68,7 +68,7 @@ static inline void split_page_owner(struct page *page, int old_order, static inline void folio_copy_owner(struct folio *newfolio, struct folio *folio) { } -static inline void set_page_owner_migrate_reason(struct page *page, int reason) +static inline void folio_set_owner_migrate_reason(struct folio *folio, int reason) { } static inline void dump_page_owner(const struct page *page) -- cgit v1.2.3 From 35c18f2933c596b4fd6a98baee36f3137d133a5f Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 12 Jun 2025 12:13:21 +0200 Subject: Add a new optional ",cma" suffix to the crashkernel= command line option Patch series "kdump: crashkernel reservation from CMA", v5. This series implements a way to reserve additional crash kernel memory using CMA. Currently, all the memory for the crash kernel is not usable by the 1st (production) kernel. It is also unmapped so that it can't be corrupted by the fault that will eventually trigger the crash. This makes sense for the memory actually used by the kexec-loaded crash kernel image and initrd and the data prepared during the load (vmcoreinfo, ...). However, the reserved space needs to be much larger than that to provide enough run-time memory for the crash kernel and the kdump userspace. Estimating the amount of memory to reserve is difficult. Being too careful makes kdump likely to end in OOM, being too generous takes even more memory from the production system. Also, the reservation only allows reserving a single contiguous block (or two with the "low" suffix). I've seen systems where this fails because the physical memory is fragmented. By reserving additional crashkernel memory from CMA, the main crashkernel reservation can be just large enough to fit the kernel and initrd image, minimizing the memory taken away from the production system. Most of the run-time memory for the crash kernel will be memory previously available to userspace in the production system. As this memory is no longer wasted, the reservation can be done with a generous margin, making kdump more reliable. Kernel memory that we need to preserve for dumping is normally not allocated from CMA, unless it is explicitly allocated as movable. Currently this is only the case for memory ballooning and zswap. Such movable memory will be missing from the vmcore. User data is typically not dumped by makedumpfile. When dumping of user data is intended this new CMA reservation cannot be used. There are five patches in this series: The first adds a new ",cma" suffix to the recenly introduced generic crashkernel parsing code. parse_crashkernel() takes one more argument to store the cma reservation size. The second patch implements reserve_crashkernel_cma() which performs the reservation. If the requested size is not available in a single range, multiple smaller ranges will be reserved. The third patch updates Documentation/, explicitly mentioning the potential DMA corruption of the CMA-reserved memory. The fourth patch adds a short delay before booting the kdump kernel, allowing pending DMA transfers to finish. The fifth patch enables the functionality for x86 as a proof of concept. There are just three things every arch needs to do: - call reserve_crashkernel_cma() - include the CMA-reserved ranges in the physical memory map - exclude the CMA-reserved ranges from the memory available through /proc/vmcore by excluding them from the vmcoreinfo PT_LOAD ranges. Adding other architectures is easy and I can do that as soon as this series is merged. With this series applied, specifying crashkernel=100M craskhernel=1G,cma on the command line will make a standard crashkernel reservation of 100M, where kexec will load the kernel and initrd. An additional 1G will be reserved from CMA, still usable by the production system. The crash kernel will have 1.1G memory available. The 100M can be reliably predicted based on the size of the kernel and initrd. The new cma suffix is completely optional. When no crashkernel=size,cma is specified, everything works as before. This patch (of 5): Add a new cma_size parameter to parse_crashkernel(). When not NULL, call __parse_crashkernel to parse the CMA reservation size from "crashkernel=size,cma" and store it in cma_size. Set cma_size to NULL in all calls to parse_crashkernel(). Link: https://lkml.kernel.org/r/aEqnxxfLZMllMC8I@dwarf.suse.cz Link: https://lkml.kernel.org/r/aEqoQckgoTQNULnh@dwarf.suse.cz Signed-off-by: Jiri Bohac Cc: Baoquan He Cc: Dave Young Cc: Donald Dutile Cc: Michal Hocko Cc: Philipp Rudo Cc: Pingfan Liu Cc: Tao Liu Cc: Vivek Goyal Cc: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/crash_reserve.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h index 1fe7e7d1b214..e784aaff2f5a 100644 --- a/include/linux/crash_reserve.h +++ b/include/linux/crash_reserve.h @@ -16,7 +16,8 @@ extern struct resource crashk_low_res; int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base, - unsigned long long *low_size, bool *high); + unsigned long long *low_size, unsigned long long *cma_size, + bool *high); #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION #ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE -- cgit v1.2.3 From ab475510e0422bb5672d465f9d0f523d72fdb7f1 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 12 Jun 2025 12:16:39 +0200 Subject: kdump: implement reserve_crashkernel_cma reserve_crashkernel_cma() reserves CMA ranges for the crash kernel. If allocating the requested size fails, try to reserve in smaller blocks. Store the reserved ranges in the crashk_cma_ranges array and the number of ranges in crashk_cma_cnt. Link: https://lkml.kernel.org/r/aEqpBwOy_ekm0gw9@dwarf.suse.cz Signed-off-by: Jiri Bohac Cc: Baoquan He Cc: Dave Young Cc: David Hildenbrand Cc: Donald Dutile Cc: Michal Hocko Cc: Philipp Rudo Cc: Pingfan Liu Cc: Tao Liu Cc: Vivek Goyal Signed-off-by: Andrew Morton --- include/linux/crash_reserve.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h index e784aaff2f5a..7b44b41d0a20 100644 --- a/include/linux/crash_reserve.h +++ b/include/linux/crash_reserve.h @@ -13,12 +13,24 @@ */ extern struct resource crashk_res; extern struct resource crashk_low_res; +extern struct range crashk_cma_ranges[]; +#if defined(CONFIG_CMA) && defined(CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION) +#define CRASHKERNEL_CMA +#define CRASHKERNEL_CMA_RANGES_MAX 4 +extern int crashk_cma_cnt; +#else +#define crashk_cma_cnt 0 +#define CRASHKERNEL_CMA_RANGES_MAX 0 +#endif + int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base, unsigned long long *low_size, unsigned long long *cma_size, bool *high); +void __init reserve_crashkernel_cma(unsigned long long cma_size); + #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION #ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE #define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) -- cgit v1.2.3 From b76e89e50fc3693b7b8a443ed906320d8ccb93fd Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 3 Jul 2025 10:10:01 +0800 Subject: panic: generalize panic_print's function to show sys info 'panic_print' was introduced to help debugging kernel panic by dumping different kinds of system information like tasks' call stack, memory, ftrace buffer, etc. Actually this function could also be used to help debugging other cases like task-hung, soft/hard lockup, etc. where user may need the snapshot of system info at that time. Extract system info dump function related code from panic.c to separate file sys_info.[ch], for wider usage by other kernel parts for debugging. Also modify the macro names about singulars/plurals. Link: https://lkml.kernel.org/r/20250703021004.42328-3-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Suggested-by: Petr Mladek Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Steven Rostedt Cc: Nathan Chancellor Signed-off-by: Andrew Morton --- include/linux/sys_info.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/linux/sys_info.h (limited to 'include') diff --git a/include/linux/sys_info.h b/include/linux/sys_info.h new file mode 100644 index 000000000000..53b7e27dbf2a --- /dev/null +++ b/include/linux/sys_info.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SYS_INFO_H +#define _LINUX_SYS_INFO_H + +/* + * SYS_INFO_PANIC_CONSOLE_REPLAY is for panic case only, as it needs special + * handling which only fits panic case. + */ +#define SYS_INFO_TASKS 0x00000001 +#define SYS_INFO_MEM 0x00000002 +#define SYS_INFO_TIMERS 0x00000004 +#define SYS_INFO_LOCKS 0x00000008 +#define SYS_INFO_FTRACE 0x00000010 +#define SYS_INFO_PANIC_CONSOLE_REPLAY 0x00000020 +#define SYS_INFO_ALL_CPU_BT 0x00000040 +#define SYS_INFO_BLOCKED_TASKS 0x00000080 + +void sys_info(unsigned long si_mask); + +#endif /* _LINUX_SYS_INFO_H */ -- cgit v1.2.3 From d747755917bf8ae08f490c3fe7d8e321afab8127 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 3 Jul 2025 10:10:02 +0800 Subject: panic: add 'panic_sys_info' sysctl to take human readable string parameter Bitmap definition for 'panic_print' is hard to remember and decode. Add 'panic_sys_info='sysctl to take human readable string like "tasks,mem,timers,locks,ftrace,..." and translate it into bitmap. The detailed mapping is: SYS_INFO_TASKS "tasks" SYS_INFO_MEM "mem" SYS_INFO_TIMERS "timers" SYS_INFO_LOCKS "locks" SYS_INFO_FTRACE "ftrace" SYS_INFO_ALL_CPU_BT "all_bt" SYS_INFO_BLOCKED_TASKS "blocked_tasks" [nathan@kernel.org: add __maybe_unused to sys_info_avail] Link: https://lkml.kernel.org/r/20250708-fix-clang-sys_info_avail-warning-v1-1-60d239eacd64@kernel.org Link: https://lkml.kernel.org/r/20250703021004.42328-4-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Suggested-by: Petr Mladek Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Steven Rostedt Cc: Nathan Chancellor Cc: Andy Shevchenko Signed-off-by: Andrew Morton --- include/linux/sys_info.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/sys_info.h b/include/linux/sys_info.h index 53b7e27dbf2a..89d77dc4f2ed 100644 --- a/include/linux/sys_info.h +++ b/include/linux/sys_info.h @@ -2,6 +2,8 @@ #ifndef _LINUX_SYS_INFO_H #define _LINUX_SYS_INFO_H +#include + /* * SYS_INFO_PANIC_CONSOLE_REPLAY is for panic case only, as it needs special * handling which only fits panic case. @@ -16,5 +18,11 @@ #define SYS_INFO_BLOCKED_TASKS 0x00000080 void sys_info(unsigned long si_mask); +unsigned long sys_info_parse_param(char *str); +#ifdef CONFIG_SYSCTL +int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, + void *buffer, size_t *lenp, + loff_t *ppos); +#endif #endif /* _LINUX_SYS_INFO_H */ -- cgit v1.2.3 From ae2da51def76020fa16f53cd3446c00cafe41008 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Fri, 27 Jun 2025 15:29:22 +0800 Subject: locking/rwsem: make owner helpers globally available Patch series "extend hung task blocker tracking to rwsems". Inspired by mutex blocker tracking[1], and having already extended it to semaphores, let's now add support for reader-writer semaphores (rwsems). The approach is simple: when a task enters TASK_UNINTERRUPTIBLE while waiting for an rwsem, we just call hung_task_set_blocker(). The hung task detector can then query the rwsem's owner to identify the lock holder. Tracking works reliably for writers, as there can only be a single writer holding the lock, and its task struct is stored in the owner field. The main challenge lies with readers. The owner field points to only one of many concurrent readers, so we might lose track of the blocker if that specific reader unlocks, even while others remain. This is not a significant issue, however. In practice, long-lasting lock contention is almost always caused by a writer. Therefore, reliably tracking the writer is the primary goal of this patch series ;) With this change, the hung task detector can now show blocker task's info like below: [Fri Jun 27 15:21:34 2025] INFO: task cat:28631 blocked for more than 122 seconds. [Fri Jun 27 15:21:34 2025] Tainted: G S 6.16.0-rc3 #8 [Fri Jun 27 15:21:34 2025] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [Fri Jun 27 15:21:34 2025] task:cat state:D stack:0 pid:28631 tgid:28631 ppid:28501 task_flags:0x400000 flags:0x00004000 [Fri Jun 27 15:21:34 2025] Call Trace: [Fri Jun 27 15:21:34 2025] [Fri Jun 27 15:21:34 2025] __schedule+0x7c7/0x1930 [Fri Jun 27 15:21:34 2025] ? __pfx___schedule+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? policy_nodemask+0x215/0x340 [Fri Jun 27 15:21:34 2025] ? _raw_spin_lock_irq+0x8a/0xe0 [Fri Jun 27 15:21:34 2025] ? __pfx__raw_spin_lock_irq+0x10/0x10 [Fri Jun 27 15:21:34 2025] schedule+0x6a/0x180 [Fri Jun 27 15:21:34 2025] schedule_preempt_disabled+0x15/0x30 [Fri Jun 27 15:21:34 2025] rwsem_down_read_slowpath+0x55e/0xe10 [Fri Jun 27 15:21:34 2025] ? __pfx_rwsem_down_read_slowpath+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __pfx___might_resched+0x10/0x10 [Fri Jun 27 15:21:34 2025] down_read+0xc9/0x230 [Fri Jun 27 15:21:34 2025] ? __pfx_down_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __debugfs_file_get+0x14d/0x700 [Fri Jun 27 15:21:34 2025] ? __pfx___debugfs_file_get+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? handle_pte_fault+0x52a/0x710 [Fri Jun 27 15:21:34 2025] ? selinux_file_permission+0x3a9/0x590 [Fri Jun 27 15:21:34 2025] read_dummy_rwsem_read+0x4a/0x90 [Fri Jun 27 15:21:34 2025] full_proxy_read+0xff/0x1c0 [Fri Jun 27 15:21:34 2025] ? rw_verify_area+0x6d/0x410 [Fri Jun 27 15:21:34 2025] vfs_read+0x177/0xa50 [Fri Jun 27 15:21:34 2025] ? __pfx_vfs_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? fdget_pos+0x1cf/0x4c0 [Fri Jun 27 15:21:34 2025] ksys_read+0xfc/0x1d0 [Fri Jun 27 15:21:34 2025] ? __pfx_ksys_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] do_syscall_64+0x66/0x2d0 [Fri Jun 27 15:21:34 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Fri Jun 27 15:21:34 2025] RIP: 0033:0x7f3f8faefb40 [Fri Jun 27 15:21:34 2025] RSP: 002b:00007ffdeda5ab98 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Fri Jun 27 15:21:34 2025] RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007f3f8faefb40 [Fri Jun 27 15:21:34 2025] RDX: 0000000000010000 RSI: 00000000010fa000 RDI: 0000000000000003 [Fri Jun 27 15:21:34 2025] RBP: 00000000010fa000 R08: 0000000000000000 R09: 0000000000010fff [Fri Jun 27 15:21:34 2025] R10: 00007ffdeda59fe0 R11: 0000000000000246 R12: 00000000010fa000 [Fri Jun 27 15:21:34 2025] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000fff [Fri Jun 27 15:21:34 2025] [Fri Jun 27 15:21:34 2025] INFO: task cat:28631 blocked on an rw-semaphore likely owned by task cat:28630 [Fri Jun 27 15:21:34 2025] task:cat state:S stack:0 pid:28630 tgid:28630 ppid:28501 task_flags:0x400000 flags:0x00004000 [Fri Jun 27 15:21:34 2025] Call Trace: [Fri Jun 27 15:21:34 2025] [Fri Jun 27 15:21:34 2025] __schedule+0x7c7/0x1930 [Fri Jun 27 15:21:34 2025] ? __pfx___schedule+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __mod_timer+0x304/0xa80 [Fri Jun 27 15:21:34 2025] schedule+0x6a/0x180 [Fri Jun 27 15:21:34 2025] schedule_timeout+0xfb/0x230 [Fri Jun 27 15:21:34 2025] ? __pfx_schedule_timeout+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __pfx_process_timeout+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? down_write+0xc4/0x140 [Fri Jun 27 15:21:34 2025] msleep_interruptible+0xbe/0x150 [Fri Jun 27 15:21:34 2025] read_dummy_rwsem_write+0x54/0x90 [Fri Jun 27 15:21:34 2025] full_proxy_read+0xff/0x1c0 [Fri Jun 27 15:21:34 2025] ? rw_verify_area+0x6d/0x410 [Fri Jun 27 15:21:34 2025] vfs_read+0x177/0xa50 [Fri Jun 27 15:21:34 2025] ? __pfx_vfs_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? fdget_pos+0x1cf/0x4c0 [Fri Jun 27 15:21:34 2025] ksys_read+0xfc/0x1d0 [Fri Jun 27 15:21:34 2025] ? __pfx_ksys_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] do_syscall_64+0x66/0x2d0 [Fri Jun 27 15:21:34 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Fri Jun 27 15:21:34 2025] RIP: 0033:0x7f8f288efb40 [Fri Jun 27 15:21:34 2025] RSP: 002b:00007ffffb631038 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Fri Jun 27 15:21:34 2025] RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007f8f288efb40 [Fri Jun 27 15:21:34 2025] RDX: 0000000000010000 RSI: 000000002a4b5000 RDI: 0000000000000003 [Fri Jun 27 15:21:34 2025] RBP: 000000002a4b5000 R08: 0000000000000000 R09: 0000000000010fff [Fri Jun 27 15:21:34 2025] R10: 00007ffffb630460 R11: 0000000000000246 R12: 000000002a4b5000 [Fri Jun 27 15:21:34 2025] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000fff [Fri Jun 27 15:21:34 2025] This patch (of 3): In preparation for extending blocker tracking to support rwsems, make the rwsem_owner() and is_rwsem_reader_owned() helpers globally available for determining if the blocker is a writer or one of the readers. Additionally, a stale owner pointer in a reader-owned rwsem can lead to false positives in blocker tracking when CONFIG_DETECT_HUNG_TASK_BLOCKER is enabled. To mitigate this, clear the owner field on the reader unlock path, similar to what CONFIG_DEBUG_RWSEMS does. A NULL owner is better than a stale one for diagnostics. Link: https://lkml.kernel.org/r/20250627072924.36567-1-lance.yang@linux.dev Link: https://lkml.kernel.org/r/20250627072924.36567-2-lance.yang@linux.dev Link: https://lore.kernel.org/all/174046694331.2194069.15472952050240807469.stgit@mhiramat.tok.corp.google.com/ [1] Signed-off-by: Lance Yang Reviewed-by: Masami Hiramatsu (Google) Cc: Anna Schumaker Cc: Boqun Feng Cc: Ingo Molnar Cc: Joel Granados Cc: John Stultz Cc: Kent Overstreet Cc: Mingzhe Yang Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Tomasz Figa Cc: Waiman Long Cc: Will Deacon Cc: Yongliang Gao Cc: Zi Li Signed-off-by: Andrew Morton --- include/linux/rwsem.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index c8b543d428b0..544853bed5b9 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -132,6 +132,18 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem) return !list_empty(&sem->wait_list); } +#if defined(CONFIG_DEBUG_RWSEMS) || defined(CONFIG_DETECT_HUNG_TASK_BLOCKER) +/* + * Return just the real task structure pointer of the owner + */ +extern struct task_struct *rwsem_owner(struct rw_semaphore *sem); + +/* + * Return true if the rwsem is owned by a reader. + */ +extern bool is_rwsem_reader_owned(struct rw_semaphore *sem); +#endif + #else /* !CONFIG_PREEMPT_RT */ #include -- cgit v1.2.3 From 77da18de55ac6417e48905bec8b3c66f023b15a9 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Fri, 27 Jun 2025 15:29:23 +0800 Subject: hung_task: extend hung task blocker tracking to rwsems Inspired by mutex blocker tracking[1], and having already extended it to semaphores, let's now add support for reader-writer semaphores (rwsems). The approach is simple: when a task enters TASK_UNINTERRUPTIBLE while waiting for an rwsem, we just call hung_task_set_blocker(). The hung task detector can then query the rwsem's owner to identify the lock holder. Tracking works reliably for writers, as there can only be a single writer holding the lock, and its task struct is stored in the owner field. The main challenge lies with readers. The owner field points to only one of many concurrent readers, so we might lose track of the blocker if that specific reader unlocks, even while others remain. This is not a significant issue, however. In practice, long-lasting lock contention is almost always caused by a writer. Therefore, reliably tracking the writer is the primary goal of this patch series ;) With this change, the hung task detector can now show blocker task's info like below: [Fri Jun 27 15:21:34 2025] INFO: task cat:28631 blocked for more than 122 seconds. [Fri Jun 27 15:21:34 2025] Tainted: G S 6.16.0-rc3 #8 [Fri Jun 27 15:21:34 2025] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [Fri Jun 27 15:21:34 2025] task:cat state:D stack:0 pid:28631 tgid:28631 ppid:28501 task_flags:0x400000 flags:0x00004000 [Fri Jun 27 15:21:34 2025] Call Trace: [Fri Jun 27 15:21:34 2025] [Fri Jun 27 15:21:34 2025] __schedule+0x7c7/0x1930 [Fri Jun 27 15:21:34 2025] ? __pfx___schedule+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? policy_nodemask+0x215/0x340 [Fri Jun 27 15:21:34 2025] ? _raw_spin_lock_irq+0x8a/0xe0 [Fri Jun 27 15:21:34 2025] ? __pfx__raw_spin_lock_irq+0x10/0x10 [Fri Jun 27 15:21:34 2025] schedule+0x6a/0x180 [Fri Jun 27 15:21:34 2025] schedule_preempt_disabled+0x15/0x30 [Fri Jun 27 15:21:34 2025] rwsem_down_read_slowpath+0x55e/0xe10 [Fri Jun 27 15:21:34 2025] ? __pfx_rwsem_down_read_slowpath+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __pfx___might_resched+0x10/0x10 [Fri Jun 27 15:21:34 2025] down_read+0xc9/0x230 [Fri Jun 27 15:21:34 2025] ? __pfx_down_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __debugfs_file_get+0x14d/0x700 [Fri Jun 27 15:21:34 2025] ? __pfx___debugfs_file_get+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? handle_pte_fault+0x52a/0x710 [Fri Jun 27 15:21:34 2025] ? selinux_file_permission+0x3a9/0x590 [Fri Jun 27 15:21:34 2025] read_dummy_rwsem_read+0x4a/0x90 [Fri Jun 27 15:21:34 2025] full_proxy_read+0xff/0x1c0 [Fri Jun 27 15:21:34 2025] ? rw_verify_area+0x6d/0x410 [Fri Jun 27 15:21:34 2025] vfs_read+0x177/0xa50 [Fri Jun 27 15:21:34 2025] ? __pfx_vfs_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? fdget_pos+0x1cf/0x4c0 [Fri Jun 27 15:21:34 2025] ksys_read+0xfc/0x1d0 [Fri Jun 27 15:21:34 2025] ? __pfx_ksys_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] do_syscall_64+0x66/0x2d0 [Fri Jun 27 15:21:34 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Fri Jun 27 15:21:34 2025] RIP: 0033:0x7f3f8faefb40 [Fri Jun 27 15:21:34 2025] RSP: 002b:00007ffdeda5ab98 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Fri Jun 27 15:21:34 2025] RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007f3f8faefb40 [Fri Jun 27 15:21:34 2025] RDX: 0000000000010000 RSI: 00000000010fa000 RDI: 0000000000000003 [Fri Jun 27 15:21:34 2025] RBP: 00000000010fa000 R08: 0000000000000000 R09: 0000000000010fff [Fri Jun 27 15:21:34 2025] R10: 00007ffdeda59fe0 R11: 0000000000000246 R12: 00000000010fa000 [Fri Jun 27 15:21:34 2025] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000fff [Fri Jun 27 15:21:34 2025] [Fri Jun 27 15:21:34 2025] INFO: task cat:28631 blocked on an rw-semaphore likely owned by task cat:28630 [Fri Jun 27 15:21:34 2025] task:cat state:S stack:0 pid:28630 tgid:28630 ppid:28501 task_flags:0x400000 flags:0x00004000 [Fri Jun 27 15:21:34 2025] Call Trace: [Fri Jun 27 15:21:34 2025] [Fri Jun 27 15:21:34 2025] __schedule+0x7c7/0x1930 [Fri Jun 27 15:21:34 2025] ? __pfx___schedule+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __mod_timer+0x304/0xa80 [Fri Jun 27 15:21:34 2025] schedule+0x6a/0x180 [Fri Jun 27 15:21:34 2025] schedule_timeout+0xfb/0x230 [Fri Jun 27 15:21:34 2025] ? __pfx_schedule_timeout+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __pfx_process_timeout+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? down_write+0xc4/0x140 [Fri Jun 27 15:21:34 2025] msleep_interruptible+0xbe/0x150 [Fri Jun 27 15:21:34 2025] read_dummy_rwsem_write+0x54/0x90 [Fri Jun 27 15:21:34 2025] full_proxy_read+0xff/0x1c0 [Fri Jun 27 15:21:34 2025] ? rw_verify_area+0x6d/0x410 [Fri Jun 27 15:21:34 2025] vfs_read+0x177/0xa50 [Fri Jun 27 15:21:34 2025] ? __pfx_vfs_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? fdget_pos+0x1cf/0x4c0 [Fri Jun 27 15:21:34 2025] ksys_read+0xfc/0x1d0 [Fri Jun 27 15:21:34 2025] ? __pfx_ksys_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] do_syscall_64+0x66/0x2d0 [Fri Jun 27 15:21:34 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Fri Jun 27 15:21:34 2025] RIP: 0033:0x7f8f288efb40 [Fri Jun 27 15:21:34 2025] RSP: 002b:00007ffffb631038 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Fri Jun 27 15:21:34 2025] RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007f8f288efb40 [Fri Jun 27 15:21:34 2025] RDX: 0000000000010000 RSI: 000000002a4b5000 RDI: 0000000000000003 [Fri Jun 27 15:21:34 2025] RBP: 000000002a4b5000 R08: 0000000000000000 R09: 0000000000010fff [Fri Jun 27 15:21:34 2025] R10: 00007ffffb630460 R11: 0000000000000246 R12: 000000002a4b5000 [Fri Jun 27 15:21:34 2025] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000fff [Fri Jun 27 15:21:34 2025] [1] https://lore.kernel.org/all/174046694331.2194069.15472952050240807469.stgit@mhiramat.tok.corp.google.com/ Link: https://lkml.kernel.org/r/20250627072924.36567-3-lance.yang@linux.dev Signed-off-by: Lance Yang Suggested-by: Masami Hiramatsu (Google) Reviewed-by: Masami Hiramatsu (Google) Cc: Anna Schumaker Cc: Boqun Feng Cc: Ingo Molnar Cc: Joel Granados Cc: John Stultz Cc: Kent Overstreet Cc: Mingzhe Yang Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Tomasz Figa Cc: Waiman Long Cc: Will Deacon Cc: Yongliang Gao Cc: Zi Li Signed-off-by: Andrew Morton --- include/linux/hung_task.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h index 1bc2b3244613..34e615c76ca5 100644 --- a/include/linux/hung_task.h +++ b/include/linux/hung_task.h @@ -21,17 +21,17 @@ * type. * * Type encoding: - * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) - * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) - * 10 - Blocked on rt-mutex (BLOCKER_TYPE_RTMUTEX) - * 11 - Blocked on rw-semaphore (BLOCKER_TYPE_RWSEM) + * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) + * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) + * 10 - Blocked on rw-semaphore as READER (BLOCKER_TYPE_RWSEM_READER) + * 11 - Blocked on rw-semaphore as WRITER (BLOCKER_TYPE_RWSEM_WRITER) */ -#define BLOCKER_TYPE_MUTEX 0x00UL -#define BLOCKER_TYPE_SEM 0x01UL -#define BLOCKER_TYPE_RTMUTEX 0x02UL -#define BLOCKER_TYPE_RWSEM 0x03UL +#define BLOCKER_TYPE_MUTEX 0x00UL +#define BLOCKER_TYPE_SEM 0x01UL +#define BLOCKER_TYPE_RWSEM_READER 0x02UL +#define BLOCKER_TYPE_RWSEM_WRITER 0x03UL -#define BLOCKER_TYPE_MASK 0x03UL +#define BLOCKER_TYPE_MASK 0x03UL #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER static inline void hung_task_set_blocker(void *lock, unsigned long type) -- cgit v1.2.3 From b3d5fd6f82dde8c906dc2a587003a44252ae5eae Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 6 Jun 2025 21:47:56 +0800 Subject: lib/math/gcd: use static key to select implementation at runtime Patch series "Optimize GCD performance on RISC-V by selecting implementation at runtime", v3. The current implementation of gcd() selects between the binary GCD and the odd-even GCD algorithm at compile time, depending on whether CONFIG_CPU_NO_EFFICIENT_FFS is set. On platforms like RISC-V, however, this compile-time decision can be misleading: even when the compiler emits ctz instructions based on the assumption that they are efficient (as is the case when CONFIG_RISCV_ISA_ZBB is enabled), the actual hardware may lack support for the Zbb extension. In such cases, ffs() falls back to a software implementation at runtime, making the binary GCD algorithm significantly slower than the odd-even variant. To address this, we introduce a static key to allow runtime selection between the binary and odd-even GCD implementations. On RISC-V, the kernel now checks for Zbb support during boot. If Zbb is unavailable, the static key is disabled so that gcd() consistently uses the more efficient odd-even algorithm in that scenario. Additionally, to further reduce code size, we select CONFIG_CPU_NO_EFFICIENT_FFS automatically when CONFIG_RISCV_ISA_ZBB is not enabled, avoiding compilation of the unused binary GCD implementation entirely on systems where it would never be executed. This series ensures that the most efficient GCD algorithm is used in practice and avoids compiling unnecessary code based on hardware capabilities and kernel configuration. This patch (of 3): On platforms like RISC-V, the compiler may generate hardware FFS instructions even if the underlying CPU does not actually support them. Currently, the GCD implementation is chosen at compile time based on CONFIG_CPU_NO_EFFICIENT_FFS, which can result in suboptimal behavior on such systems. Introduce a static key, efficient_ffs_key, to enable runtime selection between the binary GCD (using ffs) and the odd-even GCD implementation. This allows the kernel to default to the faster binary GCD when FFS is efficient, while retaining the ability to fall back when needed. Link: https://lkml.kernel.org/r/20250606134758.1308400-1-visitorckw@gmail.com Link: https://lkml.kernel.org/r/20250606134758.1308400-2-visitorckw@gmail.com Co-developed-by: Yu-Chun Lin Signed-off-by: Yu-Chun Lin Signed-off-by: Kuan-Wei Chiu Cc: Albert Ou Cc: Ching-Chun (Jim) Huang Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Alexandre Ghiti Signed-off-by: Andrew Morton --- include/linux/gcd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/gcd.h b/include/linux/gcd.h index cb572677fd7f..616e81a7f7e3 100644 --- a/include/linux/gcd.h +++ b/include/linux/gcd.h @@ -3,6 +3,9 @@ #define _GCD_H #include +#include + +DECLARE_STATIC_KEY_TRUE(efficient_ffs_key); unsigned long gcd(unsigned long a, unsigned long b) __attribute_const__; -- cgit v1.2.3 From 438794e93f6271af93f0d16a1851725115b5fd51 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 17 Jul 2025 09:48:13 +0300 Subject: net/mlx5: Add IFC bits to support RSS for IPSec offload This adds the capabilities, ipsec_next_header and inner/outer l4_type_ext fields to support RSS for the decrypted packets. These fields are specifically for firmware steering. HWS validation logic is updated to correctly handle the changes, ensuring the unsupported fields are not set. Besides, reserved_at_c4 is fixed to reserved_at_d4 to reflect the accurate offset within the structure. Signed-off-by: Jianbo Liu Reviewed-by: Carolina Jubran Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1752734895-257735-2-git-send-email-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 639dd0b56655..c9a7773ac8ec 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -420,7 +420,8 @@ struct mlx5_ifc_flow_table_fields_supported_bits { /* Table 2170 - Flow Table Fields Supported 2 Format */ struct mlx5_ifc_flow_table_fields_supported_2_bits { - u8 reserved_at_0[0x2]; + u8 inner_l4_type_ext[0x1]; + u8 outer_l4_type_ext[0x1]; u8 inner_l4_type[0x1]; u8 outer_l4_type[0x1]; u8 reserved_at_4[0xa]; @@ -429,7 +430,11 @@ struct mlx5_ifc_flow_table_fields_supported_2_bits { u8 tunnel_header_0_1[0x1]; u8 reserved_at_11[0xf]; - u8 reserved_at_20[0x60]; + u8 reserved_at_20[0xf]; + u8 ipsec_next_header[0x1]; + u8 reserved_at_30[0x10]; + + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_flow_table_prop_layout_bits { @@ -552,6 +557,13 @@ enum { MLX5_PACKET_L4_TYPE_UDP, }; +enum { + MLX5_PACKET_L4_TYPE_EXT_NONE, + MLX5_PACKET_L4_TYPE_EXT_TCP, + MLX5_PACKET_L4_TYPE_EXT_UDP, + MLX5_PACKET_L4_TYPE_EXT_ICMP, +}; + struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 smac_47_16[0x20]; @@ -578,10 +590,10 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 tcp_dport[0x10]; u8 l4_type[0x2]; - u8 reserved_at_c2[0xe]; + u8 l4_type_ext[0x4]; + u8 reserved_at_c6[0xa]; u8 ipv4_ihl[0x4]; - u8 reserved_at_c4[0x4]; - + u8 reserved_at_d4[0x4]; u8 ttl_hoplimit[0x8]; u8 udp_sport[0x10]; @@ -689,10 +701,9 @@ struct mlx5_ifc_fte_match_set_misc2_bits { u8 metadata_reg_a[0x20]; u8 reserved_at_1a0[0x8]; - u8 macsec_syndrome[0x8]; u8 ipsec_syndrome[0x8]; - u8 reserved_at_1b8[0x8]; + u8 ipsec_next_header[0x8]; u8 reserved_at_1c0[0x40]; }; -- cgit v1.2.3 From 6f09ee0b583cad4f2b6a82842c26235bee3d5c2e Mon Sep 17 00:00:00 2001 From: Oren Sidi Date: Thu, 17 Jul 2025 09:48:14 +0300 Subject: net/mlx5: Add IFC bits and enums for buf_ownership Extend structure layouts and defines buf_ownership. buf_ownership indicates whether the buffer is managed by SW or FW. Signed-off-by: Oren Sidi Reviewed-by: Alex Lazar Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1752734895-257735-3-git-send-email-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c9a7773ac8ec..e1220aa1e7dc 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10474,8 +10474,16 @@ struct mlx5_ifc_pifr_reg_bits { u8 port_filter_update_en[8][0x20]; }; +enum { + MLX5_BUF_OWNERSHIP_UNKNOWN = 0x0, + MLX5_BUF_OWNERSHIP_FW_OWNED = 0x1, + MLX5_BUF_OWNERSHIP_SW_OWNED = 0x2, +}; + struct mlx5_ifc_pfcc_reg_bits { - u8 reserved_at_0[0x8]; + u8 reserved_at_0[0x4]; + u8 buf_ownership[0x2]; + u8 reserved_at_6[0x2]; u8 local_port[0x8]; u8 reserved_at_10[0xb]; u8 ppan_mask_n[0x1]; @@ -10611,7 +10619,9 @@ struct mlx5_ifc_pcam_enhanced_features_bits { u8 fec_200G_per_lane_in_pplm[0x1]; u8 reserved_at_1e[0x2a]; u8 fec_100G_per_lane_in_pplm[0x1]; - u8 reserved_at_49[0x1f]; + u8 reserved_at_49[0xa]; + u8 buffer_ownership[0x1]; + u8 resereved_at_54[0x14]; u8 fec_50G_per_lane_in_pplm[0x1]; u8 reserved_at_69[0x4]; u8 rx_icrc_encapsulated_counter[0x1]; -- cgit v1.2.3 From 9a0048e0ae14cb7babfd459ec920234e8a2ab86e Mon Sep 17 00:00:00 2001 From: Oren Sidi Date: Thu, 17 Jul 2025 09:48:15 +0300 Subject: net/mlx5: Expose cable_length field in PFCC register Introduce new "cable_length" field in PFCC register and related fields to enhance rx buffer configuration management: 1. cable_length: Shifts cable length handling to fw by storing a manually entered length from user in PFCC.cable_length 2. lane_rate_oper: In a case where PFCC.cable_length is not supported, helps compute a default cable length Signed-off-by: Oren Sidi Reviewed-by: Alex Lazar Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1752734895-257735-4-git-send-email-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index e1220aa1e7dc..ed4130e49c27 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9994,6 +9994,10 @@ struct mlx5_ifc_pude_reg_bits { u8 reserved_at_20[0x60]; }; +enum { + MLX5_PTYS_CONNECTOR_TYPE_PORT_DA = 0x7, +}; + struct mlx5_ifc_ptys_reg_bits { u8 reserved_at_0[0x1]; u8 an_disable_admin[0x1]; @@ -10030,7 +10034,8 @@ struct mlx5_ifc_ptys_reg_bits { u8 ib_link_width_oper[0x10]; u8 ib_proto_oper[0x10]; - u8 reserved_at_160[0x1c]; + u8 reserved_at_160[0x8]; + u8 lane_rate_oper[0x14]; u8 connector_type[0x4]; u8 eth_proto_lp_advertise[0x20]; @@ -10485,7 +10490,8 @@ struct mlx5_ifc_pfcc_reg_bits { u8 buf_ownership[0x2]; u8 reserved_at_6[0x2]; u8 local_port[0x8]; - u8 reserved_at_10[0xb]; + u8 reserved_at_10[0xa]; + u8 cable_length_mask[0x1]; u8 ppan_mask_n[0x1]; u8 minor_stall_mask[0x1]; u8 critical_stall_mask[0x1]; @@ -10514,7 +10520,10 @@ struct mlx5_ifc_pfcc_reg_bits { u8 device_stall_minor_watermark[0x10]; u8 device_stall_critical_watermark[0x10]; - u8 reserved_at_a0[0x60]; + u8 reserved_at_a0[0x18]; + u8 cable_length[0x8]; + + u8 reserved_at_c0[0x40]; }; struct mlx5_ifc_pelc_reg_bits { @@ -10615,7 +10624,9 @@ struct mlx5_ifc_mtutc_reg_bits { struct mlx5_ifc_pcam_enhanced_features_bits { u8 reserved_at_0[0x10]; u8 ppcnt_recovery_counters[0x1]; - u8 reserved_at_11[0xc]; + u8 reserved_at_11[0x7]; + u8 cable_length[0x1]; + u8 reserved_at_19[0x4]; u8 fec_200G_per_lane_in_pplm[0x1]; u8 reserved_at_1e[0x2a]; u8 fec_100G_per_lane_in_pplm[0x1]; -- cgit v1.2.3 From 7114b74d99a3cd588da4ecb6011858c06f8408a1 Mon Sep 17 00:00:00 2001 From: Florin Leotescu Date: Tue, 3 Jun 2025 14:31:22 +0300 Subject: hwmon: (emc2305) Add support for PWM frequency, polarity and output Add three new attributes to the driver data structures to support configuration of PWM frequency, PWM polarity and PWM output config. Signed-off-by: Florin Leotescu Link: https://lore.kernel.org/r/20250603113125.3175103-2-florin.leotescu@oss.nxp.com Signed-off-by: Guenter Roeck --- include/linux/platform_data/emc2305.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/emc2305.h b/include/linux/platform_data/emc2305.h index 54d672dd6f7d..76043a97f975 100644 --- a/include/linux/platform_data/emc2305.h +++ b/include/linux/platform_data/emc2305.h @@ -9,14 +9,20 @@ * struct emc2305_platform_data - EMC2305 driver platform data * @max_state: maximum cooling state of the cooling device; * @pwm_num: number of active channels; + * @pwm_output_mask: PWM output mask + * @pwm_polarity_mask: PWM polarity mask * @pwm_separate: separate PWM settings for every channel; * @pwm_min: array of minimum PWM per channel; + * @pwm_freq: array of PWM frequency per channel */ struct emc2305_platform_data { u8 max_state; u8 pwm_num; + u8 pwm_output_mask; + u8 pwm_polarity_mask; bool pwm_separate; u8 pwm_min[EMC2305_PWM_MAX]; + u16 pwm_freq[EMC2305_PWM_MAX]; }; #endif -- cgit v1.2.3 From 34b1cb4ec286603127aa8c4191ea527eb8dd3567 Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Tue, 15 Jul 2025 17:40:41 +0530 Subject: soundwire: amd: Add support for acp7.2 platform Add soundwire support for acp7.2 platform. Signed-off-by: Venkata Prasad Potturu Link: https://lore.kernel.org/r/20250715121048.1795607-1-venkataprasad.potturu@amd.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw_amd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h index 6b839987f14c..fe31773d5210 100644 --- a/include/linux/soundwire/sdw_amd.h +++ b/include/linux/soundwire/sdw_amd.h @@ -30,6 +30,7 @@ #define ACP63_PCI_REV_ID 0x63 #define ACP70_PCI_REV_ID 0x70 #define ACP71_PCI_REV_ID 0x71 +#define ACP72_PCI_REV_ID 0x72 struct acp_sdw_pdata { u16 instance; -- cgit v1.2.3 From 0b0cd1857b783711b4bdfb8eb513c263b8a84f6d Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 7 Jul 2025 10:24:37 +0800 Subject: dt-bindings: clock: Add support for i.MX94 LVDS/DISPLAY CSR Add i.MX94 LVDS/DISPLAY CSR compatible string. Add clock index for the two CSRs. Reviewed-by: Abel Vesa Signed-off-by: Peng Fan Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250707-imx95-blk-ctl-7-1-v3-1-c1b676ec13be@nxp.com Signed-off-by: Abel Vesa --- include/dt-bindings/clock/nxp,imx94-clock.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/dt-bindings/clock/nxp,imx94-clock.h (limited to 'include') diff --git a/include/dt-bindings/clock/nxp,imx94-clock.h b/include/dt-bindings/clock/nxp,imx94-clock.h new file mode 100644 index 000000000000..c4ba13352b99 --- /dev/null +++ b/include/dt-bindings/clock/nxp,imx94-clock.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright 2025 NXP + */ + +#ifndef __DT_BINDINGS_CLOCK_IMX94_H +#define __DT_BINDINGS_CLOCK_IMX94_H + +#define IMX94_CLK_DISPMIX_CLK_SEL 0 + +#define IMX94_CLK_DISPMIX_LVDS_CLK_GATE 0 + +#endif /* __DT_BINDINGS_CLOCK_IMX94_H */ -- cgit v1.2.3 From ee4a2e08c10188f02fe3fb36b6beddc3c2fdb287 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 8 Jul 2025 19:27:41 +1000 Subject: entry: Add arch_in_rcu_eqs() All architectures have an interruptible RCU extended quiescent state (EQS) as part of their idle sequences, where interrupts can occur without RCU watching. Entry code must account for this and wake RCU as necessary; the common entry code deals with this in irqentry_enter() by treating any interrupt from an idle thread as potentially having occurred within an EQS and waking RCU for the duration of the interrupt via rcu_irq_enter() .. rcu_irq_exit(). Some architectures may have other interruptible EQSs which require similar treatment. For example, on s390 it is necessary to enable interrupts around guest entry in the middle of a period where core KVM code has entered an EQS. So that architectures can wake RCU in these cases, this patch adds a new arch_in_rcu_eqs() hook to the common entry code which is checked in addition to the existing is_idle_thread() check, with RCU woken if either returns true. A default implementation is provided which always returns false, which suffices for most architectures. As no architectures currently implement arch_in_rcu_eqs(), there should be no functional change as a result of this patch alone. A subsequent patch will add an s390 implementation to fix a latent bug with missing RCU wakeups. [ajd@linux.ibm.com: rebase, fix commit message] Signed-off-by: Mark Rutland Cc: Andy Lutomirski Cc: Christian Borntraeger Cc: Heiko Carstens Cc: Paolo Bonzini Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Claudio Imbrenda Cc: Vasily Gorbik Cc: Alexander Gordeev Cc: Janosch Frank Reviewed-by: Christian Borntraeger Signed-off-by: Andrew Donnellan Acked-by: Peter Zijlstra (Intel) Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20250708092742.104309-2-ajd@linux.ibm.com Signed-off-by: Janosch Frank Message-ID: <20250708092742.104309-2-ajd@linux.ibm.com> --- include/linux/entry-common.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index f94f3fdf15fc..3bf99cbad8a3 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -86,6 +86,22 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs); static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {} #endif +/** + * arch_in_rcu_eqs - Architecture specific check for RCU extended quiescent + * states. + * + * Returns: true if the CPU is potentially in an RCU EQS, false otherwise. + * + * Architectures only need to define this if threads other than the idle thread + * may have an interruptible EQS. This does not need to handle idle threads. It + * is safe to over-estimate at the cost of redundant RCU management work. + * + * Invoked from irqentry_enter() + */ +#ifndef arch_in_rcu_eqs +static __always_inline bool arch_in_rcu_eqs(void) { return false; } +#endif + /** * enter_from_user_mode - Establish state when coming from user mode * -- cgit v1.2.3 From 2b752ae0231f7b20cd2b8cad0b4ab36b16d4be88 Mon Sep 17 00:00:00 2001 From: Gary Yang Date: Mon, 21 Jul 2025 22:44:58 +0800 Subject: dt-bindings: clock: cix: Add CIX sky1 scmi clock id Add device tree bindings for the scmi clock id on Cix sky1 platform. Acked-by: Krzysztof Kozlowski Reviewed-by: Peter Chen Signed-off-by: Gary Yang Signed-off-by: Peter Chen Signed-off-by: Arnd Bergmann --- include/dt-bindings/clock/cix,sky1.h | 279 +++++++++++++++++++++++++++++++++++ 1 file changed, 279 insertions(+) create mode 100644 include/dt-bindings/clock/cix,sky1.h (limited to 'include') diff --git a/include/dt-bindings/clock/cix,sky1.h b/include/dt-bindings/clock/cix,sky1.h new file mode 100644 index 000000000000..9245ebd1e80a --- /dev/null +++ b/include/dt-bindings/clock/cix,sky1.h @@ -0,0 +1,279 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright 2024-2025 Cix Technology Group Co., Ltd. + */ + +#ifndef _DT_BINDINGS_CLK_CIX_SKY1_H +#define _DT_BINDINGS_CLK_CIX_SKY1_H + +#define CLK_TREE_CPU_GICxCLK 0 +#define CLK_TREE_CPU_PPUCLK 1 +#define CLK_TREE_CPU_PERIPHCLK 2 +#define CLK_TREE_DSU_CLK 3 +#define CLK_TREE_DSU_PCLK 4 +#define CLK_TREE_CPU_CLK_BC0 5 +#define CLK_TREE_CPU_CLK_BC1 6 +#define CLK_TREE_CPU_CLK_BC2 7 +#define CLK_TREE_CPU_CLK_BC3 8 +#define CLK_TREE_CPU_CLK_MC0 9 +#define CLK_TREE_CPU_CLK_MC1 10 +#define CLK_TREE_CPU_CLK_MC2 11 +#define CLK_TREE_CPU_CLK_MC3 12 +#define CLK_TREE_CPU_CLK_LC0 13 +#define CLK_TREE_CPU_CLK_LC1 14 +#define CLK_TREE_CPU_CLK_LC2 15 +#define CLK_TREE_CPU_CLK_LC3 16 +#define CLK_TREE_CSI_CTRL0_PCLK 17 +#define CLK_TREE_CSI_CTRL1_PCLK 18 +#define CLK_TREE_CSI_CTRL2_PCLK 19 +#define CLK_TREE_CSI_CTRL3_PCLK 20 +#define CLK_TREE_CSI_DMA0_PCLK 21 +#define CLK_TREE_CSI_DMA1_PCLK 22 +#define CLK_TREE_CSI_DMA2_PCLK 23 +#define CLK_TREE_CSI_DMA3_PCLK 24 +#define CLK_TREE_CSI_PHY0_PSM 25 +#define CLK_TREE_CSI_PHY1_PSM 26 +#define CLK_TREE_CSI_PHY0_APBCLK 27 +#define CLK_TREE_CSI_PHY1_APBCLK 28 +#define CLK_TREE_FCH_APB_CLK 29 +#define CLK_TREE_GPU_CLK_400M 30 +#define CLK_TREE_GPU_CLK_CORE 31 +#define CLK_TREE_GPU_CLK_STACKS 32 +#define CLK_TREE_DP0_PIXEL0 33 +#define CLK_TREE_DP0_PIXEL1 34 +#define CLK_TREE_DP1_PIXEL0 35 +#define CLK_TREE_DP1_PIXEL1 36 +#define CLK_TREE_DP2_PIXEL0 37 +#define CLK_TREE_DP2_PIXEL1 38 +#define CLK_TREE_DP3_PIXEL0 39 +#define CLK_TREE_DP3_PIXEL1 40 +#define CLK_TREE_DP4_PIXEL0 41 +#define CLK_TREE_DP4_PIXEL1 42 +#define CLK_TREE_DPU_CLK 43 +#define CLK_TREE_DPU0_ACLK 44 +#define CLK_TREE_DPU1_ACLK 45 +#define CLK_TREE_DPU2_ACLK 46 +#define CLK_TREE_DPU3_ACLK 47 +#define CLK_TREE_DPU4_ACLK 48 +#define CLK_TREE_DPC0_VIDCLK0 49 +#define CLK_TREE_DPC0_VIDCLK1 50 +#define CLK_TREE_DPC1_VIDCLK0 51 +#define CLK_TREE_DPC1_VIDCLK1 52 +#define CLK_TREE_DPC2_VIDCLK0 53 +#define CLK_TREE_DPC2_VIDCLK1 54 +#define CLK_TREE_DPC3_VIDCLK0 55 +#define CLK_TREE_DPC3_VIDCLK1 56 +#define CLK_TREE_DPC4_VIDCLK0 57 +#define CLK_TREE_DPC4_VIDCLK1 58 +#define CLK_TREE_DPC0_APBCLK 59 +#define CLK_TREE_DPC1_APBCLK 60 +#define CLK_TREE_DPC2_APBCLK 61 +#define CLK_TREE_DPC3_APBCLK 62 +#define CLK_TREE_DPC4_APBCLK 63 +#define CLK_TREE_NPU_MEMCLK 64 +#define CLK_TREE_NPU_SYSCLK 65 +#define CLK_TREE_NPU_DBGCLK 66 +#define CLK_TREE_VPU_APBCLK 67 +#define CLK_TREE_ISP_ACLK 68 +#define CLK_TREE_ISP_SCLK 69 +#define CLK_TREE_AUDIO_CLK4 70 +#define CLK_TREE_AUDIO_CLK5 71 +#define CLK_TREE_CAMERA_MCLK0 72 +#define CLK_TREE_CAMERA_MCLK1 73 +#define CLK_TREE_CAMERA_MCLK2 74 +#define CLK_TREE_CAMERA_MCLK3 75 +#define CLK_TREE_AUDIO_CLK0 76 +#define CLK_TREE_AUDIO_CLK1 77 +#define CLK_TREE_AUDIO_CLK2 78 +#define CLK_TREE_AUDIO_CLK3 79 +#define CLK_TREE_MM_NI700_CLK 80 +#define CLK_TREE_SYS_NI700_CLK 81 +#define CLK_TREE_GMAC0_ACLK 82 +#define CLK_TREE_GMAC1_ACLK 83 +#define CLK_TREE_GMAC0_DIV_ACLK 84 +#define CLK_TREE_GMAC0_DIV_TXCLK 85 +#define CLK_TREE_GMAC0_RGMII0_TXCLK 86 +#define CLK_TREE_GMAC1_DIV_ACLK 87 +#define CLK_TREE_GMAC1_DIV_TXCLK 88 +#define CLK_TREE_GMAC1_RGMII0_TXCLK 89 +#define CLK_TREE_GMAC0_PCLK 90 +#define CLK_TREE_GMAC1_PCLK 91 +#define CLK_TREE_USB2_0_AXI_GATE 92 +#define CLK_TREE_USB2_0_APB_GATE 93 +#define CLK_TREE_USB2_1_AXI_GATE 94 +#define CLK_TREE_USB2_1_APB_GATE 95 +#define CLK_TREE_USB2_2_AXI_GATE 96 +#define CLK_TREE_USB2_2_APB_GATE 97 +#define CLK_TREE_USB2_3_AXI_GATE 98 +#define CLK_TREE_USB2_3_APB_GATE 99 +#define CLK_TREE_USB2_0_PHY_GATE 100 +#define CLK_TREE_USB2_1_PHY_GATE 101 +#define CLK_TREE_USB2_2_PHY_GATE 102 +#define CLK_TREE_USB2_3_PHY_GATE 103 +#define CLK_TREE_USB3C_DRD_AXI_GATE 104 +#define CLK_TREE_USB3C_DRD_APB_GATE 105 +#define CLK_TREE_USB3C_DRD_PHY2_GATE 106 +#define CLK_TREE_USB3C_DRD_PHY3_GATE 107 +#define CLK_TREE_USB3C_0_AXI_GATE 108 +#define CLK_TREE_USB3C_0_APB_GATE 109 +#define CLK_TREE_USB3C_0_PHY2_GATE 110 +#define CLK_TREE_USB3C_0_PHY3_GATE 111 +#define CLK_TREE_USB3C_1_AXI_GATE 112 +#define CLK_TREE_USB3C_1_APB_GATE 113 +#define CLK_TREE_USB3C_1_PHY2_GATE 114 +#define CLK_TREE_USB3C_1_PHY3_GATE 115 +#define CLK_TREE_USB3C_2_AXI_GATE 116 +#define CLK_TREE_USB3C_2_APB_GATE 117 +#define CLK_TREE_USB3C_2_PHY2_GATE 118 +#define CLK_TREE_USB3C_2_PHY3_GATE 119 +#define CLK_TREE_USB3A_0_AXI_GATE 120 +#define CLK_TREE_USB3A_0_APB_GATE 121 +#define CLK_TREE_USB3A_0_PHY2_GATE 122 +#define CLK_TREE_USB3A_1_AXI_GATE 123 +#define CLK_TREE_USB3A_1_APB_GATE 124 +#define CLK_TREE_USB3A_1_PHY2_GATE 125 +#define CLK_TREE_USB3A_PHY3_GATE 126 +#define CLK_TREE_USB2_0_CLK_SOF 127 +#define CLK_TREE_USB2_1_CLK_SOF 128 +#define CLK_TREE_USB2_2_CLK_SOF 129 +#define CLK_TREE_USB2_3_CLK_SOF 130 +#define CLK_TREE_USB3C_DRD_CLK_SOF 131 +#define CLK_TREE_USB3C_H0_CLK_SOF 132 +#define CLK_TREE_USB3C_H1_CLK_SOF 133 +#define CLK_TREE_USB3C_H2_CLK_SOF 134 +#define CLK_TREE_USB3A_H0_CLK_SOF 135 +#define CLK_TREE_USB3A_H1_CLK_SOF 136 +#define CLK_TREE_USB2_0_CLK_LPM 137 +#define CLK_TREE_USB2_1_CLK_LPM 138 +#define CLK_TREE_USB2_2_CLK_LPM 139 +#define CLK_TREE_USB2_3_CLK_LPM 140 +#define CLK_TREE_USB3C_DRD_CLK_LPM 141 +#define CLK_TREE_USB3C_H0_CLK_LPM 142 +#define CLK_TREE_USB3C_H1_CLK_LPM 143 +#define CLK_TREE_USB3C_H2_CLK_LPM 144 +#define CLK_TREE_USB3A_H0_CLK_LPM 145 +#define CLK_TREE_USB3A_H1_CLK_LPM 146 +#define CLK_TREE_USB2_0_PHY_REF 147 +#define CLK_TREE_USB2_1_PHY_REF 148 +#define CLK_TREE_USB2_2_PHY_REF 149 +#define CLK_TREE_USB2_3_PHY_REF 150 +#define CLK_TREE_USB3C_DRD_PHY_REF 151 +#define CLK_TREE_USB3C_H0_PHY_REF 152 +#define CLK_TREE_USB3C_H1_PHY_REF 153 +#define CLK_TREE_USB3C_H2_PHY_REF 154 +#define CLK_TREE_USB3A_H0_PHY_REF 155 +#define CLK_TREE_USB3A_H1_PHY_REF 156 +#define CLK_TREE_USB3C_DRD_PHY_x4_REF 157 +#define CLK_TREE_USB3C_H0_PHY_x4_REF 158 +#define CLK_TREE_USB3C_H1_PHY_x4_REF 159 +#define CLK_TREE_USB3C_H2_PHY_x4_REF 160 +#define CLK_TREE_USB3A_PHY_x2_REF 161 +#define CLK_TREE_PCIE_X8CTRL_APB 162 +#define CLK_TREE_PCIE_X4CTRL_APB 163 +#define CLK_TREE_PCIE_X2CTRL_APB 164 +#define CLK_TREE_PCIE_X1_0CTRL_APB 165 +#define CLK_TREE_PCIE_X1_1CTRL_APB 166 +#define CLK_TREE_PCIE_X8_PHY_APB 167 +#define CLK_TREE_PCIE_X4_PHY_APB 168 +#define CLK_TREE_PCIE_X211_PHY_APB 169 +#define CLK_TREE_PCIE_NI700_CLK 170 +#define CLK_TREE_PCIE_CTRL0_CLK 171 +#define CLK_TREE_PCIE_CTRL1_CLK 172 +#define CLK_TREE_PCIE_CTRL2_CLK 173 +#define CLK_TREE_PCIE_CTRL3_CLK 174 +#define CLK_TREE_PCIE_CTRL4_CLK 175 +#define CLK_TREE_CSI_CTRL0_SYSCLK 176 +#define CLK_TREE_CSI_CTRL1_SYSCLK 177 +#define CLK_TREE_CSI_CTRL2_SYSCLK 178 +#define CLK_TREE_CSI_CTRL3_SYSCLK 179 +#define CLK_TREE_CSI_CTRL0_PIXEL0_CLK 180 +#define CLK_TREE_CSI_CTRL0_PIXEL1_CLK 181 +#define CLK_TREE_CSI_CTRL0_PIXEL2_CLK 182 +#define CLK_TREE_CSI_CTRL0_PIXEL3_CLK 183 +#define CLK_TREE_CSI_CTRL1_PIXEL0_CLK 184 +#define CLK_TREE_CSI_CTRL2_PIXEL0_CLK 185 +#define CLK_TREE_CSI_CTRL2_PIXEL1_CLK 186 +#define CLK_TREE_CSI_CTRL2_PIXEL2_CLK 187 +#define CLK_TREE_CSI_CTRL2_PIXEL3_CLK 188 +#define CLK_TREE_CSI_CTRL3_PIXEL0_CLK 189 +#define CLK_TREE_CI700_GCLK0 190 +#define CLK_TREE_DDRC0_ACLK_CLK 191 +#define CLK_TREE_DDRC1_ACLK_CLK 192 +#define CLK_TREE_DDRC2_ACLK_CLK 193 +#define CLK_TREE_DDRC3_ACLK_CLK 194 +#define CLK_TREE_DDRC0_DFICLK_CLK 195 +#define CLK_TREE_DDRC1_DFICLK_CLK 196 +#define CLK_TREE_DDRC2_DFICLK_CLK 197 +#define CLK_TREE_DDRC3_DFICLK_CLK 198 +#define CLK_TREE_PHY0_SYNC_CLK 199 +#define CLK_TREE_PHY1_SYNC_CLK 200 +#define CLK_TREE_PHY2_SYNC_CLK 201 +#define CLK_TREE_PHY3_SYNC_CLK 202 +#define CLK_TREE_PHY0_BYPASS_CLK 203 +#define CLK_TREE_PHY1_BYPASS_CLK 204 +#define CLK_TREE_PHY2_BYPASS_CLK 205 +#define CLK_TREE_PHY3_BYPASS_CLK 206 +#define CLK_TREE_DDRC_0_APB 207 +#define CLK_TREE_DDRC_1_APB 208 +#define CLK_TREE_DDRC_2_APB 209 +#define CLK_TREE_DDRC_3_APB 210 +#define CLK_TREE_TZC400_0_APB 211 +#define CLK_TREE_TZC400_1_APB 212 +#define CLK_TREE_TZC400_2_APB 213 +#define CLK_TREE_TZC400_3_APB 214 +#define CLK_TREE_S5_SENSOR_HUB_25M 215 +#define CLK_TREE_S5_SENSOR_HUB_400M 216 +#define CLK_TREE_S5_CSS600_100M 217 +#define CLK_TREE_S5_DFD_800M 218 +#define CLK_TREE_S5_CSU_SE_800M 219 +#define CLK_TREE_S5_CSU_PM_800M 220 +#define CLK_TREE_PCIE_REF_B0 221 +#define CLK_TREE_PCIE_REF_B1 222 +#define CLK_TREE_PCIE_REF_B2 223 +#define CLK_TREE_PCIE_REF_B3 224 +#define CLK_TREE_PCIE_REF_B4 225 +#define CLK_TREE_PCIE_REF_PHY_X8 226 +#define CLK_TREE_PCIE_REF_PHY_X4 227 +#define CLK_TREE_PCIE_REF_PHY_X211 228 +#define CLK_TREE_GMAC_REC_CLK 229 +#define CLK_TREE_GPUTOP_PLL 230 +#define CLK_TREE_GPUCORE_PLL 231 +#define CLK_TREE_CPU_PLL_LIT 232 +#define CLK_TREE_CPU_PLL0 233 +#define CLK_TREE_CPU_PLL1 234 +#define CLK_TREE_CPU_PLL2 235 +#define CLK_TREE_CPU_PLL3 236 +#define CLK_TREE_FCH_I3C0_FUNC 237 +#define CLK_TREE_FCH_I3C1_FUNC 238 +#define CLK_TREE_FCH_DMA_ACLK 239 +#define CLK_TREE_FCH_XSPI_FUNC 240 +#define CLK_TREE_FCH_XSPI_MACLK 241 +#define CLK_TREE_FCH_TIMER_FUN 242 +#define CLK_TREE_FCH_APB_IO_S0 243 +#define CLK_TREE_FCH_I3C0_APB 244 +#define CLK_TREE_FCH_I3C1_APB 245 +#define CLK_TREE_FCH_UART0_APB 246 +#define CLK_TREE_FCH_UART1_APB 247 +#define CLK_TREE_FCH_UART2_APB 248 +#define CLK_TREE_FCH_UART3_APB 249 +#define CLK_TREE_FCH_SPI0_APB 250 +#define CLK_TREE_FCH_SPI1_APB 251 +#define CLK_TREE_FCH_XSPI_APB 252 +#define CLK_TREE_FCH_I2C0_APB 253 +#define CLK_TREE_FCH_I2C1_APB 254 +#define CLK_TREE_FCH_I2C2_APB 255 +#define CLK_TREE_FCH_I2C3_APB 256 +#define CLK_TREE_FCH_I2C4_APB 257 +#define CLK_TREE_FCH_I2C5_APB 258 +#define CLK_TREE_FCH_I2C6_APB 259 +#define CLK_TREE_FCH_I2C7_APB 260 +#define CLK_TREE_FCH_TIMER_APB 261 +#define CLK_TREE_FCH_GPIO_APB 262 +#define CLK_TREE_FCH_UART0_FUNC 263 +#define CLK_TREE_FCH_UART1_FUNC 264 +#define CLK_TREE_FCH_UART2_FUNC 265 +#define CLK_TREE_FCH_UART3_FUNC 266 +/* 267~271 not used by AP, skip */ +#define CLK_TREE_GPU_CLK_200M 272 + +#endif -- cgit v1.2.3 From fcb476990beb55c958db0b5aa2e9ca772d0fc982 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 21 Jul 2025 18:44:17 +0800 Subject: usb: core: add urb->sgt parameter description The parameter description of urb->sgt is lost, this will add it for completeness. Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/all/20250711182803.1d548467@canb.auug.org.au/ Signed-off-by: Xu Yang Fixes: 488e6eaab88c ("usb: core: add dma-noncoherent buffer alloc and free API") Link: https://lore.kernel.org/r/20250721104417.3442530-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index 535ac37198a1..9d662c6abb4d 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1455,6 +1455,10 @@ typedef void (*usb_complete_t)(struct urb *); * @sg: scatter gather buffer list, the buffer size of each element in * the list (except the last) must be divisible by the endpoint's * max packet size if no_sg_constraint isn't set in 'struct usb_bus' + * @sgt: used to hold a scatter gather table returned by usb_alloc_noncoherent(), + * which describes the allocated non-coherent and possibly non-contiguous + * memory and is guaranteed to have 1 single DMA mapped segment. The + * allocated memory needs to be freed by usb_free_noncoherent(). * @num_mapped_sgs: (internal) number of mapped sg entries * @num_sgs: number of entries in the sg list * @transfer_buffer_length: How big is transfer_buffer. The transfer may -- cgit v1.2.3 From 0f29e33fbadd7517b96f3f3e86220215d99875cb Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Wed, 9 Jul 2025 15:14:49 +0200 Subject: dt-bindings: interconnect: document the RPMh Network-On-Chip Interconnect in Qualcomm Milos SoC Document the RPMh Network-On-Chip Interconnect of the Milos (e.g. SM7635) SoC. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Luca Weiss Link: https://lore.kernel.org/r/20250709-sm7635-icc-v3-1-c446203c3b3a@fairphone.com Signed-off-by: Georgi Djakov --- include/dt-bindings/interconnect/qcom,milos-rpmh.h | 141 +++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 include/dt-bindings/interconnect/qcom,milos-rpmh.h (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,milos-rpmh.h b/include/dt-bindings/interconnect/qcom,milos-rpmh.h new file mode 100644 index 000000000000..9326d7d9c2a3 --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,milos-rpmh.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2025, Luca Weiss + */ + +#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_MILOS_H +#define __DT_BINDINGS_INTERCONNECT_QCOM_MILOS_H + +#define MASTER_QUP_1 0 +#define MASTER_UFS_MEM 1 +#define MASTER_USB3_0 2 +#define SLAVE_A1NOC_SNOC 3 + +#define MASTER_QDSS_BAM 0 +#define MASTER_QSPI_0 1 +#define MASTER_QUP_0 2 +#define MASTER_CRYPTO 3 +#define MASTER_IPA 4 +#define MASTER_QDSS_ETR 5 +#define MASTER_QDSS_ETR_1 6 +#define MASTER_SDCC_1 7 +#define MASTER_SDCC_2 8 +#define SLAVE_A2NOC_SNOC 9 + +#define MASTER_QUP_CORE_0 0 +#define MASTER_QUP_CORE_1 1 +#define SLAVE_QUP_CORE_0 2 +#define SLAVE_QUP_CORE_1 3 + +#define MASTER_CNOC_CFG 0 +#define SLAVE_AHB2PHY_SOUTH 1 +#define SLAVE_AHB2PHY_NORTH 2 +#define SLAVE_CAMERA_CFG 3 +#define SLAVE_CLK_CTL 4 +#define SLAVE_RBCPR_CX_CFG 5 +#define SLAVE_RBCPR_MXA_CFG 6 +#define SLAVE_CRYPTO_0_CFG 7 +#define SLAVE_CX_RDPM 8 +#define SLAVE_GFX3D_CFG 9 +#define SLAVE_IMEM_CFG 10 +#define SLAVE_CNOC_MSS 11 +#define SLAVE_MX_2_RDPM 12 +#define SLAVE_MX_RDPM 13 +#define SLAVE_PDM 14 +#define SLAVE_QDSS_CFG 15 +#define SLAVE_QSPI_0 16 +#define SLAVE_QUP_0 17 +#define SLAVE_QUP_1 18 +#define SLAVE_SDC1 19 +#define SLAVE_SDCC_2 20 +#define SLAVE_TCSR 21 +#define SLAVE_TLMM 22 +#define SLAVE_UFS_MEM_CFG 23 +#define SLAVE_USB3_0 24 +#define SLAVE_VENUS_CFG 25 +#define SLAVE_VSENSE_CTRL_CFG 26 +#define SLAVE_WLAN 27 +#define SLAVE_CNOC_MNOC_HF_CFG 28 +#define SLAVE_CNOC_MNOC_SF_CFG 29 +#define SLAVE_NSP_QTB_CFG 30 +#define SLAVE_PCIE_ANOC_CFG 31 +#define SLAVE_WLAN_Q6_THROTTLE_CFG 32 +#define SLAVE_SERVICE_CNOC_CFG 33 +#define SLAVE_QDSS_STM 34 +#define SLAVE_TCU 35 + +#define MASTER_GEM_NOC_CNOC 0 +#define MASTER_GEM_NOC_PCIE_SNOC 1 +#define SLAVE_AOSS 2 +#define SLAVE_DISPLAY_CFG 3 +#define SLAVE_IPA_CFG 4 +#define SLAVE_IPC_ROUTER_CFG 5 +#define SLAVE_PCIE_0_CFG 6 +#define SLAVE_PCIE_1_CFG 7 +#define SLAVE_PRNG 8 +#define SLAVE_TME_CFG 9 +#define SLAVE_APPSS 10 +#define SLAVE_CNOC_CFG 11 +#define SLAVE_DDRSS_CFG 12 +#define SLAVE_IMEM 13 +#define SLAVE_PIMEM 14 +#define SLAVE_SERVICE_CNOC 15 +#define SLAVE_PCIE_0 16 +#define SLAVE_PCIE_1 17 + +#define MASTER_GPU_TCU 0 +#define MASTER_SYS_TCU 1 +#define MASTER_APPSS_PROC 2 +#define MASTER_GFX3D 3 +#define MASTER_LPASS_GEM_NOC 4 +#define MASTER_MSS_PROC 5 +#define MASTER_MNOC_HF_MEM_NOC 6 +#define MASTER_MNOC_SF_MEM_NOC 7 +#define MASTER_COMPUTE_NOC 8 +#define MASTER_ANOC_PCIE_GEM_NOC 9 +#define MASTER_SNOC_GC_MEM_NOC 10 +#define MASTER_SNOC_SF_MEM_NOC 11 +#define MASTER_WLAN_Q6 12 +#define SLAVE_GEM_NOC_CNOC 13 +#define SLAVE_LLCC 14 +#define SLAVE_MEM_NOC_PCIE_SNOC 15 + +#define MASTER_LPASS_PROC 0 +#define SLAVE_LPASS_GEM_NOC 1 + +#define MASTER_LLCC 0 +#define SLAVE_EBI1 1 + +#define MASTER_CAMNOC_HF 0 +#define MASTER_CAMNOC_ICP 1 +#define MASTER_CAMNOC_SF 2 +#define MASTER_MDP 3 +#define MASTER_VIDEO 4 +#define MASTER_CNOC_MNOC_HF_CFG 5 +#define MASTER_CNOC_MNOC_SF_CFG 6 +#define SLAVE_MNOC_HF_MEM_NOC 7 +#define SLAVE_MNOC_SF_MEM_NOC 8 +#define SLAVE_SERVICE_MNOC_HF 9 +#define SLAVE_SERVICE_MNOC_SF 10 + +#define MASTER_CDSP_PROC 0 +#define SLAVE_CDSP_MEM_NOC 1 + +#define MASTER_PCIE_ANOC_CFG 0 +#define MASTER_PCIE_0 1 +#define MASTER_PCIE_1 2 +#define SLAVE_ANOC_PCIE_GEM_NOC 3 +#define SLAVE_SERVICE_PCIE_ANOC 4 + +#define MASTER_A1NOC_SNOC 0 +#define MASTER_A2NOC_SNOC 1 +#define MASTER_APSS_NOC 2 +#define MASTER_CNOC_SNOC 3 +#define MASTER_PIMEM 4 +#define MASTER_GIC 5 +#define SLAVE_SNOC_GEM_NOC_GC 6 +#define SLAVE_SNOC_GEM_NOC_SF 7 + + +#endif -- cgit v1.2.3 From ab16122115327b2a602595f539cae7e39a331d0e Mon Sep 17 00:00:00 2001 From: Jack Thomson Date: Mon, 21 Jul 2025 14:05:58 +0100 Subject: arm64: kvm, smccc: Fix vendor uuid Commit 13423063c7cb ("arm64: kvm, smccc: Introduce and use API for getting hypervisor UUID") replaced the explicit register constants with the UUID_INIT macro. However, there is an endian issue, meaning the UUID generated and used in the handshake didn't match UUID prior to the commit. The change in UUID causes the SMCCC vendor handshake to fail with older guest kernels, meaning devices such as PTP were not available in the guest. This patch updates the parameters to the macro to generate a UUID which matches the previous value, and re-establish backwards compatibility with older guest kernels. Fixes: 13423063c7cb ("arm64: kvm, smccc: Introduce and use API for getting hypervisor UUID") Signed-off-by: Jack Thomson Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20250721130558.50823-1-jackabt.amazon@gmail.com Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 784ebe4607a4..50b47eba7d01 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -113,7 +113,7 @@ /* KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74 */ #define ARM_SMCCC_VENDOR_HYP_UID_KVM UUID_INIT(\ - 0xb66fb428, 0xc52e, 0xe911, \ + 0x28b46fb6, 0x2ec5, 0x11e9, \ 0xa9, 0xca, 0x4b, 0x56, \ 0x4d, 0x00, 0x3a, 0x74) -- cgit v1.2.3 From 460114eae8284155b51f6e72ed26f627ee338a30 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 21 Jul 2025 09:20:03 +0300 Subject: wifi: mac80211: remove ieee80211_remove_key It is no longer used, remove it. Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250721091956.e964ceacd85c.Idecab8ef161fa58e000b3969bc936399284b79f0@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a2dbaad2f6d3..a0cf976a9117 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6032,18 +6032,6 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, int tid, struct ieee80211_key_seq *seq); -/** - * ieee80211_remove_key - remove the given key - * @keyconf: the parameter passed with the set key - * - * Context: Must be called with the wiphy mutex held. - * - * Remove the given key. If the key was uploaded to the hardware at the - * time this function is called, it is not deleted in the hardware but - * instead assumed to have been removed already. - */ -void ieee80211_remove_key(struct ieee80211_key_conf *keyconf); - /** * ieee80211_gtk_rekey_add - add a GTK key from rekeying during WoWLAN * @vif: the virtual interface to add the key on @@ -6070,9 +6058,7 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf); * for the new key for each TID to set up sequence counters properly. * * IMPORTANT: If this replaces a key that is present in the hardware, - * then it will attempt to remove it during this call. In many cases - * this isn't what you want, so call ieee80211_remove_key() first for - * the key that's being replaced. + * then it will attempt to remove it during this call. */ struct ieee80211_key_conf * ieee80211_gtk_rekey_add(struct ieee80211_vif *vif, -- cgit v1.2.3 From 84b62b72b4c759b51568e44b0e8dc80f4cb8a2b9 Mon Sep 17 00:00:00 2001 From: Michael-CY Lee Date: Mon, 21 Jul 2025 14:51:59 +0800 Subject: wifi: cfg80211/mac80211: report link ID for unexpected frames The upper layer may require the link ID to properly handle unexpected frames. For instance, if hostapd, operating as an AP MLD, receives a data frame from a non-associated STA, it must send deauthentication to the link on which the STA is operating. Signed-off-by: Michael-CY Lee Reviewed-by: Money Wang Link: https://patch.msgid.link/20250721065159.1740992-1-michael-cy.lee@mediatek.com [edit commit message] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 44a1055a81ba..406626ff6cc8 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -9048,6 +9048,7 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, /** * cfg80211_rx_spurious_frame - inform userspace about a spurious frame * @dev: The device the frame matched to + * @link_id: the link the frame was received on, -1 if not applicable or unknown * @addr: the transmitter address * @gfp: context flags * @@ -9057,13 +9058,14 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, * Return: %true if the frame was passed to userspace (or this failed * for a reason other than not having a subscription.) */ -bool cfg80211_rx_spurious_frame(struct net_device *dev, - const u8 *addr, gfp_t gfp); +bool cfg80211_rx_spurious_frame(struct net_device *dev, const u8 *addr, + int link_id, gfp_t gfp); /** * cfg80211_rx_unexpected_4addr_frame - inform about unexpected WDS frame * @dev: The device the frame matched to * @addr: the transmitter address + * @link_id: the link the frame was received on, -1 if not applicable or unknown * @gfp: context flags * * This function is used in AP mode (only!) to inform userspace that @@ -9073,8 +9075,8 @@ bool cfg80211_rx_spurious_frame(struct net_device *dev, * Return: %true if the frame was passed to userspace (or this failed * for a reason other than not having a subscription.) */ -bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev, - const u8 *addr, gfp_t gfp); +bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev, const u8 *addr, + int link_id, gfp_t gfp); /** * cfg80211_probe_status - notify userspace about probe status -- cgit v1.2.3 From a3b366dbf468788baa2e0ccdccd8482ac277e47b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 12 Jun 2025 09:49:32 -0400 Subject: tracing, AER: Hide PCIe AER event when PCIEAER is not configured The event aer_event is only used when CONFIG_PCIEAER is configured. It should not be created when it is not. When an event is created it creates around 5K of text and meta data regardless if the tracepoint is used or not. Instead of wasting this memory, put #ifdef around the event to not create it when it is not used. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Tony Luck Link: https://lore.kernel.org/20250612094932.4a08abd6@batman.local.home Acked-by: Borislav Petkov (AMD) Signed-off-by: Steven Rostedt (Google) --- include/ras/ras_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 14c9f943d53f..c8cd0f00c845 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -252,6 +252,7 @@ TRACE_EVENT(non_standard_event, __print_hex(__get_dynamic_array(buf), __entry->len)) ); +#ifdef CONFIG_PCIEAER /* * PCIe AER Trace event * @@ -337,6 +338,7 @@ TRACE_EVENT(aer_event, __print_array(__entry->tlp_header, PCIE_STD_MAX_TLP_HEADERLOG, 4) : "Not available") ); +#endif /* CONFIG_PCIEAER */ /* * memory-failure recovery action result event -- cgit v1.2.3 From 06cc77a63f1b10e8f05e7223753883e14c35b512 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 12 Jun 2025 09:58:28 -0400 Subject: alarmtimer: Hide alarmtimer_suspend event when RTC_CLASS is not configured The trace event alarmtimer_suspend is only called when RTC_CLASS is defined. As every event created can create up to 5K of text and meta data regardless if it is called or not it should not be created and waste memory. Hide the event when CONFIG_RTC_CLASS is not defined. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Link: https://lore.kernel.org/20250612095828.6d75dfa3@batman.local.home Acked-by: Thomas Gleixner Signed-off-by: Steven Rostedt (Google) --- include/trace/events/alarmtimer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/trace/events/alarmtimer.h b/include/trace/events/alarmtimer.h index 13483c7ca70b..8e9c76a7f21b 100644 --- a/include/trace/events/alarmtimer.h +++ b/include/trace/events/alarmtimer.h @@ -20,6 +20,7 @@ TRACE_DEFINE_ENUM(ALARM_BOOTTIME_FREEZER); { 1 << ALARM_REALTIME_FREEZER, "REALTIME Freezer" }, \ { 1 << ALARM_BOOTTIME_FREEZER, "BOOTTIME Freezer" }) +#ifdef CONFIG_RTC_CLASS TRACE_EVENT(alarmtimer_suspend, TP_PROTO(ktime_t expires, int flag), @@ -41,6 +42,7 @@ TRACE_EVENT(alarmtimer_suspend, __entry->expires ) ); +#endif /* CONFIG_RTC_CLASS */ DECLARE_EVENT_CLASS(alarm_class, -- cgit v1.2.3 From 647fe16b46999258ce1aec41f4bdeabb4f0cc8e7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 12 Jun 2025 10:53:11 -0400 Subject: PM: cpufreq: powernv/tracing: Move powernv_throttle trace event As the trace event powernv_throttle is only used by the powernv code, move it to a separate include file and have that code directly enable it. Trace events can take up around 5K of memory when they are defined regardless if they are used or not. It wastes memory to have them defined in configurations where the tracepoint is not used. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Madhavan Srinivasan Cc: Michael Ellerman Link: https://lore.kernel.org/20250612145407.906308844@goodmis.org Fixes: 0306e481d479a ("cpufreq: powernv/tracing: Add powernv_throttle tracepoint") Acked-by: Viresh Kumar Acked-by: Rafael J. Wysocki Signed-off-by: Steven Rostedt (Google) --- include/trace/events/power.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include') diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 6c631eec23e3..913181cebfe9 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -99,28 +99,6 @@ DEFINE_EVENT(psci_domain_idle, psci_domain_idle_exit, TP_ARGS(cpu_id, state, s2idle) ); -TRACE_EVENT(powernv_throttle, - - TP_PROTO(int chip_id, const char *reason, int pmax), - - TP_ARGS(chip_id, reason, pmax), - - TP_STRUCT__entry( - __field(int, chip_id) - __string(reason, reason) - __field(int, pmax) - ), - - TP_fast_assign( - __entry->chip_id = chip_id; - __assign_str(reason); - __entry->pmax = pmax; - ), - - TP_printk("Chip %d Pmax %d %s", __entry->chip_id, - __entry->pmax, __get_str(reason)) -); - TRACE_EVENT(pstate_sample, TP_PROTO(u32 core_busy, -- cgit v1.2.3 From e64397f81c5422d5323f5f9a0fd2438e4c403015 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 12 Jun 2025 10:53:12 -0400 Subject: PM: tracing: Hide psci_domain_idle events under ARM_PSCI_CPUIDLE The events psci_domain_idle_enter and psci_domain_idle_exit events are only called when CONFIG_ARM_PSCI_CPUIDLE is defined. As each event can take up to 5K (less for DEFINE_EVENT()) regardless if they are used or not, it's best not to define them when they are not used. Add #ifdef around these events when they are not used. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Viresh Kumar Cc: Madhavan Srinivasan Cc: Michael Ellerman Link: https://lore.kernel.org/20250612145408.074769245@goodmis.org Acked-by: Rafael J. Wysocki Signed-off-by: Steven Rostedt (Google) --- include/trace/events/power.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 913181cebfe9..a10ad300d660 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -62,6 +62,7 @@ TRACE_EVENT(cpu_idle_miss, (unsigned long)__entry->state, (__entry->below)?"below":"above") ); +#ifdef CONFIG_ARM_PSCI_CPUIDLE DECLARE_EVENT_CLASS(psci_domain_idle, TP_PROTO(unsigned int cpu_id, unsigned int state, bool s2idle), @@ -98,6 +99,7 @@ DEFINE_EVENT(psci_domain_idle, psci_domain_idle_exit, TP_ARGS(cpu_id, state, s2idle) ); +#endif TRACE_EVENT(pstate_sample, -- cgit v1.2.3 From e68849097e82f7e42f3212cefbdcb7d1428e6ffd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 12 Jun 2025 10:53:13 -0400 Subject: PM: tracing: Hide device_pm_callback events under PM_SLEEP The events device_pm_callback_start and device_pm_callback_end events are only called when CONFIG_PM_SLEEP is defined. As each event can take up to 5K regardless if they are used or not, it's best not to define them when they are not used. Add #ifdef around these events when they are not used. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Viresh Kumar Cc: Madhavan Srinivasan Cc: Michael Ellerman Link: https://lore.kernel.org/20250612145408.246703478@goodmis.org Acked-by: Rafael J. Wysocki Signed-off-by: Steven Rostedt (Google) --- include/trace/events/power.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/trace/events/power.h b/include/trace/events/power.h index a10ad300d660..5b1df5e1e092 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -212,6 +212,7 @@ TRACE_EVENT(cpu_frequency_limits, (unsigned long)__entry->cpu_id) ); +#ifdef CONFIG_PM_SLEEP TRACE_EVENT(device_pm_callback_start, TP_PROTO(struct device *dev, const char *pm_ops, int event), @@ -260,6 +261,7 @@ TRACE_EVENT(device_pm_callback_end, TP_printk("%s %s, err=%d", __get_str(driver), __get_str(device), __entry->error) ); +#endif TRACE_EVENT(suspend_resume, -- cgit v1.2.3 From dd4186c2f2733389749182775498b03528622d70 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 12 Jun 2025 10:53:14 -0400 Subject: PM: tracing: Hide power_domain_target event under ARCH_OMAP2PLUS The power_domain_target event event is only called when CONFIG_OMAP2PLUS is defined. As each event can take up to 5K regardless if they are used or not, it's best not to define them when they are not used. Add #ifdef around these events when they are not used. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Viresh Kumar Cc: Madhavan Srinivasan Cc: Michael Ellerman Link: https://lore.kernel.org/20250612145408.415483176@goodmis.org Acked-by: Rafael J. Wysocki Signed-off-by: Steven Rostedt (Google) --- include/trace/events/power.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 5b1df5e1e092..82904291c2b8 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -319,6 +319,7 @@ DEFINE_EVENT(wakeup_source, wakeup_source_deactivate, TP_ARGS(name, state) ); +#ifdef CONFIG_ARCH_OMAP2PLUS /* * The power domain events are used for power domains transitions */ @@ -350,6 +351,7 @@ DEFINE_EVENT(power_domain, power_domain_target, TP_ARGS(name, state, cpu_id) ); +#endif /* * CPU latency QoS events used for global CPU latency QoS list updates -- cgit v1.2.3 From b769777d927af168b1389388392bfd7dc4e38399 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 2 Jun 2025 13:56:48 +0100 Subject: btrfs: use refcount_t type for the extent buffer reference counter Instead of using a bare atomic, use the refcount_t type, which despite being a structure that contains only an atomic, has an API that checks for underflows and other hazards. This doesn't change the size of the extent_buffer structure. This removes the need to do things like this: WARN_ON(atomic_read(&eb->refs) == 0); if (atomic_dec_and_test(&eb->refs)) { (...) } And do just: if (refcount_dec_and_test(&eb->refs)) { (...) } Since refcount_dec_and_test() already triggers a warning when we decrement a ref count that has a value of 0 (or below zero). Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index bebc252db865..a32305044371 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1095,7 +1095,7 @@ TRACE_EVENT(btrfs_cow_block, TP_fast_assign_btrfs(root->fs_info, __entry->root_objectid = btrfs_root_id(root); __entry->buf_start = buf->start; - __entry->refs = atomic_read(&buf->refs); + __entry->refs = refcount_read(&buf->refs); __entry->cow_start = cow->start; __entry->buf_level = btrfs_header_level(buf); __entry->cow_level = btrfs_header_level(cow); -- cgit v1.2.3 From 44892c5a3e2d779fb056cbb69954d80e718edce1 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 10 Jun 2025 18:30:09 +0200 Subject: btrfs: tree-log: add and rename extent bits for dirty_log_pages tree The dirty_log_pages tree is used for tree logging and marks extents based on log_transid. The bits could be renamed to resemble the LOG1/LOG2 naming used for the BTRFS_FS_LOG1_ERR bits. The DIRTY bit is renamed to LOG1 and NEW to LOG2. Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index a32305044371..d54fe354b390 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -144,7 +144,8 @@ FLUSH_STATES #define EXTENT_FLAGS \ { EXTENT_DIRTY, "DIRTY"}, \ { EXTENT_LOCKED, "LOCKED"}, \ - { EXTENT_NEW, "NEW"}, \ + { EXTENT_DIRTY_LOG1, "DIRTY_LOG1"}, \ + { EXTENT_DIRTY_LOG2, "DIRTY_LOG2"}, \ { EXTENT_DELALLOC, "DELALLOC"}, \ { EXTENT_DEFRAG, "DEFRAG"}, \ { EXTENT_BOUNDARY, "BOUNDARY"}, \ -- cgit v1.2.3 From 009b2056cb259c90426b3c57e5b145d1cd9fa9e2 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 9 Jul 2025 16:29:17 +0200 Subject: btrfs: defrag: add flag to force no-compression Currently the defrag ioctl cannot rewrite the extents without compression. Add a new flag for that, as setting compression to 0 (or "no compression") means to do no changes to compression so take what is the current default, like mount options or properties. The defrag setting overrides mount or properties. The compression BTRFS_DEFRAG_DONT_COMPRESS is only used for in-memory operations and does not need to have a fixed value. Mount with zstd:9, copy test file from /usr/bin/ (about 260KB): $ mount -o compress=zstd:9 /dev/vda /mnt $ filefrag -vsb testfile filefrag: -b needs a blocksize option, assuming 1024-byte blocks. Filesystem type is: 9123683e File size of testfile is 297704 (292 blocks of 1024 bytes) ext: logical_offset: physical_offset: length: expected: flags: 0: 0.. 127: 13312.. 13439: 128: encoded 1: 128.. 255: 13364.. 13491: 128: 13440: encoded 2: 256.. 291: 13424.. 13459: 36: 13492: last,encoded,eof testfile: 3 extents found $ compsize testfile Processed 1 file, 3 regular extents (3 refs), 0 inline, 1 fragments. Type Perc Disk Usage Uncompressed Referenced TOTAL 42% 124K 292K 292K zstd 42% 124K 292K 292K Defrag to uncompressed: $ btrfs fi defrag --nocomp testfile $ filefrag -vsb testfile filefrag: -b needs a blocksize option, assuming 1024-byte blocks. Filesystem type is: 9123683e File size of testfile is 297704 (292 blocks of 1024 bytes) ext: logical_offset: physical_offset: length: expected: flags: 0: 0.. 291: 291840.. 292131: 292: last,eof testfile: 1 extent found $ compsize testfile Processed 1 file, 1 regular extents (1 refs), 0 inline, 1 fragments. Type Perc Disk Usage Uncompressed Referenced TOTAL 100% 292K 292K 292K none 100% 292K 292K 292K Compress again with LZO: $ btrfs fi defrag -clzo testfile $ filefrag -vsb testfile filefrag: -b needs a blocksize option, assuming 1024-byte blocks. Filesystem type is: 9123683e File size of testfile is 297704 (292 blocks of 1024 bytes) ext: logical_offset: physical_offset: length: expected: flags: 0: 0.. 127: 13312.. 13439: 128: encoded 1: 128.. 255: 13392.. 13519: 128: 13440: encoded 2: 256.. 291: 13480.. 13515: 36: 13520: last,encoded,eof testfile: 3 extents found $ compsize testfile Processed 1 file, 3 regular extents (3 refs), 0 inline, 1 fragments. Type Perc Disk Usage Uncompressed Referenced TOTAL 64% 188K 292K 292K lzo 64% 188K 292K 292K Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index dd02160015b2..8e710bbb688e 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -616,8 +616,11 @@ struct btrfs_ioctl_clone_range_args { #define BTRFS_DEFRAG_RANGE_COMPRESS 1 #define BTRFS_DEFRAG_RANGE_START_IO 2 #define BTRFS_DEFRAG_RANGE_COMPRESS_LEVEL 4 +/* Request no compression on the range (uncompress if necessary). */ +#define BTRFS_DEFRAG_RANGE_NOCOMPRESS 8 #define BTRFS_DEFRAG_RANGE_FLAGS_SUPP (BTRFS_DEFRAG_RANGE_COMPRESS | \ BTRFS_DEFRAG_RANGE_COMPRESS_LEVEL | \ + BTRFS_DEFRAG_RANGE_NOCOMPRESS | \ BTRFS_DEFRAG_RANGE_START_IO) struct btrfs_ioctl_defrag_range_args { -- cgit v1.2.3 From 61c3e8940f2d8b5bfeaeec4bedc2f3e7d873abb3 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 17 Jul 2025 14:06:17 +0200 Subject: net: usb: cdc-ncm: check for filtering capability If the decice does not support filtering, filtering must not be used and all packets delivered for the upper layers to sort. Signed-off-by: Oliver Neukum Link: https://patch.msgid.link/20250717120649.2090929-1-oneukum@suse.com Signed-off-by: Jakub Kicinski --- include/linux/usb/cdc_ncm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 2d207cb4837d..4ac082a63173 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -119,6 +119,7 @@ struct cdc_ncm_ctx { u32 timer_interval; u32 max_ndp_size; u8 is_ndp16; + u8 filtering_supported; union { struct usb_cdc_ncm_ndp16 *delayed_ndp16; struct usb_cdc_ncm_ndp32 *delayed_ndp32; -- cgit v1.2.3 From 51a62199a8aaac0d1645b1dd8e670a6f35aead81 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sat, 19 Jul 2025 00:06:57 +0200 Subject: tcp: add tcp_sock_set_maxseg Add a helper tcp_sock_set_maxseg() to directly set the TCP_MAXSEG sockopt from kernel space. This new helper will be used in the following patch from MPTCP. Signed-off-by: Geliang Tang Acked-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250719-net-next-mptcp-tcp_maxseg-v2-2-8c910fbc5307@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1a5737b3753d..57e478bfaef2 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -621,6 +621,7 @@ void tcp_sock_set_nodelay(struct sock *sk); void tcp_sock_set_quickack(struct sock *sk, int val); int tcp_sock_set_syncnt(struct sock *sk, int val); int tcp_sock_set_user_timeout(struct sock *sk, int val); +int tcp_sock_set_maxseg(struct sock *sk, int val); static inline bool dst_tcp_usec_ts(const struct dst_entry *dst) { -- cgit v1.2.3 From a166ab7816c534973745b0fe7bce3c8cefc5426f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 17 Jul 2025 16:43:41 -0700 Subject: ethtool: rss: support creating contexts via Netlink Support creating contexts via Netlink. Setting flow hashing fields on the new context is not supported at this stage, it can be added later. An empty indirection table is not supported. This is a carry over from the IOCTL interface where empty indirection table meant delete. We can repurpose empty indirection table in Netlink but for now to avoid confusion reject it using the policy. Support letting user choose the ID for the new context. This was not possible in IOCTL since the context ID field for the create action had to be set to the ETH_RXFH_CONTEXT_ALLOC magic value. Link: https://patch.msgid.link/20250717234343.2328602-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink_generated.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 130bdf5c3516..dea77abd295f 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -841,6 +841,7 @@ enum { ETHTOOL_MSG_TSCONFIG_GET, ETHTOOL_MSG_TSCONFIG_SET, ETHTOOL_MSG_RSS_SET, + ETHTOOL_MSG_RSS_CREATE_ACT, __ETHTOOL_MSG_USER_CNT, ETHTOOL_MSG_USER_MAX = (__ETHTOOL_MSG_USER_CNT - 1) @@ -898,6 +899,8 @@ enum { ETHTOOL_MSG_TSCONFIG_SET_REPLY, ETHTOOL_MSG_PSE_NTF, ETHTOOL_MSG_RSS_NTF, + ETHTOOL_MSG_RSS_CREATE_ACT_REPLY, + ETHTOOL_MSG_RSS_CREATE_NTF, __ETHTOOL_MSG_KERNEL_CNT, ETHTOOL_MSG_KERNEL_MAX = (__ETHTOOL_MSG_KERNEL_CNT - 1) -- cgit v1.2.3 From fbe09277fa6324b50cc4eedb4d99498cf7dad897 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 17 Jul 2025 16:43:42 -0700 Subject: ethtool: rss: support removing contexts via Netlink Implement removing additional RSS contexts via Netlink. Technically it'd be possible to shoehorn the delete operation into ethnl_request_ops-compatible handler. The code ends up longer than open coded version, and I think we'll need a custom way of sending notifications at some stage (if we allow tying the context lifetime to the netlink socket, in the future). Link: https://patch.msgid.link/20250717234343.2328602-8-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink_generated.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index dea77abd295f..e3b8813465d7 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -842,6 +842,7 @@ enum { ETHTOOL_MSG_TSCONFIG_SET, ETHTOOL_MSG_RSS_SET, ETHTOOL_MSG_RSS_CREATE_ACT, + ETHTOOL_MSG_RSS_DELETE_ACT, __ETHTOOL_MSG_USER_CNT, ETHTOOL_MSG_USER_MAX = (__ETHTOOL_MSG_USER_CNT - 1) @@ -901,6 +902,7 @@ enum { ETHTOOL_MSG_RSS_NTF, ETHTOOL_MSG_RSS_CREATE_ACT_REPLY, ETHTOOL_MSG_RSS_CREATE_NTF, + ETHTOOL_MSG_RSS_DELETE_NTF, __ETHTOOL_MSG_KERNEL_CNT, ETHTOOL_MSG_KERNEL_MAX = (__ETHTOOL_MSG_KERNEL_CNT - 1) -- cgit v1.2.3 From 57fbad15c2eee77276a541c616589b32976d2b8e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Jul 2025 16:25:06 -0700 Subject: stackleak: Rename STACKLEAK to KSTACK_ERASE In preparation for adding Clang sanitizer coverage stack depth tracking that can support stack depth callbacks: - Add the new top-level CONFIG_KSTACK_ERASE option which will be implemented either with the stackleak GCC plugin, or with the Clang stack depth callback support. - Rename CONFIG_GCC_PLUGIN_STACKLEAK as needed to CONFIG_KSTACK_ERASE, but keep it for anything specific to the GCC plugin itself. - Rename all exposed "STACKLEAK" names and files to "KSTACK_ERASE" (named for what it does rather than what it protects against), but leave as many of the internals alone as possible to avoid even more churn. While here, also split "prev_lowest_stack" into CONFIG_KSTACK_ERASE_METRICS, since that's the only place it is referenced from. Suggested-by: Ingo Molnar Link: https://lore.kernel.org/r/20250717232519.2984886-1-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/kstack_erase.h | 89 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 4 +- include/linux/stackleak.h | 89 -------------------------------------------- 3 files changed, 92 insertions(+), 90 deletions(-) create mode 100644 include/linux/kstack_erase.h delete mode 100644 include/linux/stackleak.h (limited to 'include') diff --git a/include/linux/kstack_erase.h b/include/linux/kstack_erase.h new file mode 100644 index 000000000000..4e432eefa4d0 --- /dev/null +++ b/include/linux/kstack_erase.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_KSTACK_ERASE_H +#define _LINUX_KSTACK_ERASE_H + +#include +#include + +/* + * Check that the poison value points to the unused hole in the + * virtual memory map for your platform. + */ +#define KSTACK_ERASE_POISON -0xBEEF +#define KSTACK_ERASE_SEARCH_DEPTH 128 + +#ifdef CONFIG_KSTACK_ERASE +#include +#include + +/* + * The lowest address on tsk's stack which we can plausibly erase. + */ +static __always_inline unsigned long +stackleak_task_low_bound(const struct task_struct *tsk) +{ + /* + * The lowest unsigned long on the task stack contains STACK_END_MAGIC, + * which we must not corrupt. + */ + return (unsigned long)end_of_stack(tsk) + sizeof(unsigned long); +} + +/* + * The address immediately after the highest address on tsk's stack which we + * can plausibly erase. + */ +static __always_inline unsigned long +stackleak_task_high_bound(const struct task_struct *tsk) +{ + /* + * The task's pt_regs lives at the top of the task stack and will be + * overwritten by exception entry, so there's no need to erase them. + */ + return (unsigned long)task_pt_regs(tsk); +} + +/* + * Find the address immediately above the poisoned region of the stack, where + * that region falls between 'low' (inclusive) and 'high' (exclusive). + */ +static __always_inline unsigned long +stackleak_find_top_of_poison(const unsigned long low, const unsigned long high) +{ + const unsigned int depth = KSTACK_ERASE_SEARCH_DEPTH / sizeof(unsigned long); + unsigned int poison_count = 0; + unsigned long poison_high = high; + unsigned long sp = high; + + while (sp > low && poison_count < depth) { + sp -= sizeof(unsigned long); + + if (*(unsigned long *)sp == KSTACK_ERASE_POISON) { + poison_count++; + } else { + poison_count = 0; + poison_high = sp; + } + } + + return poison_high; +} + +static inline void stackleak_task_init(struct task_struct *t) +{ + t->lowest_stack = stackleak_task_low_bound(t); +# ifdef CONFIG_KSTACK_ERASE_METRICS + t->prev_lowest_stack = t->lowest_stack; +# endif +} + +asmlinkage void noinstr stackleak_erase(void); +asmlinkage void noinstr stackleak_erase_on_task_stack(void); +asmlinkage void noinstr stackleak_erase_off_task_stack(void); +void __no_caller_saved_registers noinstr stackleak_track_stack(void); + +#else /* !CONFIG_KSTACK_ERASE */ +static inline void stackleak_task_init(struct task_struct *t) { } +#endif + +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f78a64beb52..b7d2f2fd4cd4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1607,8 +1607,10 @@ struct task_struct { /* Used by BPF for per-TASK xdp storage */ struct bpf_net_context *bpf_net_context; -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK +#ifdef CONFIG_KSTACK_ERASE unsigned long lowest_stack; +#endif +#ifdef CONFIG_KSTACK_ERASE_METRICS unsigned long prev_lowest_stack; #endif diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h deleted file mode 100644 index 3be2cb564710..000000000000 --- a/include/linux/stackleak.h +++ /dev/null @@ -1,89 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_STACKLEAK_H -#define _LINUX_STACKLEAK_H - -#include -#include - -/* - * Check that the poison value points to the unused hole in the - * virtual memory map for your platform. - */ -#define STACKLEAK_POISON -0xBEEF -#define STACKLEAK_SEARCH_DEPTH 128 - -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK -#include -#include - -/* - * The lowest address on tsk's stack which we can plausibly erase. - */ -static __always_inline unsigned long -stackleak_task_low_bound(const struct task_struct *tsk) -{ - /* - * The lowest unsigned long on the task stack contains STACK_END_MAGIC, - * which we must not corrupt. - */ - return (unsigned long)end_of_stack(tsk) + sizeof(unsigned long); -} - -/* - * The address immediately after the highest address on tsk's stack which we - * can plausibly erase. - */ -static __always_inline unsigned long -stackleak_task_high_bound(const struct task_struct *tsk) -{ - /* - * The task's pt_regs lives at the top of the task stack and will be - * overwritten by exception entry, so there's no need to erase them. - */ - return (unsigned long)task_pt_regs(tsk); -} - -/* - * Find the address immediately above the poisoned region of the stack, where - * that region falls between 'low' (inclusive) and 'high' (exclusive). - */ -static __always_inline unsigned long -stackleak_find_top_of_poison(const unsigned long low, const unsigned long high) -{ - const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long); - unsigned int poison_count = 0; - unsigned long poison_high = high; - unsigned long sp = high; - - while (sp > low && poison_count < depth) { - sp -= sizeof(unsigned long); - - if (*(unsigned long *)sp == STACKLEAK_POISON) { - poison_count++; - } else { - poison_count = 0; - poison_high = sp; - } - } - - return poison_high; -} - -static inline void stackleak_task_init(struct task_struct *t) -{ - t->lowest_stack = stackleak_task_low_bound(t); -# ifdef CONFIG_STACKLEAK_METRICS - t->prev_lowest_stack = t->lowest_stack; -# endif -} - -asmlinkage void noinstr stackleak_erase(void); -asmlinkage void noinstr stackleak_erase_on_task_stack(void); -asmlinkage void noinstr stackleak_erase_off_task_stack(void); -void __no_caller_saved_registers noinstr stackleak_track_stack(void); - -#else /* !CONFIG_GCC_PLUGIN_STACKLEAK */ -static inline void stackleak_task_init(struct task_struct *t) { } -#endif - -#endif -- cgit v1.2.3 From 9ea1e8d28add49ab3c1ecfa43f08d92ee23f3e33 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Jul 2025 16:25:07 -0700 Subject: stackleak: Rename stackleak_track_stack to __sanitizer_cov_stack_depth The Clang stack depth tracking implementation has a fixed name for the stack depth tracking callback, "__sanitizer_cov_stack_depth", so rename the GCC plugin function to match since the plugin has no external dependencies on naming. Link: https://lore.kernel.org/r/20250717232519.2984886-2-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/kstack_erase.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kstack_erase.h b/include/linux/kstack_erase.h index 4e432eefa4d0..bf3bf1905557 100644 --- a/include/linux/kstack_erase.h +++ b/include/linux/kstack_erase.h @@ -80,7 +80,7 @@ static inline void stackleak_task_init(struct task_struct *t) asmlinkage void noinstr stackleak_erase(void); asmlinkage void noinstr stackleak_erase_on_task_stack(void); asmlinkage void noinstr stackleak_erase_off_task_stack(void); -void __no_caller_saved_registers noinstr stackleak_track_stack(void); +void __no_caller_saved_registers noinstr __sanitizer_cov_stack_depth(void); #else /* !CONFIG_KSTACK_ERASE */ static inline void stackleak_task_init(struct task_struct *t) { } -- cgit v1.2.3 From 2424fe1cac4fc8ea0520ba22ede7544c3ddc8dd1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Jul 2025 16:25:10 -0700 Subject: arm: Handle KCOV __init vs inline mismatches When KCOV is enabled all functions get instrumented, unless the __no_sanitize_coverage attribute is used. To prepare for __no_sanitize_coverage being applied to __init functions, we have to handle differences in how GCC's inline optimizations get resolved. For arm this exposed several places where __init annotations were missing but ended up being "accidentally correct". Fix these cases and force several functions to be inline with __always_inline. Acked-by: Nishanth Menon Acked-by: Lee Jones Reviewed-by: Nishanth Menon Link: https://lore.kernel.org/r/20250717232519.2984886-5-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/mfd/dbx500-prcmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h index 98567623c9df..828362b7860c 100644 --- a/include/linux/mfd/dbx500-prcmu.h +++ b/include/linux/mfd/dbx500-prcmu.h @@ -213,7 +213,7 @@ struct prcmu_fw_version { #if defined(CONFIG_UX500_SOC_DB8500) -static inline void prcmu_early_init(void) +static inline void __init prcmu_early_init(void) { db8500_prcmu_early_init(); } -- cgit v1.2.3 From 69fdb084355d6c0b353536024cc51aa5f7ffb62c Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 21 Jul 2025 21:50:49 +0300 Subject: wifi: mac80211: don't require cipher and keylen in gtk rekey ieee80211_add_gtk_rekey receives a keyconf as an argument, and the cipher and keylen are taken from there to the new allocated key. But in rekey, both the cipher and the keylen should be the same as of the old key, so let ieee80211_add_gtk_rekey find those, so drivers won't have to fill it in. Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250721214922.3c5c023bfae9.Ie6594ae2b4b6d5b3d536e642b349046ebfce7a5d@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a0cf976a9117..a45e4bee65d4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6035,7 +6035,10 @@ void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, /** * ieee80211_gtk_rekey_add - add a GTK key from rekeying during WoWLAN * @vif: the virtual interface to add the key on - * @keyconf: new key data + * @idx: the keyidx of the key + * @key_data: the key data + * @key_len: the key data. Might be bigger than the actual key length, + * but not smaller (for the driver convinence) * @link_id: the link id of the key or -1 for non-MLO * * When GTK rekeying was done while the system was suspended, (a) new @@ -6062,7 +6065,7 @@ void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, */ struct ieee80211_key_conf * ieee80211_gtk_rekey_add(struct ieee80211_vif *vif, - struct ieee80211_key_conf *keyconf, + u8 idx, u8 *key_data, u8 key_len, int link_id); /** -- cgit v1.2.3 From 1b5f1454091e9e9fb5c944b3161acf4ec0894d0d Mon Sep 17 00:00:00 2001 From: Feng Lee <379943137@qq.com> Date: Mon, 21 Jul 2025 16:04:35 +0800 Subject: sched/idle: Remove play_idle() play_idle() is no longer in use, so delete it. Signed-off-by: Feng Lee <379943137@qq.com> Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/tencent_C3E0BD9B812C27A30FC49F1EA6A4B1352707@qq.com --- include/linux/cpu.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 6378370a952f..8b1abbf5b6d2 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -187,11 +187,6 @@ static inline void arch_cpu_finalize_init(void) { } void play_idle_precise(u64 duration_ns, u64 latency_ns); -static inline void play_idle(unsigned long duration_us) -{ - play_idle_precise(duration_us * NSEC_PER_USEC, U64_MAX); -} - #ifdef CONFIG_HOTPLUG_CPU void cpuhp_report_idle_dead(void); #else -- cgit v1.2.3 From 2fb4af5ea3c735a205d97de10f044f809b20af51 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Jul 2025 10:14:49 +0200 Subject: NFS: track active delegations per-server The active delegation watermark was added to avoid overloading servers. Track the active delegation per-server instead of globally so that clients talking to multiple servers aren't limited by the global limit. Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/20250718081509.2607553-5-hch@lst.de Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d2d36711a119..a9b44f12623f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -254,6 +254,7 @@ struct nfs_server { struct list_head state_owners_lru; struct list_head layouts; struct list_head delegations; + atomic_long_t nr_active_delegations; struct list_head ss_copies; struct list_head ss_src_copies; -- cgit v1.2.3 From f5b3108e6a14418b120a3c38ca589b8d6cf87627 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Jul 2025 10:14:50 +0200 Subject: NFS: use a hash table for delegation lookup nfs_delegation_find_inode currently has to walk the entire list of delegations per inode, which can become pretty large, and can become even larger when increasing the delegation watermark. Add a hash table to speed up the delegation lookup, sized as a fraction of the delegation watermark. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250718081509.2607553-6-hch@lst.de Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a9b44f12623f..d30c0245031c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -255,6 +255,8 @@ struct nfs_server { struct list_head layouts; struct list_head delegations; atomic_long_t nr_active_delegations; + unsigned int delegation_hash_mask; + struct hlist_head *delegation_hash_table; struct list_head ss_copies; struct list_head ss_src_copies; -- cgit v1.2.3 From ad38574a8e8223361e265973fbd87013ea058c5d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:33 +0100 Subject: f2fs: Pass a folio to ADDRS_PER_PAGE() All callers now have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 5206d63b3386..25857877eaec 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -268,7 +268,7 @@ struct node_footer { /* Node IDs in an Indirect Block */ #define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) -#define ADDRS_PER_PAGE(page, inode) (addrs_per_page(inode, IS_INODE(page))) +#define ADDRS_PER_PAGE(folio, inode) (addrs_per_page(inode, IS_INODE(&folio->page))) #define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) #define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) -- cgit v1.2.3 From a5f3be6e652a7beaaf6c482bc013b64129a5d239 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:34 +0100 Subject: f2fs: Pass a folio to IS_INODE() All callers now have a folio so pass it in. Also make it const to help the compiler. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 25857877eaec..2f8b8bfc0e73 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -268,7 +268,7 @@ struct node_footer { /* Node IDs in an Indirect Block */ #define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) -#define ADDRS_PER_PAGE(folio, inode) (addrs_per_page(inode, IS_INODE(&folio->page))) +#define ADDRS_PER_PAGE(folio, inode) (addrs_per_page(inode, IS_INODE(folio))) #define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) #define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) -- cgit v1.2.3 From a824388d911927b2a82bf7dcfd7cef6ee45c8b43 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:36 +0100 Subject: f2fs: Use a folio in f2fs_is_cp_guaranteed() Convert the passed page to a folio and use it throughout. Removes a use of fscrypt_is_bounce_page(), which we're trying to remove. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/fscrypt.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 56fad33043d5..8d9127a0fdb3 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -332,12 +332,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) return (struct page *)page_private(bounce_page); } -static inline bool fscrypt_is_bounce_folio(struct folio *folio) +static inline bool fscrypt_is_bounce_folio(const struct folio *folio) { return folio->mapping == NULL; } -static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) +static inline +struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio) { return bounce_folio->private; } @@ -518,12 +519,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) return ERR_PTR(-EINVAL); } -static inline bool fscrypt_is_bounce_folio(struct folio *folio) +static inline bool fscrypt_is_bounce_folio(const struct folio *folio) { return false; } -static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) +static inline +struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio) { WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); -- cgit v1.2.3 From e612423be33465d2b9822bf09e03d4e6c165e384 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2025 18:34:04 +0100 Subject: cpu/hotplug: Remove unused cpuhp_state CPUHP_PCI_XGENE_DEAD Now that the XGene MSI driver has been mostly rewritten and doesn't use the CPU hotplug infrastructure, CPUHP_PCI_XGENE_DEAD is unused. Remove it to reduce the size of cpuhp_hp_states[]. Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20250708173404.1278635-14-maz@kernel.org --- include/linux/cpuhotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index df366ee15456..eaca70eb6136 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -90,7 +90,6 @@ enum cpuhp_state { CPUHP_RADIX_DEAD, CPUHP_PAGE_ALLOC, CPUHP_NET_DEV_DEAD, - CPUHP_PCI_XGENE_DEAD, CPUHP_IOMMU_IOVA_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_PADATA_DEAD, -- cgit v1.2.3 From 07d8004d6fb95cbe48918e56012f16454cfdfe89 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 20 Jun 2025 15:08:07 +0200 Subject: tpm: add bufsiz parameter in the .send callback Add a new `bufsiz` parameter to the `.send` callback in `tpm_class_ops`. This parameter will allow drivers to differentiate between the actual command length to send and the total buffer size. Currently `bufsiz` is not used, but it will be used to implement devices with synchronous send() to send the command and receive the response on the same buffer. Also rename the previous parameter `len` to `cmd_len` in the declaration to make it clear that it contains the length in bytes of the command stored in the buffer. The semantics don't change and it can be used as before by drivers. This is an optimization since the drivers could get it from the header, but let's avoid duplicating code. While we are here, resolve a checkpatch warning: WARNING: Unnecessary space before function pointer arguments #66: FILE: include/linux/tpm.h:90: + int (*send) (struct tpm_chip *chip, u8 *buf, size_t bufsiz, Signed-off-by: Stefano Garzarella Suggested-by: Jarkko Sakkinen Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index a3d8305e88a5..cafe8c283e88 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -87,7 +87,8 @@ struct tpm_class_ops { const u8 req_complete_val; bool (*req_canceled)(struct tpm_chip *chip, u8 status); int (*recv) (struct tpm_chip *chip, u8 *buf, size_t len); - int (*send) (struct tpm_chip *chip, u8 *buf, size_t len); + int (*send)(struct tpm_chip *chip, u8 *buf, size_t bufsiz, + size_t cmd_len); void (*cancel) (struct tpm_chip *chip); u8 (*status) (struct tpm_chip *chip); void (*update_timeouts)(struct tpm_chip *chip, -- cgit v1.2.3 From 04fe47015d7726b42c34615c124697c7a3537bf0 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 20 Jun 2025 15:08:08 +0200 Subject: tpm: support devices with synchronous send() Some devices do not support interrupts and provide a single synchronous operation to send the command and receive the response on the same buffer. Currently, these types of drivers must use an internal buffer where they temporarily store the response between .send() and .recv() calls. Introduce a new flag (TPM_CHIP_FLAG_SYNC) to support synchronous send(). If that flag is set by the driver, tpm_try_transmit() will use the send() callback to send the command and receive the response on the same buffer synchronously. In that case send() return the number of bytes of the response on success, or -errno on failure. Signed-off-by: Stefano Garzarella Suggested-by: Jason Gunthorpe Suggested-by: Jarkko Sakkinen Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index cafe8c283e88..804fbbe3873d 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -351,6 +351,7 @@ enum tpm_chip_flags { TPM_CHIP_FLAG_SUSPENDED = BIT(8), TPM_CHIP_FLAG_HWRNG_DISABLED = BIT(9), TPM_CHIP_FLAG_DISABLE = BIT(10), + TPM_CHIP_FLAG_SYNC = BIT(11), }; #define to_tpm_chip(d) container_of(d, struct tpm_chip, dev) -- cgit v1.2.3 From 119a5d573622ae90ba730d18acfae9bb75d77b9a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 30 Jun 2025 18:04:40 -0400 Subject: ring-buffer: Remove ring_buffer_read_prepare_sync() When the ring buffer was first introduced, reading the non-consuming "trace" file required disabling the writing of the ring buffer. To make sure the writing was fully disabled before iterating the buffer with a non-consuming read, it would set the disable flag of the buffer and then call an RCU synchronization to make sure all the buffers were synchronized. The function ring_buffer_read_start() originally would initialize the iterator and call an RCU synchronization, but this was for each individual per CPU buffer where this would get called many times on a machine with many CPUs before the trace file could be read. The commit 72c9ddfd4c5bf ("ring-buffer: Make non-consuming read less expensive with lots of cpus.") separated ring_buffer_read_start into ring_buffer_read_prepare(), ring_buffer_read_sync() and then ring_buffer_read_start() to allow each of the per CPU buffers to be prepared, call the read_buffer_read_sync() once, and then the ring_buffer_read_start() for each of the CPUs which made things much faster. The commit 1039221cc278 ("ring-buffer: Do not disable recording when there is an iterator") removed the requirement of disabling the recording of the ring buffer in order to iterate it, but it did not remove the synchronization that was happening that was required to wait for all the buffers to have no more writers. It's now OK for the buffers to have writers and no synchronization is needed. Remove the synchronization and put back the interface for the ring buffer iterator back before commit 72c9ddfd4c5bf was applied. Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250630180440.3eabb514@batman.local.home Reported-by: David Howells Fixes: 1039221cc278 ("ring-buffer: Do not disable recording when there is an iterator") Tested-by: David Howells Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ring_buffer.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index cd7f0ae26615..bc90c3c7b5fd 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -152,9 +152,7 @@ ring_buffer_consume(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); struct ring_buffer_iter * -ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags); -void ring_buffer_read_prepare_sync(void); -void ring_buffer_read_start(struct ring_buffer_iter *iter); +ring_buffer_read_start(struct trace_buffer *buffer, int cpu, gfp_t flags); void ring_buffer_read_finish(struct ring_buffer_iter *iter); struct ring_buffer_event * -- cgit v1.2.3 From 07c3f391bcb217b6949b49785ccb5fee02be21fe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 2 Jul 2025 14:36:57 -0400 Subject: tracing: Remove EVENT_FILE_FL_SOFT_MODE flag When soft disabling of trace events was first created, it needed to have a way to know if a file had a user that was using it with soft disabled (for triggers that need to enable or disable events from a context that can not really enable or disable the event, it would set SOFT_DISABLED to state it is disabled). The flag SOFT_MODE was used to denote that an event had a user that would enable or disable it via the SOFT_DISABLED flag. Commit 1cf4c0732db3c ("tracing: Modify soft-mode only if there's no other referrer") fixed a bug where if two users were using the SOFT_DISABLED flag the accounting would get messed up as the SOFT_MODE flag could only handle one user. That commit added the sm_ref counter which kept track of how many users were using the event in "soft mode". This made the SOFT_MODE flag redundant as it should only be set if the sm_ref counter is non zero. Remove the SOFT_MODE flag and just use the sm_ref counter to know the event is in soft mode or not. This makes the code a bit simpler. Link: https://lore.kernel.org/all/20250702111908.03759998@batman.local.home/ Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Gabriele Paoloni Link: https://lore.kernel.org/20250702143657.18dd1882@batman.local.home Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index fa9cf4292dff..04307a19cde3 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -480,7 +480,6 @@ enum { EVENT_FILE_FL_RECORDED_TGID_BIT, EVENT_FILE_FL_FILTERED_BIT, EVENT_FILE_FL_NO_SET_FILTER_BIT, - EVENT_FILE_FL_SOFT_MODE_BIT, EVENT_FILE_FL_SOFT_DISABLED_BIT, EVENT_FILE_FL_TRIGGER_MODE_BIT, EVENT_FILE_FL_TRIGGER_COND_BIT, @@ -618,7 +617,6 @@ extern int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...); * RECORDED_TGID - The tgids should be recorded at sched_switch * FILTERED - The event has a filter attached * NO_SET_FILTER - Set when filter has error and is to be ignored - * SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED * SOFT_DISABLED - When set, do not trace the event (even though its * tracepoint may be enabled) * TRIGGER_MODE - When set, invoke the triggers associated with the event @@ -633,7 +631,6 @@ enum { EVENT_FILE_FL_RECORDED_TGID = (1 << EVENT_FILE_FL_RECORDED_TGID_BIT), EVENT_FILE_FL_FILTERED = (1 << EVENT_FILE_FL_FILTERED_BIT), EVENT_FILE_FL_NO_SET_FILTER = (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT), - EVENT_FILE_FL_SOFT_MODE = (1 << EVENT_FILE_FL_SOFT_MODE_BIT), EVENT_FILE_FL_SOFT_DISABLED = (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT), EVENT_FILE_FL_TRIGGER_MODE = (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT), EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT), -- cgit v1.2.3 From 4d6d0a6263babf7c43faa55de4fa3c6637dec624 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 4 Jul 2025 10:48:38 -0400 Subject: tracing: Remove redundant config HAVE_FTRACE_MCOUNT_RECORD Ftrace is tightly coupled with architecture specific code because it requires the use of trampolines written in assembly. This means that when a new feature or optimization is made, it must be done for all architectures. To simplify the approach, CONFIG_HAVE_FTRACE_* configs are added to denote which architecture has the new enhancement so that other architectures can still function until they too have been updated. The CONFIG_HAVE_FTRACE_MCOUNT was added to help simplify the DYNAMIC_FTRACE work, but now every architecture that implements DYNAMIC_FTRACE also has HAVE_FTRACE_MCOUNT set too, making it redundant with the HAVE_DYNAMIC_FTRACE. Remove the HAVE_FTRACE_MCOUNT config and use DYNAMIC_FTRACE directly where applicable. Link: https://lore.kernel.org/all/20250703154916.48e3ada7@gandalf.local.home/ Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Mark Rutland Cc: Linus Torvalds Link: https://lore.kernel.org/20250704104838.27a18690@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- include/asm-generic/vmlinux.lds.h | 2 +- include/linux/ftrace.h | 2 +- include/linux/kernel.h | 6 +++--- include/linux/module.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index fa5f19b8d53a..ae2d2359b79e 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -167,7 +167,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) #define FTRACE_STUB_HACK #endif -#ifdef CONFIG_FTRACE_MCOUNT_RECORD +#ifdef CONFIG_DYNAMIC_FTRACE /* * The ftrace call sites are logged to a section whose name depends on the * compiler option used. A given kernel image will only use one, AKA diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b672ca15f265..7ded7df6e9b5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1108,7 +1108,7 @@ static __always_inline unsigned long get_lock_parent_ip(void) # define trace_preempt_off(a0, a1) do { } while (0) #endif -#ifdef CONFIG_FTRACE_MCOUNT_RECORD +#ifdef CONFIG_DYNAMIC_FTRACE extern void ftrace_init(void); #ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY #define FTRACE_CALLSITE_SECTION "__patchable_function_entries" diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 1cce1f6410a9..989315dabb86 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -373,9 +373,9 @@ ftrace_vprintk(const char *fmt, va_list ap) static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #endif /* CONFIG_TRACING */ -/* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */ -#ifdef CONFIG_FTRACE_MCOUNT_RECORD -# define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD +/* Rebuild everything on CONFIG_DYNAMIC_FTRACE */ +#ifdef CONFIG_DYNAMIC_FTRACE +# define REBUILD_DUE_TO_DYNAMIC_FTRACE #endif /* Permissions on a sysfs file: you didn't miss the 0 prefix did you? */ diff --git a/include/linux/module.h b/include/linux/module.h index 5faa1fb1f4b4..800e6fde9bf7 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -539,7 +539,7 @@ struct module { struct trace_eval_map **trace_evals; unsigned int num_trace_evals; #endif -#ifdef CONFIG_FTRACE_MCOUNT_RECORD +#ifdef CONFIG_DYNAMIC_FTRACE unsigned int num_ftrace_callsites; unsigned long *ftrace_callsites; #endif -- cgit v1.2.3 From 0a949252556809ce922e0289c148883e838cb9bb Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Fri, 11 Jul 2025 15:17:37 +0200 Subject: rv/ltl: Do not execute the Buchi automaton twice on start condition On start condition of a Buchi automaton, the automaton is executed twice. This is fine for now, as all the current LTL operators do not care about this. But it would break the 'next' operator, which will be introduced in a follow-up patch. Prepare for the introduction of the 'next' operator, only execute the automaton once on start condition. Cc: John Ogness Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Gabriele Monaco Link: https://lore.kernel.org/9379f4e7b9c1c69a6dca3e20a22936c850a25ca7.1752239482.git.namcao@linutronix.de Signed-off-by: Nam Cao Signed-off-by: Steven Rostedt (Google) --- include/rv/ltl_monitor.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rv/ltl_monitor.h b/include/rv/ltl_monitor.h index 9a583125b566..67031a774e3d 100644 --- a/include/rv/ltl_monitor.h +++ b/include/rv/ltl_monitor.h @@ -167,8 +167,10 @@ static void ltl_atom_update(struct task_struct *task, enum ltl_atom atom, bool v ltl_atom_set(mon, atom, value); ltl_atoms_fetch(task, mon); - if (!rv_ltl_valid_state(mon)) + if (!rv_ltl_valid_state(mon)) { ltl_attempt_start(task, mon); + return; + } ltl_validate(task, mon); } -- cgit v1.2.3 From 5ec9d26b78c4eb7c2fab54dcec6c0eb845302a98 Mon Sep 17 00:00:00 2001 From: Phillip Potter Date: Wed, 23 Jul 2025 00:19:00 +0100 Subject: cdrom: Call cdrom_mrw_exit from cdrom_release function Remove the cdrom_mrw_exit call from unregister_cdrom, as it invokes block commands that can fail due to a NULL pointer dereference from the call happening too late, during the unloading of the driver (e.g. unplugging of USB optical drives). Instead perform the call inside cdrom_release, thus also removing the need for the exit function pointer inside the cdrom_device_info struct. Reported-by: Sergey Senozhatsky Closes: https://lore.kernel.org/linux-block/uxgzea5ibqxygv3x7i4ojbpvcpv2wziorvb3ns5cdtyvobyn7h@y4g4l5ezv2ec Suggested-by: Jens Axboe Link: https://lore.kernel.org/linux-block/6686fe78-a050-4a1d-aa27-b7bf7ca6e912@kernel.dk Tested-by: Phillip Potter Signed-off-by: Phillip Potter Link: https://lore.kernel.org/r/20250722231900.1164-2-phil@philpotter.co.uk Signed-off-by: Jens Axboe --- include/linux/cdrom.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index fdfb61ccf55a..b907e6c2307d 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -62,7 +62,6 @@ struct cdrom_device_info { __u8 last_sense; __u8 media_written; /* dirty flag, DVD+RW bookkeeping */ unsigned short mmc3_profile; /* current MMC3 profile */ - int (*exit)(struct cdrom_device_info *); int mrw_mode_page; bool opened_for_data; __s64 last_media_change_ms; -- cgit v1.2.3 From ad892e912b84b706ed399a212174978ddd1ac1f9 Mon Sep 17 00:00:00 2001 From: Fan Yu Date: Mon, 21 Jul 2025 11:16:07 +0800 Subject: tcp: trace retransmit failures in tcp_retransmit_skb Background ========== When TCP retransmits a packet due to missing ACKs, the retransmission may fail for various reasons (e.g., packets stuck in driver queues, receiver zero windows, or routing issues). The original tcp_retransmit_skb tracepoint: 'commit e086101b150a ("tcp: add a tracepoint for tcp retransmission")' lacks visibility into these failure causes, making production diagnostics difficult. Solution ======== Adds the retval("err") to the tcp_retransmit_skb tracepoint. Enables users to know why some tcp retransmission failed and users can filter retransmission failures by retval. Compatibility description ========================= This patch extends the tcp_retransmit_skb tracepoint by adding a new "err" field at the end of its existing structure (within TP_STRUCT__entry). The compatibility implications are detailed as follows: 1) Structural compatibility for legacy user-space tools Legacy tools/BPF programs accessing existing fields (by offset or name) can still work without modification or recompilation.The new field is appended to the end, preserving original memory layout. 2) Note: semantic changes The original tracepoint primarily only focused on successfully retransmitted packets. With this patch, the tracepoint now can figure out packets that may terminate early due to specific reasons. For accurate statistics, users should filter using "err" to distinguish outcomes. Before patched: field:const void * skbaddr; offset:8; size:8; signed:0; field:const void * skaddr; offset:16; size:8; signed:0; field:int state; offset:24; size:4; signed:1; field:__u16 sport; offset:28; size:2; signed:0; field:__u16 dport; offset:30; size:2; signed:0; field:__u16 family; offset:32; size:2; signed:0; field:__u8 saddr[4]; offset:34; size:4; signed:0; field:__u8 daddr[4]; offset:38; size:4; signed:0; field:__u8 saddr_v6[16]; offset:42; size:16; signed:0; field:__u8 daddr_v6[16]; offset:58; size:16; signed:0; print fmt: "skbaddr=%p skaddr=%p family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s" After patched: field:const void * skbaddr; offset:8; size:8; signed:0; field:const void * skaddr; offset:16; size:8; signed:0; field:int state; offset:24; size:4; signed:1; field:__u16 sport; offset:28; size:2; signed:0; field:__u16 dport; offset:30; size:2; signed:0; field:__u16 family; offset:32; size:2; signed:0; field:__u8 saddr[4]; offset:34; size:4; signed:0; field:__u8 daddr[4]; offset:38; size:4; signed:0; field:__u8 saddr_v6[16]; offset:42; size:16; signed:0; field:__u8 daddr_v6[16]; offset:58; size:16; signed:0; field:int err; offset:76; size:4; signed:1; print fmt: "skbaddr=%p skaddr=%p family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s err=%d" Co-developed-by: xu xin Signed-off-by: xu xin Signed-off-by: Fan Yu Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250721111607626_BDnIJB0ywk6FghN63bor@zte.com.cn Signed-off-by: Jakub Kicinski --- include/trace/events/tcp.h | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 54e60c6009e3..9d2c36c6a0ed 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -13,17 +13,11 @@ #include #include -/* - * tcp event with arguments sk and skb - * - * Note: this class requires a valid sk pointer; while skb pointer could - * be NULL. - */ -DECLARE_EVENT_CLASS(tcp_event_sk_skb, +TRACE_EVENT(tcp_retransmit_skb, - TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, int err), - TP_ARGS(sk, skb), + TP_ARGS(sk, skb, err), TP_STRUCT__entry( __field(const void *, skbaddr) @@ -36,6 +30,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) + __field(int, err) ), TP_fast_assign( @@ -58,21 +53,17 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); + + __entry->err = err; ), - TP_printk("skbaddr=%p skaddr=%p family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s", + TP_printk("skbaddr=%p skaddr=%p family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s err=%d", __entry->skbaddr, __entry->skaddr, show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, - show_tcp_state_name(__entry->state)) -); - -DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb, - - TP_PROTO(const struct sock *sk, const struct sk_buff *skb), - - TP_ARGS(sk, skb) + show_tcp_state_name(__entry->state), + __entry->err) ); #undef FN -- cgit v1.2.3 From 0a61ec9cc51b0e43981222005444508437e95b33 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 17 Jul 2025 15:17:25 +0300 Subject: PCI/TPH: Expose pcie_tph_get_st_table_size() Expose pcie_tph_get_st_table_size() to be used by drivers as will be done in the next patch from the series. Signed-off-by: Yishai Hadas Acked-by: Bjorn Helgaas Link: https://patch.msgid.link/9ae851e0ee42cc56d2a30276e116b65091030ceb.1752752567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/pci-tph.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h index c3e806c13d64..9e4e331b1603 100644 --- a/include/linux/pci-tph.h +++ b/include/linux/pci-tph.h @@ -28,6 +28,7 @@ int pcie_tph_get_cpu_st(struct pci_dev *dev, unsigned int cpu_uid, u16 *tag); void pcie_disable_tph(struct pci_dev *pdev); int pcie_enable_tph(struct pci_dev *pdev, int mode); +u16 pcie_tph_get_st_table_size(struct pci_dev *pdev); #else static inline int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, u16 tag) -- cgit v1.2.3 From 5f9ec7880e6b3c4d0cf242fe28506d0b084328b1 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 17 Jul 2025 15:17:26 +0300 Subject: net/mlx5: Expose IFC bits for TPH Expose IFC bits for the TPH functionality. Signed-off-by: Yishai Hadas Reviewed-by: Edward Srouji Reviewed-by: Moshe Shemesh Link: https://patch.msgid.link/38ea3a0d56551364214e8edf359c9c77c9a3b71b.1752752567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ed4130e49c27..8360d9011d4f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1871,7 +1871,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_280[0x10]; u8 max_wqe_sz_sq[0x10]; - u8 reserved_at_2a0[0xb]; + u8 reserved_at_2a0[0x7]; + u8 mkey_pcie_tph[0x1]; + u8 reserved_at_2a8[0x3]; u8 shampo[0x1]; u8 reserved_at_2ac[0x4]; u8 max_wqe_sz_rq[0x10]; @@ -4418,6 +4420,10 @@ enum { MLX5_MKC_ACCESS_MODE_CROSSING = 0x6, }; +enum { + MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX = 0, +}; + struct mlx5_ifc_mkc_bits { u8 reserved_at_0[0x1]; u8 free[0x1]; @@ -4469,7 +4475,11 @@ struct mlx5_ifc_mkc_bits { u8 relaxed_ordering_read[0x1]; u8 log_page_size[0x6]; - u8 reserved_at_1e0[0x20]; + u8 reserved_at_1e0[0x5]; + u8 pcie_tph_en[0x1]; + u8 pcie_tph_ph[0x2]; + u8 pcie_tph_steering_tag_index[0x8]; + u8 reserved_at_1f0[0x10]; }; struct mlx5_ifc_pkey_bits { -- cgit v1.2.3 From 888a7776f4fb04c19bec70c737c61c2f383c6b1e Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 17 Jul 2025 15:17:27 +0300 Subject: net/mlx5: Add support for device steering tag Background, from PCIe specification 6.2. TLP Processing Hints (TPH) -------------------------- TLP Processing Hints is an optional feature that provides hints in Request TLP headers to facilitate optimized processing of Requests that target Memory Space. These Processing Hints enable the system hardware (e.g., the Root Complex and/or Endpoints) to optimize platform resources such as system and memory interconnect on a per TLP basis. Steering Tags are system-specific values used to identify a processing resource that a Requester explicitly targets. System software discovers and identifies TPH capabilities to determine the Steering Tag allocation for each Function that supports TPH. This patch adds steering tag support for mlx5 based NICs by: - Enabling the TPH functionality over PCI if both FW and OS support it. - Managing steering tags and their matching steering indexes by writing a ST to an ST index over the PCI configuration space. - Exposing APIs to upper layers (e.g.,mlx5_ib) to allow usage of the PCI TPH infrastructure. Further details: - Upon probing of a device, the feature will be enabled based on both capability detection and OS support. - It will retrieve the appropriate ST for a given CPU ID and memory type using the pcie_tph_get_cpu_st() API. - It will track available ST indices according to the configuration space table size (expected to be 63 entries), reserving index 0 to indicate non-TPH use. - It will assign a free ST index with a ST using the pcie_tph_set_st_entry() API. - It will reuse the same index for identical (CPU ID + memory type) combinations by maintaining a reference count per entry. - It will expose APIs to upper layers (e.g., mlx5_ib) to allow usage of the PCI TPH infrastructure. - SF will use its parent PF stuff. Signed-off-by: Yishai Hadas Link: https://patch.msgid.link/de1ae7398e9e34eacd8c10845683df44fc9e32f8.1752752567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e6ba8f4f4bd1..104d4921c032 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -688,6 +689,7 @@ struct mlx5_fw_tracer; struct mlx5_vxlan; struct mlx5_geneve; struct mlx5_hv_vhca; +struct mlx5_st; #define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity)) #define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) @@ -757,6 +759,7 @@ struct mlx5_core_dev { u32 issi; struct mlx5e_resources mlx5e_res; struct mlx5_dm *dm; + struct mlx5_st *st; struct mlx5_vxlan *vxlan; struct mlx5_geneve *geneve; struct { @@ -1160,6 +1163,23 @@ int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, u64 length, u16 uid, phys_addr_t addr, u32 obj_id); +#ifdef CONFIG_PCIE_TPH +int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type, + unsigned int cpu_uid, u16 *st_index); +int mlx5_st_dealloc_index(struct mlx5_core_dev *dev, u16 st_index); +#else +static inline int mlx5_st_alloc_index(struct mlx5_core_dev *dev, + enum tph_mem_type mem_type, + unsigned int cpu_uid, u16 *st_index) +{ + return -EOPNOTSUPP; +} +static inline int mlx5_st_dealloc_index(struct mlx5_core_dev *dev, u16 st_index) +{ + return -EOPNOTSUPP; +} +#endif + struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev); void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev); -- cgit v1.2.3 From 5b2e45049dc06a876bc6b138218ddeb0814502ef Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 17 Jul 2025 15:17:28 +0300 Subject: IB/core: Add UVERBS_METHOD_REG_MR on the MR object This new method enables us to use a single ioctl from user space which supports the below variants of reg_mr [1]. The method will be extended in the next patches from the series with an extra attribute to let us pass DMA handle to be used as part of the registration. [1] ibv_reg_mr(), ibv_reg_mr_iova(), ibv_reg_mr_iova2(), ibv_reg_dmabuf_mr(). Signed-off-by: Yishai Hadas Reviewed-by: Edward Srouji Link: https://patch.msgid.link/5a3822ceef084efe967c9752e89c58d8250337c7.1752752567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/uapi/rdma/ib_user_ioctl_cmds.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index 5f3e5bee51b2..ece923ab48a0 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -257,6 +257,7 @@ enum uverbs_methods_mr { UVERBS_METHOD_ADVISE_MR, UVERBS_METHOD_QUERY_MR, UVERBS_METHOD_REG_DMABUF_MR, + UVERBS_METHOD_REG_MR, }; enum uverbs_attrs_mr_destroy_ids { @@ -290,6 +291,19 @@ enum uverbs_attrs_reg_dmabuf_mr_cmd_attr_ids { UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY, }; +enum uverbs_attrs_reg_mr_cmd_attr_ids { + UVERBS_ATTR_REG_MR_HANDLE, + UVERBS_ATTR_REG_MR_PD_HANDLE, + UVERBS_ATTR_REG_MR_IOVA, + UVERBS_ATTR_REG_MR_ADDR, + UVERBS_ATTR_REG_MR_LENGTH, + UVERBS_ATTR_REG_MR_ACCESS_FLAGS, + UVERBS_ATTR_REG_MR_FD, + UVERBS_ATTR_REG_MR_FD_OFFSET, + UVERBS_ATTR_REG_MR_RESP_LKEY, + UVERBS_ATTR_REG_MR_RESP_RKEY, +}; + enum uverbs_attrs_create_counters_cmd_attr_ids { UVERBS_ATTR_CREATE_COUNTERS_HANDLE, }; -- cgit v1.2.3 From d83edab562a496a42720902a1d2effccd05c37c5 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 17 Jul 2025 15:17:29 +0300 Subject: RDMA/core: Introduce a DMAH object and its alloc/free APIs Introduce a new DMA handle (DMAH) object along with its corresponding allocation and deallocation APIs. This DMAH object encapsulates attributes intended for use in DMA transactions. While its initial purpose is to support TPH functionality, it is designed to be extensible for future features such as DMA PCI multipath, PCI UIO configurations, PCI traffic class selection, and more. Further details: ---------------- We ensure that a caller requesting a DMA handle for a specific CPU ID is permitted to be scheduled on it. This prevent a potential security issue where a non privilege user may trigger DMA operations toward a CPU that it's not allowed to run on. We manage reference counting for the DMAH object and its consumers (e.g., memory regions) as will be detailed in subsequent patches in the series. Signed-off-by: Yishai Hadas Reviewed-by: Edward Srouji Link: https://patch.msgid.link/2cad097e849597e49d6b61e6865dba878257f371.1752752567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 26 ++++++++++++++++++++++++++ include/rdma/restrack.h | 4 ++++ include/uapi/rdma/ib_user_ioctl_cmds.h | 17 +++++++++++++++++ 3 files changed, 47 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3fb1c963eeb0..9ad253687935 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -42,6 +42,7 @@ #include #include #include +#include #define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN @@ -1846,6 +1847,27 @@ struct ib_dm { atomic_t usecnt; }; +/* bit values to mark existence of ib_dmah fields */ +enum { + IB_DMAH_CPU_ID_EXISTS, + IB_DMAH_MEM_TYPE_EXISTS, + IB_DMAH_PH_EXISTS, +}; + +struct ib_dmah { + struct ib_device *device; + struct ib_uobject *uobject; + /* + * Implementation details of the RDMA core, don't use in drivers: + */ + struct rdma_restrack_entry res; + u32 cpu_id; + enum tph_mem_type mem_type; + atomic_t usecnt; + u8 ph; + u8 valid_fields; /* use IB_DMAH_XXX_EXISTS */ +}; + struct ib_mr { struct ib_device *device; struct ib_pd *pd; @@ -2573,6 +2595,9 @@ struct ib_device_ops { struct ib_dm_alloc_attr *attr, struct uverbs_attr_bundle *attrs); int (*dealloc_dm)(struct ib_dm *dm, struct uverbs_attr_bundle *attrs); + int (*alloc_dmah)(struct ib_dmah *ibdmah, + struct uverbs_attr_bundle *attrs); + int (*dealloc_dmah)(struct ib_dmah *dmah, struct uverbs_attr_bundle *attrs); struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm, struct ib_dm_mr_attr *attr, struct uverbs_attr_bundle *attrs); @@ -2730,6 +2755,7 @@ struct ib_device_ops { DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_counters); DECLARE_RDMA_OBJ_SIZE(ib_cq); + DECLARE_RDMA_OBJ_SIZE(ib_dmah); DECLARE_RDMA_OBJ_SIZE(ib_mw); DECLARE_RDMA_OBJ_SIZE(ib_pd); DECLARE_RDMA_OBJ_SIZE(ib_qp); diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index 0d69ded73bf2..8a9bcf77dace 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -56,6 +56,10 @@ enum rdma_restrack_type { * @RDMA_RESTRACK_SRQ: Shared receive queue (SRQ) */ RDMA_RESTRACK_SRQ, + /** + * @RDMA_RESTRACK_DMAH: DMA handle + */ + RDMA_RESTRACK_DMAH, /** * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations */ diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index ece923ab48a0..3bb72a259c29 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -55,6 +55,7 @@ enum uverbs_default_objects { UVERBS_OBJECT_DM, UVERBS_OBJECT_COUNTERS, UVERBS_OBJECT_ASYNC_EVENT, + UVERBS_OBJECT_DMAH, }; enum { @@ -240,6 +241,22 @@ enum uverbs_methods_dm { UVERBS_METHOD_DM_FREE, }; +enum uverbs_attrs_alloc_dmah_cmd_attr_ids { + UVERBS_ATTR_ALLOC_DMAH_HANDLE, + UVERBS_ATTR_ALLOC_DMAH_CPU_ID, + UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE, + UVERBS_ATTR_ALLOC_DMAH_PH, +}; + +enum uverbs_attrs_free_dmah_cmd_attr_ids { + UVERBS_ATTR_FREE_DMA_HANDLE, +}; + +enum uverbs_methods_dmah { + UVERBS_METHOD_DMAH_ALLOC, + UVERBS_METHOD_DMAH_FREE, +}; + enum uverbs_attrs_reg_dm_mr_cmd_attr_ids { UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_ATTR_REG_DM_MR_OFFSET, -- cgit v1.2.3 From a272019a46c918575f10cc529c893585d46b3b55 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 17 Jul 2025 15:17:31 +0300 Subject: IB: Extend UVERBS_METHOD_REG_MR to get DMAH Extend UVERBS_METHOD_REG_MR to get DMAH and pass it to all drivers. It will be used in mlx5 driver as part of the next patch from the series. Signed-off-by: Yishai Hadas Reviewed-by: Edward Srouji Link: https://patch.msgid.link/2ae1e628c0675db81f092cc00d3ad6fbf6139405.1752752567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 3 +++ include/uapi/rdma/ib_user_ioctl_cmds.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9ad253687935..6139223e92e4 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1885,6 +1885,7 @@ struct ib_mr { struct ib_dm *dm; struct ib_sig_attrs *sig_attrs; /* only for IB_MR_TYPE_INTEGRITY MRs */ + struct ib_dmah *dmah; /* * Implementation details of the RDMA core, don't use in drivers: */ @@ -2527,10 +2528,12 @@ struct ib_device_ops { struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags); struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, + struct ib_dmah *dmah, struct ib_udata *udata); struct ib_mr *(*reg_user_mr_dmabuf)(struct ib_pd *pd, u64 offset, u64 length, u64 virt_addr, int fd, int mr_access_flags, + struct ib_dmah *dmah, struct uverbs_attr_bundle *attrs); struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, u64 virt_addr, diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index 3bb72a259c29..de6f5a94f1e3 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -311,6 +311,7 @@ enum uverbs_attrs_reg_dmabuf_mr_cmd_attr_ids { enum uverbs_attrs_reg_mr_cmd_attr_ids { UVERBS_ATTR_REG_MR_HANDLE, UVERBS_ATTR_REG_MR_PD_HANDLE, + UVERBS_ATTR_REG_MR_DMA_HANDLE, UVERBS_ATTR_REG_MR_IOVA, UVERBS_ATTR_REG_MR_ADDR, UVERBS_ATTR_REG_MR_LENGTH, -- cgit v1.2.3 From d0d05f602c1504fb868ed4a560d1465d88a3c5e5 Mon Sep 17 00:00:00 2001 From: Joel Granados Date: Tue, 29 Apr 2025 14:30:00 +0200 Subject: module: Move modprobe_path and modules_disabled ctl_tables into the module subsys Move module sysctl (modprobe_path and modules_disabled) out of sysctl.c and into the modules subsystem. Make modules_disabled static as it no longer needs to be exported. Remove module.h from the includes in sysctl as it no longer uses any module exported variables. This is part of a greater effort to move ctl tables into their respective subsystems which will reduce the merge conflicts in kernel/sysctl.c. Reviewed-by: Luis Chamberlain Reviewed-by: Petr Pavlu Signed-off-by: Joel Granados --- include/linux/kmod.h | 3 --- include/linux/module.h | 1 - 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 68f69362d427..9a07c3215389 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -14,10 +14,7 @@ #include #include -#define KMOD_PATH_LEN 256 - #ifdef CONFIG_MODULES -extern char modprobe_path[]; /* for sysctl */ /* modprobe exit status on success, -ve on error. Return value * usually useless though. */ extern __printf(2, 3) diff --git a/include/linux/module.h b/include/linux/module.h index 92e1420fccdf..e93cdb92ad92 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -304,7 +304,6 @@ struct notifier_block; #ifdef CONFIG_MODULES -extern int modules_disabled; /* for sysctl */ /* Get/put a kernel symbol (calls must be symmetric) */ void *__symbol_get(const char *symbol); void *__symbol_get_gpl(const char *symbol); -- cgit v1.2.3 From f1b4f23a52c272f6c1e205e8ec243f563323c5aa Mon Sep 17 00:00:00 2001 From: Joel Granados Date: Tue, 29 Apr 2025 15:12:17 +0200 Subject: locking/rtmutex: Move max_lock_depth into rtmutex.c Move the max_lock_depth sysctl table element into rtmutex_api.c. Removed the rtmutex.h include from sysctl.c. Chose to move into rtmutex_api.c to avoid multiple registrations every time rtmutex.c is included in other files. This is part of a greater effort to move ctl tables into their respective subsystems which will reduce the merge conflicts in kernel/sysctl.c. Signed-off-by: Joel Granados --- include/linux/rtmutex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 7d049883a08a..fa9f1021541e 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -18,7 +18,7 @@ #include #include -extern int max_lock_depth; /* for sysctl */ +extern int max_lock_depth; struct rt_mutex_base { raw_spinlock_t wait_lock; -- cgit v1.2.3 From fff6703fc843569d7a2f78ca08e7a69a9be22b0f Mon Sep 17 00:00:00 2001 From: Joel Granados Date: Wed, 30 Apr 2025 14:07:33 +0200 Subject: rcu: Move rcu_stall related sysctls into rcu/tree_stall.h Move sysctl_panic_on_rcu_stall and sysctl_max_rcu_stall_to_panic into the kernel/rcu subdirectory. Make these static in tree_stall.h and removed them as extern from panic.h as their scope is now confined into one file. This is part of a greater effort to move ctl tables into their respective subsystems which will reduce the merge conflicts in kernel/sysctl.c. Reviewed-by: Luis Chamberlain Reviewed-by: Joel Fernandes Reviewed-by: Kees Cook Signed-off-by: Joel Granados --- include/linux/panic.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/panic.h b/include/linux/panic.h index 4adc65766935..8f2b5d92ac05 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -25,8 +25,6 @@ extern int panic_on_warn; extern unsigned long panic_on_taint; extern bool panic_on_taint_nousertaint; -extern int sysctl_panic_on_rcu_stall; -extern int sysctl_max_rcu_stall_to_panic; extern int sysctl_panic_on_stackoverflow; extern bool crash_kexec_post_notifiers; -- cgit v1.2.3 From 9e2f403dd8c2b07aff012e72c1fe5455538d72d2 Mon Sep 17 00:00:00 2001 From: Joel Granados Date: Fri, 2 May 2025 15:32:17 +0200 Subject: parisc/power: Move soft-power into power.c Move the soft-power ctl table into parisc/power.c. As a consequence the pwrsw_enabled var is made static. This is part of a greater effort to move ctl tables into their respective subsystems which will reduce the merge conflicts in kernel/sysctl.c. Reviewed-by: Luis Chamberlain Reviewed-by: Kees Cook Signed-off-by: Joel Granados --- include/linux/sysctl.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 40a6ac6c9713..ae762eabb7c9 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -242,7 +242,6 @@ int do_proc_douintvec(const struct ctl_table *table, int write, int write, void *data), void *data); -extern int pwrsw_enabled; extern int unaligned_enabled; extern int unaligned_dump_stack; extern int no_unaligned_warning; -- cgit v1.2.3 From 8e5f04b0d58c734c69a0b6e26317561919299638 Mon Sep 17 00:00:00 2001 From: Joel Granados Date: Fri, 2 May 2025 22:27:38 +0200 Subject: fork: mv threads-max into kernel/fork.c make sysctl_max_threads static as it no longer needs to be exported into sysctl.c. This is part of a greater effort to move ctl tables into their respective subsystems which will reduce the merge conflicts in kernel/sysctl.c. Reviewed-by: Luis Chamberlain Reviewed-by: Kees Cook Signed-off-by: Joel Granados --- include/linux/sysctl.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index ae762eabb7c9..30bcbc59d12d 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -284,7 +284,4 @@ static inline bool sysctl_is_alias(char *param) } #endif /* CONFIG_SYSCTL */ -int sysctl_max_threads(const struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos); - #endif /* _LINUX_SYSCTL_H */ -- cgit v1.2.3 From 39dac316f09ae5a0930878d2cae8aea113648b5a Mon Sep 17 00:00:00 2001 From: Joel Granados Date: Fri, 27 Jun 2025 09:00:05 +0200 Subject: sysctl: Removed unused variable Remove unaligned_dump_stack from sysctl.h; it is no longer used or defined. Signed-off-by: Joel Granados --- include/linux/sysctl.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 30bcbc59d12d..92e9146b1104 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -243,7 +243,6 @@ int do_proc_douintvec(const struct ctl_table *table, int write, void *data); extern int unaligned_enabled; -extern int unaligned_dump_stack; extern int no_unaligned_warning; #else /* CONFIG_SYSCTL */ -- cgit v1.2.3 From 4e8fc4f7208b032674ef8a4977b96484c328515c Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Wed, 23 Jul 2025 20:23:29 +0800 Subject: netfs: Remove unused declaration netfs_queue_write_request() Commit c245868524cc ("netfs: Remove the old writeback code") removed the implementation but leave declaration. Signed-off-by: Yue Haibing Link: https://lore.kernel.org/20250723122329.923223-1-yuehaibing@huawei.com Signed-off-by: Christian Brauner --- include/linux/netfs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 065c17385e53..486d5766fe77 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -443,7 +443,6 @@ size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, size_t max_size, size_t max_segs); void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq); void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error); -void netfs_queue_write_request(struct netfs_io_subrequest *subreq); int netfs_start_io_read(struct inode *inode); void netfs_end_io_read(struct inode *inode); -- cgit v1.2.3 From 70c672f933337fc1de2df8628567ee0a8146562b Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Fri, 20 Jun 2025 15:03:45 +0800 Subject: Bluetooth: Remove hci_conn_hash_lookup_state() Since commit 4aa42119d971 ("Bluetooth: Remove pending ACL connection attempts") this function is unused. Signed-off-by: Yue Haibing Reviewed-by: Simon Horman Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f79f59e67114..69f491399dac 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1420,26 +1420,6 @@ hci_conn_hash_lookup_pa_sync_handle(struct hci_dev *hdev, __u16 sync_handle) return NULL; } -static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev, - __u8 type, __u16 state) -{ - struct hci_conn_hash *h = &hdev->conn_hash; - struct hci_conn *c; - - rcu_read_lock(); - - list_for_each_entry_rcu(c, &h->list, list) { - if (c->type == type && c->state == state) { - rcu_read_unlock(); - return c; - } - } - - rcu_read_unlock(); - - return NULL; -} - typedef void (*hci_conn_func_t)(struct hci_conn *conn, void *data); static inline void hci_conn_hash_list_state(struct hci_dev *hdev, hci_conn_func_t func, __u8 type, -- cgit v1.2.3 From b2a5f2e1c127cb431df22e114998ff72eb4578c8 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 1 Jul 2025 15:56:22 +0800 Subject: Bluetooth: hci_event: Add support for handling LE BIG Sync Lost event When the BIS source stops, the controller sends an LE BIG Sync Lost event (subevent 0x1E). Currently, this event is not handled, causing the BIS stream to remain active in BlueZ and preventing recovery. Signed-off-by: Yang Li Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 6 ++++++ include/net/bluetooth/hci_core.h | 5 +++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index c79901f2dc2a..6213012610d7 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2851,6 +2851,12 @@ struct hci_evt_le_big_sync_estabilished { __le16 bis[]; } __packed; +#define HCI_EVT_LE_BIG_SYNC_LOST 0x1e +struct hci_evt_le_big_sync_lost { + __u8 handle; + __u8 reason; +} __packed; + #define HCI_EVT_LE_BIG_INFO_ADV_REPORT 0x22 struct hci_evt_le_big_info_adv_report { __le16 sync_handle; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 69f491399dac..1ef9279cfd6f 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1346,7 +1346,8 @@ hci_conn_hash_lookup_big_sync_pend(struct hci_dev *hdev, } static inline struct hci_conn * -hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state) +hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state, + __u8 role) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; @@ -1354,7 +1355,7 @@ hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != BIS_LINK || c->state != state) + if (c->type != BIS_LINK || c->state != state || c->role != role) continue; if (handle == c->iso_qos.bcast.big) { -- cgit v1.2.3 From be31d11ec9144f7f8f7fcbf84ba6971b664683f3 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 1 Jul 2025 16:47:26 +0800 Subject: Bluetooth: Fix spelling mistakes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correct the misspelling of “estabilished” in the code. Signed-off-by: Yang Li Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 6213012610d7..94f365b75166 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2837,7 +2837,7 @@ struct hci_evt_le_create_big_complete { } __packed; #define HCI_EVT_LE_BIG_SYNC_ESTABLISHED 0x1d -struct hci_evt_le_big_sync_estabilished { +struct hci_evt_le_big_sync_established { __u8 status; __u8 handle; __u8 latency[3]; -- cgit v1.2.3 From 7565bc56598c3d135318f1bd76a0178dd3ea918f Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Mon, 14 Jul 2025 19:40:37 +0300 Subject: Bluetooth: ISO: add socket option to report packet seqnum via CMSG User applications need a way to track which ISO interval a given SDU belongs to, to properly detect packet loss. All controllers do not set timestamps, and it's not guaranteed user application receives all packet reports (small socket buffer, or controller doesn't send all reports like Intel AX210 is doing). Add socket option BT_PKT_SEQNUM that enables reporting of received packet ISO sequence number in BT_SCM_PKT_SEQNUM CMSG. Use BT_PKT_SEQNUM == 22 for the socket option, as 21 was used earlier for a removed experimental feature that never got into mainline. Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/bluetooth.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 114299bd8b98..ada5b56a4413 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -244,6 +244,12 @@ struct bt_codecs { #define BT_ISO_BASE 20 +/* Socket option value 21 reserved */ + +#define BT_PKT_SEQNUM 22 + +#define BT_SCM_PKT_SEQNUM 0x05 + __printf(1, 2) void bt_info(const char *fmt, ...); __printf(1, 2) @@ -391,7 +397,8 @@ struct bt_sock { enum { BT_SK_DEFER_SETUP, BT_SK_SUSPEND, - BT_SK_PKT_STATUS + BT_SK_PKT_STATUS, + BT_SK_PKT_SEQNUM, }; struct bt_sock_list { @@ -475,6 +482,7 @@ struct bt_skb_cb { u8 pkt_type; u8 force_active; u16 expect; + u16 pkt_seqnum; u8 incoming:1; u8 pkt_status:2; union { @@ -488,6 +496,7 @@ struct bt_skb_cb { #define hci_skb_pkt_type(skb) bt_cb((skb))->pkt_type #define hci_skb_pkt_status(skb) bt_cb((skb))->pkt_status +#define hci_skb_pkt_seqnum(skb) bt_cb((skb))->pkt_seqnum #define hci_skb_expect(skb) bt_cb((skb))->expect #define hci_skb_opcode(skb) bt_cb((skb))->hci.opcode #define hci_skb_event(skb) bt_cb((skb))->hci.req_event -- cgit v1.2.3 From 2935e556850e9c94d7a00adf14d3cd7fe406ac03 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Wed, 16 Jul 2025 22:23:58 +0300 Subject: Bluetooth: hci_sync: fix double free in 'hci_discovery_filter_clear()' Function 'hci_discovery_filter_clear()' frees 'uuids' array and then sets it to NULL. There is a tiny chance of the following race: 'hci_cmd_sync_work()' 'update_passive_scan_sync()' 'hci_update_passive_scan_sync()' 'hci_discovery_filter_clear()' kfree(uuids); <-------------------------preempted--------------------------------> 'start_service_discovery()' 'hci_discovery_filter_clear()' kfree(uuids); // DOUBLE FREE <-------------------------preempted--------------------------------> uuids = NULL; To fix it let's add locking around 'kfree()' call and NULL pointer assignment. Otherwise the following backtrace fires: [ ] ------------[ cut here ]------------ [ ] kernel BUG at mm/slub.c:547! [ ] Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP [ ] CPU: 3 UID: 0 PID: 246 Comm: bluetoothd Tainted: G O 6.12.19-kernel #1 [ ] Tainted: [O]=OOT_MODULE [ ] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ ] pc : __slab_free+0xf8/0x348 [ ] lr : __slab_free+0x48/0x348 ... [ ] Call trace: [ ] __slab_free+0xf8/0x348 [ ] kfree+0x164/0x27c [ ] start_service_discovery+0x1d0/0x2c0 [ ] hci_sock_sendmsg+0x518/0x924 [ ] __sock_sendmsg+0x54/0x60 [ ] sock_write_iter+0x98/0xf8 [ ] do_iter_readv_writev+0xe4/0x1c8 [ ] vfs_writev+0x128/0x2b0 [ ] do_writev+0xfc/0x118 [ ] __arm64_sys_writev+0x20/0x2c [ ] invoke_syscall+0x68/0xf0 [ ] el0_svc_common.constprop.0+0x40/0xe0 [ ] do_el0_svc+0x1c/0x28 [ ] el0_svc+0x30/0xd0 [ ] el0t_64_sync_handler+0x100/0x12c [ ] el0t_64_sync+0x194/0x198 [ ] Code: 8b0002e6 eb17031f 54fffbe1 d503201f (d4210000) [ ] ---[ end trace 0000000000000000 ]--- Fixes: ad383c2c65a5 ("Bluetooth: hci_sync: Enable advertising when LL privacy is enabled") Signed-off-by: Arseniy Krasnov Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 1ef9279cfd6f..3728495f0819 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -94,6 +95,7 @@ struct discovery_state { u16 uuid_count; u8 (*uuids)[16]; unsigned long name_resolve_timeout; + spinlock_t lock; }; #define SUSPEND_NOTIFIER_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ @@ -889,6 +891,7 @@ static inline void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, static inline void discovery_init(struct hci_dev *hdev) { + spin_lock_init(&hdev->discovery.lock); hdev->discovery.state = DISCOVERY_STOPPED; INIT_LIST_HEAD(&hdev->discovery.all); INIT_LIST_HEAD(&hdev->discovery.unknown); @@ -903,8 +906,11 @@ static inline void hci_discovery_filter_clear(struct hci_dev *hdev) hdev->discovery.report_invalid_rssi = true; hdev->discovery.rssi = HCI_RSSI_INVALID; hdev->discovery.uuid_count = 0; + + spin_lock(&hdev->discovery.lock); kfree(hdev->discovery.uuids); hdev->discovery.uuids = NULL; + spin_unlock(&hdev->discovery.lock); } bool hci_discovery_active(struct hci_dev *hdev); -- cgit v1.2.3 From 0cadf8534f2a727bc3a01e8c583b085d25963ee0 Mon Sep 17 00:00:00 2001 From: Chris Down Date: Mon, 21 Jul 2025 16:30:23 +0100 Subject: Bluetooth: hci_event: Mask data status from LE ext adv reports The Event_Type field in an LE Extended Advertising Report uses bits 5 and 6 for data status (e.g. truncation or fragmentation), not the PDU type itself. The ext_evt_type_to_legacy() function fails to mask these status bits before evaluation. This causes valid advertisements with status bits set (e.g. a truncated non-connectable advertisement, which ends up showing as PDU type 0x40) to be misclassified as unknown and subsequently dropped. This is okay for most checks which use bitwise AND on the relevant event type bits, but it doesn't work for non-connectable types, which are checked with '== LE_EXT_ADV_NON_CONN_IND' (that is, zero). In terms of behaviour, first the device sends a truncated report: > HCI Event: LE Meta Event (0x3e) plen 26 LE Extended Advertising Report (0x0d) Entry 0 Event type: 0x0040 Data status: Incomplete, data truncated, no more to come Address type: Random (0x01) Address: 1D:12:46:FA:F8:6E (Non-Resolvable) SID: 0x03 RSSI: -98 dBm (0x9e) Data length: 0x00 Then, a few seconds later, it sends the subsequent complete report: > HCI Event: LE Meta Event (0x3e) plen 122 LE Extended Advertising Report (0x0d) Entry 0 Event type: 0x0000 Data status: Complete Address type: Random (0x01) Address: 1D:12:46:FA:F8:6E (Non-Resolvable) SID: 0x03 RSSI: -97 dBm (0x9f) Data length: 0x60 Service Data: Google (0xfef3) Data[92]: ... These devices often send multiple truncated reports per second. This patch introduces a PDU type mask to ensure only the relevant bits are evaluated, allowing for the correct translation of all valid extended advertising packets. Fixes: b2cc9761f144 ("Bluetooth: Handle extended ADV PDU types") Signed-off-by: Chris Down Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 94f365b75166..e90a7b753926 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2634,6 +2634,7 @@ struct hci_ev_le_conn_complete { #define LE_EXT_ADV_DIRECT_IND 0x0004 #define LE_EXT_ADV_SCAN_RSP 0x0008 #define LE_EXT_ADV_LEGACY_PDU 0x0010 +#define LE_EXT_ADV_DATA_STATUS_MASK 0x0060 #define LE_EXT_ADV_EVT_TYPE_MASK 0x007f #define ADDR_LE_DEV_PUBLIC 0x00 -- cgit v1.2.3 From a7bcffc673de219af2698fbb90627016233de67b Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 10 Jul 2025 18:52:47 +0800 Subject: Bluetooth: Add PA_LINK to distinguish BIG sync and PA sync connections Currently, BIS_LINK is used for both BIG sync and PA sync connections, which makes it impossible to distinguish them when searching for a PA sync connection. Adding PA_LINK will make the distinction clearer and simplify future extensions for PA-related features. Signed-off-by: Yang Li Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 + include/net/bluetooth/hci_core.h | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index e90a7b753926..df1847b74e55 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -562,6 +562,7 @@ enum { #define LE_LINK 0x80 #define CIS_LINK 0x82 #define BIS_LINK 0x83 +#define PA_LINK 0x84 #define INVALID_LINK 0xff /* LMP features */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 3728495f0819..4dc11c66f7b8 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1015,6 +1015,7 @@ static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c) break; case CIS_LINK: case BIS_LINK: + case PA_LINK: h->iso_num++; break; } @@ -1042,6 +1043,7 @@ static inline void hci_conn_hash_del(struct hci_dev *hdev, struct hci_conn *c) break; case CIS_LINK: case BIS_LINK: + case PA_LINK: h->iso_num--; break; } @@ -1060,6 +1062,7 @@ static inline unsigned int hci_conn_num(struct hci_dev *hdev, __u8 type) return h->sco_num; case CIS_LINK: case BIS_LINK: + case PA_LINK: return h->iso_num; default: return 0; @@ -1142,7 +1145,7 @@ hci_conn_hash_lookup_create_pa_sync(struct hci_dev *hdev) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != BIS_LINK) + if (c->type != PA_LINK) continue; if (!test_bit(HCI_CONN_CREATE_PA_SYNC, &c->flags)) @@ -1337,7 +1340,7 @@ hci_conn_hash_lookup_big_sync_pend(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != BIS_LINK) + if (c->type != PA_LINK) continue; if (handle == c->iso_qos.bcast.big && num_bis == c->num_bis) { @@ -1407,7 +1410,7 @@ hci_conn_hash_lookup_pa_sync_handle(struct hci_dev *hdev, __u16 sync_handle) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != BIS_LINK) + if (c->type != PA_LINK) continue; /* Ignore the listen hcon, we are looking @@ -2006,6 +2009,7 @@ static inline int hci_proto_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, case CIS_LINK: case BIS_LINK: + case PA_LINK: return iso_connect_ind(hdev, bdaddr, flags); default: -- cgit v1.2.3 From ab29b3460c5cec24bde75278d7ffca23cac1b867 Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Wed, 23 Jul 2025 22:24:23 +0800 Subject: ALSA: hda: Add TAS2770 support Add TAS2770 support in TI's HDA driver. And add hda_chip_id for more products. Distinguish DSP and non-DSP in firmware loading function. Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20250723142423.38768-1-baojun.xu@ti.com Signed-off-by: Takashi Iwai --- include/sound/tas2770-tlv.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/sound/tas2770-tlv.h (limited to 'include') diff --git a/include/sound/tas2770-tlv.h b/include/sound/tas2770-tlv.h new file mode 100644 index 000000000000..c0bd495b4a07 --- /dev/null +++ b/include/sound/tas2770-tlv.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// +// ALSA SoC Texas Instruments TAS2770 Audio Smart Amplifier +// +// Copyright (C) 2025 Texas Instruments Incorporated +// https://www.ti.com +// +// The TAS2770 hda driver implements for one, two, or even multiple +// TAS2770 chips. +// +// Author: Baojun Xu +// + +#ifndef __TAS2770_TLV_H__ +#define __TAS2770_TLV_H__ + +#define TAS2770_DVC_LEVEL TASDEVICE_REG(0x0, 0x0, 0x17) +#define TAS2770_AMP_LEVEL TASDEVICE_REG(0x0, 0x0, 0x03) + +static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2770_dvc_tlv, 1650, 50, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2770_amp_tlv, 1100, 50, 0); + +#endif -- cgit v1.2.3 From 9f0cb91767f582df6b17c1e2f22f684c36962295 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 22 Jul 2025 10:37:14 -0400 Subject: tracing: arm: arm64: Hide trace events ipi_raise, ipi_entry and ipi_exit The ipi tracepoints are mostly generic, but the tracepoints ipi_raise, ipi_entry and ipi_exit are only used by arm and arm64. This means these trace events are wasting memory in all the other architectures that do not use them. Add CONFIG_HAVE_EXTRA_IPI_TRACEPOINTS and have arm and arm64 select it to enable these trace events. The config makes it easy if other architectures decide to trace these as well. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Will Deacon Cc: Russell King Cc: Mark Rutland Cc: Thomas Gleixner Cc: Valentin Schneider Cc: Nicolas Pitre Cc: Peter Zijlstra Link: https://lore.kernel.org/20250722103714.64eba013@gandalf.local.home Signed-off-by: Steven Rostedt (Google) Acked-by: Catalin Marinas --- include/trace/events/ipi.h | 58 ++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/trace/events/ipi.h b/include/trace/events/ipi.h index 3de9bfc982ce..9912f0ded81d 100644 --- a/include/trace/events/ipi.h +++ b/include/trace/events/ipi.h @@ -7,34 +7,6 @@ #include -/** - * ipi_raise - called when a smp cross call is made - * - * @mask: mask of recipient CPUs for the IPI - * @reason: string identifying the IPI purpose - * - * It is necessary for @reason to be a static string declared with - * __tracepoint_string. - */ -TRACE_EVENT(ipi_raise, - - TP_PROTO(const struct cpumask *mask, const char *reason), - - TP_ARGS(mask, reason), - - TP_STRUCT__entry( - __bitmask(target_cpus, nr_cpumask_bits) - __field(const char *, reason) - ), - - TP_fast_assign( - __assign_bitmask(target_cpus, cpumask_bits(mask), nr_cpumask_bits); - __entry->reason = reason; - ), - - TP_printk("target_mask=%s (%s)", __get_bitmask(target_cpus), __entry->reason) -); - TRACE_EVENT(ipi_send_cpu, TP_PROTO(const unsigned int cpu, unsigned long callsite, void *callback), @@ -79,6 +51,35 @@ TRACE_EVENT(ipi_send_cpumask, __get_cpumask(cpumask), __entry->callsite, __entry->callback) ); +#ifdef CONFIG_HAVE_EXTRA_IPI_TRACEPOINTS +/** + * ipi_raise - called when a smp cross call is made + * + * @mask: mask of recipient CPUs for the IPI + * @reason: string identifying the IPI purpose + * + * It is necessary for @reason to be a static string declared with + * __tracepoint_string. + */ +TRACE_EVENT(ipi_raise, + + TP_PROTO(const struct cpumask *mask, const char *reason), + + TP_ARGS(mask, reason), + + TP_STRUCT__entry( + __bitmask(target_cpus, nr_cpumask_bits) + __field(const char *, reason) + ), + + TP_fast_assign( + __assign_bitmask(target_cpus, cpumask_bits(mask), nr_cpumask_bits); + __entry->reason = reason; + ), + + TP_printk("target_mask=%s (%s)", __get_bitmask(target_cpus), __entry->reason) +); + DECLARE_EVENT_CLASS(ipi_handler, TP_PROTO(const char *reason), @@ -127,6 +128,7 @@ DEFINE_EVENT(ipi_handler, ipi_exit, TP_ARGS(reason) ); +#endif /* CONFIG_HAVE_EXTRA_IPI_TRACEPOINTS */ #endif /* _TRACE_IPI_H */ -- cgit v1.2.3 From 1bbdb81a98363fd5cd0c2ac16ad5346bdf814dff Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Tue, 22 Jul 2025 12:13:29 +0300 Subject: devlink: Fix excessive stack usage in rate TC bandwidth parsing The devlink_nl_rate_tc_bw_parse function uses a large stack array for devlink attributes, which triggers a warning about excessive stack usage: net/devlink/rate.c: In function 'devlink_nl_rate_tc_bw_parse': net/devlink/rate.c:382:1: error: the frame size of 1648 bytes is larger than 1536 bytes [-Werror=frame-larger-than=] Introduce a separate attribute set specifically for rate TC bandwidth parsing that only contains the two attributes actually used: index and bandwidth. This reduces the stack array from DEVLINK_ATTR_MAX entries to just 2 entries, solving the stack usage issue. Update devlink selftest to use the new 'index' and 'bw' attribute names consistent with the YAML spec. Example usage with ynl with the new spec: ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/devlink.yaml \ --do rate-set --json '{ "bus-name": "pci", "dev-name": "0000:08:00.0", "port-index": 1, "rate-tc-bws": [ {"index": 0, "bw": 50}, {"index": 1, "bw": 50}, {"index": 2, "bw": 0}, {"index": 3, "bw": 0}, {"index": 4, "bw": 0}, {"index": 5, "bw": 0}, {"index": 6, "bw": 0}, {"index": 7, "bw": 0} ] }' ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/devlink.yaml \ --do rate-get --json '{ "bus-name": "pci", "dev-name": "0000:08:00.0", "port-index": 1 }' output for rate-get: {'bus-name': 'pci', 'dev-name': '0000:08:00.0', 'port-index': 1, 'rate-tc-bws': [{'bw': 50, 'index': 0}, {'bw': 50, 'index': 1}, {'bw': 0, 'index': 2}, {'bw': 0, 'index': 3}, {'bw': 0, 'index': 4}, {'bw': 0, 'index': 5}, {'bw': 0, 'index': 6}, {'bw': 0, 'index': 7}], 'rate-tx-max': 0, 'rate-tx-priority': 0, 'rate-tx-share': 0, 'rate-tx-weight': 0, 'rate-type': 'leaf'} Fixes: 566e8f108fc7 ("devlink: Extend devlink rate API with traffic classes bandwidth management") Reported-by: Arnd Bergmann Closes: https://lore.kernel.org/netdev/20250708160652.1810573-1-arnd@kernel.org/ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507171943.W7DJcs6Y-lkp@intel.com/ Suggested-by: Jakub Kicinski Signed-off-by: Carolina Jubran Tested-by: Carolina Jubran Signed-off-by: Tariq Toukan Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/1753175609-330621-1-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index e72bcc239afd..9fcb25a0f447 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -635,8 +635,6 @@ enum devlink_attr { DEVLINK_ATTR_REGION_DIRECT, /* flag */ DEVLINK_ATTR_RATE_TC_BWS, /* nested */ - DEVLINK_ATTR_RATE_TC_INDEX, /* u8 */ - DEVLINK_ATTR_RATE_TC_BW, /* u32 */ /* Add new attributes above here, update the spec in * Documentation/netlink/specs/devlink.yaml and re-generate @@ -647,6 +645,15 @@ enum devlink_attr { DEVLINK_ATTR_MAX = __DEVLINK_ATTR_MAX - 1 }; +enum devlink_rate_tc_attr { + DEVLINK_RATE_TC_ATTR_UNSPEC, + DEVLINK_RATE_TC_ATTR_INDEX, /* u8 */ + DEVLINK_RATE_TC_ATTR_BW, /* u32 */ + + __DEVLINK_RATE_TC_ATTR_MAX, + DEVLINK_RATE_TC_ATTR_MAX = __DEVLINK_RATE_TC_ATTR_MAX - 1 +}; + /* Mapping between internal resource described by the field and system * structure */ -- cgit v1.2.3 From f3d85c9ee51036ac7ed129ec16eef5df2192763e Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 21 Jul 2025 11:18:24 +0900 Subject: netmem: introduce struct netmem_desc mirroring struct page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To simplify struct page, the page pool members of struct page should be moved to other, allowing these members to be removed from struct page. Introduce a network memory descriptor to store the members, struct netmem_desc, and make it union'ed with the existing fields in struct net_iov, allowing to organize the fields of struct net_iov. Signed-off-by: Byungchul Park Reviewed-by: Toke Høiland-Jørgensen Reviewed-by: Pavel Begunkov Reviewed-by: Mina Almasry Reviewed-by: Vlastimil Babka Acked-by: Harry Yoo Link: https://patch.msgid.link/20250721021835.63939-2-byungchul@sk.com Signed-off-by: Jakub Kicinski --- include/net/netmem.h | 116 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 95 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/net/netmem.h b/include/net/netmem.h index de1d95f04076..535cf17b9134 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -12,6 +12,50 @@ #include #include +/* These fields in struct page are used by the page_pool and net stack: + * + * struct { + * unsigned long pp_magic; + * struct page_pool *pp; + * unsigned long _pp_mapping_pad; + * unsigned long dma_addr; + * atomic_long_t pp_ref_count; + * }; + * + * We mirror the page_pool fields here so the page_pool can access these + * fields without worrying whether the underlying fields belong to a + * page or netmem_desc. + * + * CAUTION: Do not update the fields in netmem_desc without also + * updating the anonymous aliasing union in struct net_iov. + */ +struct netmem_desc { + unsigned long _flags; + unsigned long pp_magic; + struct page_pool *pp; + unsigned long _pp_mapping_pad; + unsigned long dma_addr; + atomic_long_t pp_ref_count; +}; + +#define NETMEM_DESC_ASSERT_OFFSET(pg, desc) \ + static_assert(offsetof(struct page, pg) == \ + offsetof(struct netmem_desc, desc)) +NETMEM_DESC_ASSERT_OFFSET(flags, _flags); +NETMEM_DESC_ASSERT_OFFSET(pp_magic, pp_magic); +NETMEM_DESC_ASSERT_OFFSET(pp, pp); +NETMEM_DESC_ASSERT_OFFSET(_pp_mapping_pad, _pp_mapping_pad); +NETMEM_DESC_ASSERT_OFFSET(dma_addr, dma_addr); +NETMEM_DESC_ASSERT_OFFSET(pp_ref_count, pp_ref_count); +#undef NETMEM_DESC_ASSERT_OFFSET + +/* + * Since struct netmem_desc uses the space in struct page, the size + * should be checked, until struct netmem_desc has its own instance from + * slab, to avoid conflicting with other members within struct page. + */ +static_assert(sizeof(struct netmem_desc) <= offsetof(struct page, _refcount)); + /* net_iov */ DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers); @@ -30,13 +74,48 @@ enum net_iov_type { NET_IOV_MAX = ULONG_MAX }; +/* A memory descriptor representing abstract networking I/O vectors, + * generally for non-pages memory that doesn't have its corresponding + * struct page and needs to be explicitly allocated through slab. + * + * net_iovs are allocated and used by networking code, and the size of + * the chunk is PAGE_SIZE. + * + * This memory can be any form of non-struct paged memory. Examples + * include imported dmabuf memory and imported io_uring memory. See + * net_iov_type for all the supported types. + * + * @pp_magic: pp field, similar to the one in struct page/struct + * netmem_desc. + * @pp: the pp this net_iov belongs to, if any. + * @dma_addr: the dma addrs of the net_iov. Needed for the network + * card to send/receive this net_iov. + * @pp_ref_count: the pp ref count of this net_iov, exactly the same + * usage as struct page/struct netmem_desc. + * @owner: the net_iov_area this net_iov belongs to, if any. + * @type: the type of the memory. Different types of net_iovs are + * supported. + */ struct net_iov { - enum net_iov_type type; - unsigned long pp_magic; - struct page_pool *pp; + union { + struct netmem_desc desc; + + /* XXX: The following part should be removed once all + * the references to them are converted so as to be + * accessed via netmem_desc e.g. niov->desc.pp instead + * of niov->pp. + */ + struct { + unsigned long _flags; + unsigned long pp_magic; + struct page_pool *pp; + unsigned long _pp_mapping_pad; + unsigned long dma_addr; + atomic_long_t pp_ref_count; + }; + }; struct net_iov_area *owner; - unsigned long dma_addr; - atomic_long_t pp_ref_count; + enum net_iov_type type; }; struct net_iov_area { @@ -48,27 +127,22 @@ struct net_iov_area { unsigned long base_virtual; }; -/* These fields in struct page are used by the page_pool and net stack: +/* net_iov is union'ed with struct netmem_desc mirroring struct page, so + * the page_pool can access these fields without worrying whether the + * underlying fields are accessed via netmem_desc or directly via + * net_iov, until all the references to them are converted so as to be + * accessed via netmem_desc e.g. niov->desc.pp instead of niov->pp. * - * struct { - * unsigned long pp_magic; - * struct page_pool *pp; - * unsigned long _pp_mapping_pad; - * unsigned long dma_addr; - * atomic_long_t pp_ref_count; - * }; - * - * We mirror the page_pool fields here so the page_pool can access these fields - * without worrying whether the underlying fields belong to a page or net_iov. - * - * The non-net stack fields of struct page are private to the mm stack and must - * never be mirrored to net_iov. + * The non-net stack fields of struct page are private to the mm stack + * and must never be mirrored to net_iov. */ -#define NET_IOV_ASSERT_OFFSET(pg, iov) \ - static_assert(offsetof(struct page, pg) == \ +#define NET_IOV_ASSERT_OFFSET(desc, iov) \ + static_assert(offsetof(struct netmem_desc, desc) == \ offsetof(struct net_iov, iov)) +NET_IOV_ASSERT_OFFSET(_flags, _flags); NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic); NET_IOV_ASSERT_OFFSET(pp, pp); +NET_IOV_ASSERT_OFFSET(_pp_mapping_pad, _pp_mapping_pad); NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr); NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count); #undef NET_IOV_ASSERT_OFFSET -- cgit v1.2.3 From 38a436d4e26487e16ac6c1de17c030b1bef84d83 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 21 Jul 2025 11:18:25 +0900 Subject: netmem: use netmem_desc instead of page to access ->pp in __netmem_get_pp() To eliminate the use of the page pool fields in struct page, the page pool code should use netmem descriptor and APIs instead. However, __netmem_get_pp() still accesses ->pp via struct page. So change it to use struct netmem_desc instead, since ->pp no longer will be available in struct page. While at it, add a helper, __netmem_to_nmdesc(), that can be used to unsafely get pointer to netmem_desc backing the netmem_ref, only when the netmem_ref is always backed by system memory. Signed-off-by: Byungchul Park Link: https://patch.msgid.link/20250721021835.63939-3-byungchul@sk.com Signed-off-by: Jakub Kicinski --- include/net/netmem.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netmem.h b/include/net/netmem.h index 535cf17b9134..097bc74d9555 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -247,6 +247,24 @@ static inline unsigned long netmem_pfn_trace(netmem_ref netmem) return page_to_pfn(netmem_to_page(netmem)); } +/** + * __netmem_to_nmdesc - unsafely get pointer to the &netmem_desc backing + * @netmem + * @netmem: netmem reference to convert + * + * Unsafe version that can be used only when @netmem is always backed by + * system memory, performs faster and generates smaller object code (no + * check for the LSB, no WARN). When @netmem points to IOV, provokes + * undefined behaviour. + * + * Return: pointer to the &netmem_desc (garbage if @netmem is not backed + * by system memory). + */ +static inline struct netmem_desc *__netmem_to_nmdesc(netmem_ref netmem) +{ + return (__force struct netmem_desc *)netmem; +} + /* __netmem_clear_lsb - convert netmem_ref to struct net_iov * for access to * common fields. * @netmem: netmem reference to extract as net_iov. @@ -280,7 +298,7 @@ static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem) */ static inline struct page_pool *__netmem_get_pp(netmem_ref netmem) { - return __netmem_to_page(netmem)->pp; + return __netmem_to_nmdesc(netmem)->pp; } static inline struct page_pool *netmem_get_pp(netmem_ref netmem) -- cgit v1.2.3 From 89ade7c7306508f46b811cd43960eaed88e0e1dd Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 21 Jul 2025 11:18:26 +0900 Subject: netmem, mlx4: access ->pp_ref_count through netmem_desc instead of page To eliminate the use of struct page in page pool, the page pool users should use netmem descriptor and APIs instead. Make mlx4 access ->pp_ref_count through netmem_desc instead of page. While at it, add a helper, pp_page_to_nmdesc() and __pp_page_to_nmdesc(), that can be used to get netmem_desc from page only if it's a pp page. For now that netmem_desc overlays on page, it can be achieved by just casting, and use macro and _Generic to cover const casting as well. Plus, change page_pool_page_is_pp() to check for 'const struct page *' instead of 'struct page *' since it doesn't modify data and additionally covers const type. Signed-off-by: Byungchul Park Link: https://patch.msgid.link/20250721021835.63939-4-byungchul@sk.com Signed-off-by: Jakub Kicinski --- include/linux/mm.h | 4 ++-- include/net/netmem.h | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index fa538feaa8d9..ae50c1641bed 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4178,12 +4178,12 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); #define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL) #ifdef CONFIG_PAGE_POOL -static inline bool page_pool_page_is_pp(struct page *page) +static inline bool page_pool_page_is_pp(const struct page *page) { return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE; } #else -static inline bool page_pool_page_is_pp(struct page *page) +static inline bool page_pool_page_is_pp(const struct page *page) { return false; } diff --git a/include/net/netmem.h b/include/net/netmem.h index 097bc74d9555..f7dacc9e75fd 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -285,6 +285,23 @@ static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem) return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV); } +/* XXX: How to extract netmem_desc from page must be changed, once + * netmem_desc no longer overlays on page and will be allocated through + * slab. + */ +#define __pp_page_to_nmdesc(p) (_Generic((p), \ + const struct page * : (const struct netmem_desc *)(p), \ + struct page * : (struct netmem_desc *)(p))) + +/* CAUTION: Check if the page is a pp page before calling this helper or + * know it's a pp page. + */ +#define pp_page_to_nmdesc(p) \ +({ \ + DEBUG_NET_WARN_ON_ONCE(!page_pool_page_is_pp(p)); \ + __pp_page_to_nmdesc(p); \ +}) + /** * __netmem_get_pp - unsafely get pointer to the &page_pool backing @netmem * @netmem: netmem reference to get the pointer from -- cgit v1.2.3 From 9dfd871a3e2ed433d5fee519b90b7e619b972043 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 21 Jul 2025 11:18:35 +0900 Subject: libeth: xdp: access ->pp through netmem_desc instead of page To eliminate the use of struct page in page pool, the page pool users should use netmem descriptor and APIs instead. Make xdp access ->pp through netmem_desc instead of page. Signed-off-by: Byungchul Park Link: https://patch.msgid.link/20250721021835.63939-13-byungchul@sk.com Signed-off-by: Jakub Kicinski --- include/net/libeth/xdp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index 6ce6aec6884c..f4880b50e804 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -1292,7 +1292,7 @@ static inline void libeth_xdp_prepare_buff(struct libeth_xdp_buff *xdp, xdp_init_buff(&xdp->base, fqe->truesize, xdp->base.rxq); #endif xdp_prepare_buff(&xdp->base, page_address(page) + fqe->offset, - page->pp->p.offset, len, true); + pp_page_to_nmdesc(page)->pp->p.offset, len, true); } /** -- cgit v1.2.3 From 320d031ad6e4d67e8e1ab08ac71efda02bc85683 Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Tue, 22 Jul 2025 11:59:10 +0200 Subject: sched: Struct definition and parsing of dualpi2 qdisc DualPI2 is the reference implementation of IETF RFC9332 DualQ Coupled AQM (https://datatracker.ietf.org/doc/html/rfc9332) providing two queues called low latency (L-queue) and classic (C-queue). By default, it enqueues non-ECN and ECT(0) packets into the C-queue and ECT(1) and CE packets into the low latency queue (L-queue), as per IETF RFC9332 spec. This patch defines the dualpi2 Qdisc structure and parsing, and the following two patches include dumping and enqueue/dequeue for the DualPI2. Signed-off-by: Chia-Yu Chang Link: https://patch.msgid.link/20250722095915.24485-2-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_sched.h | 53 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 3e41349f3fa2..75d685ea8368 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -1211,4 +1211,57 @@ enum { #define TCA_ETS_MAX (__TCA_ETS_MAX - 1) +/* DUALPI2 */ +enum tc_dualpi2_drop_overload { + TC_DUALPI2_DROP_OVERLOAD_OVERFLOW = 0, + TC_DUALPI2_DROP_OVERLOAD_DROP = 1, + __TCA_DUALPI2_DROP_OVERLOAD_MAX, +}; +#define TCA_DUALPI2_DROP_OVERLOAD_MAX (__TCA_DUALPI2_DROP_OVERLOAD_MAX - 1) + +enum tc_dualpi2_drop_early { + TC_DUALPI2_DROP_EARLY_DROP_DEQUEUE = 0, + TC_DUALPI2_DROP_EARLY_DROP_ENQUEUE = 1, + __TCA_DUALPI2_DROP_EARLY_MAX, +}; +#define TCA_DUALPI2_DROP_EARLY_MAX (__TCA_DUALPI2_DROP_EARLY_MAX - 1) + +enum tc_dualpi2_ecn_mask { + TC_DUALPI2_ECN_MASK_L4S_ECT = 1, + TC_DUALPI2_ECN_MASK_CLA_ECT = 2, + TC_DUALPI2_ECN_MASK_ANY_ECT = 3, + __TCA_DUALPI2_ECN_MASK_MAX, +}; +#define TCA_DUALPI2_ECN_MASK_MAX (__TCA_DUALPI2_ECN_MASK_MAX - 1) + +enum tc_dualpi2_split_gso { + TC_DUALPI2_SPLIT_GSO_NO_SPLIT_GSO = 0, + TC_DUALPI2_SPLIT_GSO_SPLIT_GSO = 1, + __TCA_DUALPI2_SPLIT_GSO_MAX, +}; +#define TCA_DUALPI2_SPLIT_GSO_MAX (__TCA_DUALPI2_SPLIT_GSO_MAX - 1) + +enum { + TCA_DUALPI2_UNSPEC, + TCA_DUALPI2_LIMIT, /* Packets */ + TCA_DUALPI2_MEMORY_LIMIT, /* Bytes */ + TCA_DUALPI2_TARGET, /* us */ + TCA_DUALPI2_TUPDATE, /* us */ + TCA_DUALPI2_ALPHA, /* Hz scaled up by 256 */ + TCA_DUALPI2_BETA, /* Hz scaled up by 256 */ + TCA_DUALPI2_STEP_THRESH_PKTS, /* Step threshold in packets */ + TCA_DUALPI2_STEP_THRESH_US, /* Step threshold in microseconds */ + TCA_DUALPI2_MIN_QLEN_STEP, /* Minimum qlen to apply STEP_THRESH */ + TCA_DUALPI2_COUPLING, /* Coupling factor between queues */ + TCA_DUALPI2_DROP_OVERLOAD, /* Whether to drop on overload */ + TCA_DUALPI2_DROP_EARLY, /* Whether to drop on enqueue */ + TCA_DUALPI2_C_PROTECTION, /* Percentage */ + TCA_DUALPI2_ECN_MASK, /* L4S queue classification mask */ + TCA_DUALPI2_SPLIT_GSO, /* Split GSO packets at enqueue */ + TCA_DUALPI2_PAD, + __TCA_DUALPI2_MAX +}; + +#define TCA_DUALPI2_MAX (__TCA_DUALPI2_MAX - 1) + #endif -- cgit v1.2.3 From d4de8bffbef4a7e4ad14b9fd2ff8e2d0e06b3fa5 Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Tue, 22 Jul 2025 11:59:11 +0200 Subject: sched: Dump configuration and statistics of dualpi2 qdisc The configuration and statistics dump of the DualPI2 Qdisc provides information related to both queues, such as packet numbers and queuing delays in the L-queue and C-queue, as well as general information such as probability value, WRR credits, memory usage, packet marking counters, max queue size, etc. The following patch includes enqueue/dequeue for DualPI2. Signed-off-by: Chia-Yu Chang Link: https://patch.msgid.link/20250722095915.24485-3-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_sched.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 75d685ea8368..c2da76e78bad 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -1264,4 +1264,19 @@ enum { #define TCA_DUALPI2_MAX (__TCA_DUALPI2_MAX - 1) +struct tc_dualpi2_xstats { + __u32 prob; /* current probability */ + __u32 delay_c; /* current delay in C queue */ + __u32 delay_l; /* current delay in L queue */ + __u32 packets_in_c; /* number of packets enqueued in C queue */ + __u32 packets_in_l; /* number of packets enqueued in L queue */ + __u32 maxq; /* maximum queue size */ + __u32 ecn_mark; /* packets marked with ecn*/ + __u32 step_marks; /* ECN marks due to the step AQM */ + __s32 credit; /* current c_protection credit */ + __u32 memory_used; /* Memory used by both queues */ + __u32 max_memory_used; /* Maximum used memory */ + __u32 memory_limit; /* Memory limit of both queues */ +}; + #endif -- cgit v1.2.3 From 8f9516daedd67097a0c6e463fcb7a42b5ee9d477 Mon Sep 17 00:00:00 2001 From: Koen De Schepper Date: Tue, 22 Jul 2025 11:59:12 +0200 Subject: sched: Add enqueue/dequeue of dualpi2 qdisc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DualPI2 provides L4S-type low latency & loss to traffic that uses a scalable congestion controller (e.g. TCP-Prague, DCTCP) without degrading the performance of 'classic' traffic (e.g. Reno, Cubic etc.). It is to be the reference implementation of IETF RFC9332 DualQ Coupled AQM (https://datatracker.ietf.org/doc/html/rfc9332). Note that creating two independent queues cannot meet the goal of DualPI2 mentioned in RFC9332: "...to preserve fairness between ECN-capable and non-ECN-capable traffic." Further, it could even lead to starvation of Classic traffic, which is also inconsistent with the requirements in RFC9332: "...although priority MUST be bounded in order not to starve Classic traffic." DualPI2 is designed to maintain approximate per-flow fairness on L-queue and C-queue by forming a single qdisc using the coupling factor and scheduler between two queues. The qdisc provides two queues called low latency and classic. It classifies packets based on the ECN field in the IP headers. By default it directs non-ECN and ECT(0) into the classic queue and ECT(1) and CE into the low latency queue, as per the IETF spec. Each queue runs its own AQM: * The classic AQM is called PI2, which is similar to the PIE AQM but more responsive and simpler. Classic traffic requires a decent target queue (default 15ms for Internet deployment) to fully utilize the link and to avoid high drop rates. * The low latency AQM is, by default, a very shallow ECN marking threshold (1ms) similar to that used for DCTCP. The DualQ isolates the low queuing delay of the Low Latency queue from the larger delay of the 'Classic' queue. However, from a bandwidth perspective, flows in either queue will share out the link capacity as if there was just a single queue. This bandwidth pooling effect is achieved by coupling together the drop and ECN-marking probabilities of the two AQMs. The PI2 AQM has two main parameters in addition to its target delay. The integral gain factor alpha is used to slowly correct any persistent standing queue error from the target delay, while the proportional gain factor beta is used to quickly compensate for queue changes (growth or shrinkage). Either alpha and beta are given as a parameter, or they can be calculated by tc from alternative typical and maximum RTT parameters. Internally, the output of a linear Proportional Integral (PI) controller is used for both queues. This output is squared to calculate the drop or ECN-marking probability of the classic queue. This counterbalances the square-root rate equation of Reno/Cubic, which is the trick that balances flow rates across the queues. For the ECN-marking probability of the low latency queue, the output of the base AQM is multiplied by a coupling factor. This determines the balance between the flow rates in each queue. The default setting makes the flow rates roughly equal, which should be generally applicable. If DUALPI2 AQM has detected overload (due to excessive non-responsive traffic in either queue), it will switch to signaling congestion solely using drop, irrespective of the ECN field. Alternatively, it can be configured to limit the drop probability and let the queue grow and eventually overflow (like tail-drop). GSO splitting in DUALPI2 is configurable from userspace while the default behavior is to split gso. When running DUALPI2 at unshaped 10gigE with 4 download streams test, splitting gso apart results in halving the latency with no loss in throughput: Summary of tcp_4down run 'no_split_gso': avg median # data pts Ping (ms) ICMP : 0.53 0.30 ms 350 TCP download avg : 2326.86 N/A Mbits/s 350 TCP download sum : 9307.42 N/A Mbits/s 350 TCP download::1 : 2672.99 2568.73 Mbits/s 350 TCP download::2 : 2586.96 2570.51 Mbits/s 350 TCP download::3 : 1786.26 1798.82 Mbits/s 350 TCP download::4 : 2261.21 2309.49 Mbits/s 350 Summart of tcp_4down run 'split_gso': avg median # data pts Ping (ms) ICMP   : 0.22 0.23 ms 350 TCP download avg : 2335.02 N/A Mbits/s 350 TCP download sum : 9340.09 N/A Mbits/s 350 TCP download::1 : 2335.30 2334.22 Mbits/s 350 TCP download::2 : 2334.72 2334.20 Mbits/s 350 TCP download::3 : 2335.28 2334.58 Mbits/s 350 TCP download::4 : 2334.79 2334.39 Mbits/s 350 A similar result is observed when running DUALPI2 at unshaped 1gigE with 1 download stream test: Summary of tcp_1down run 'no_split_gso': avg median # data pts Ping (ms) ICMP : 1.13 1.25 ms 350 TCP download : 941.41 941.46 Mbits/s 350 Summart of tcp_1down run 'split_gso': avg median # data pts Ping (ms) ICMP : 0.51 0.55 ms 350 TCP download : 941.41 941.45 Mbits/s 350 Additional details can be found in the draft: https://datatracker.ietf.org/doc/html/rfc9332 Signed-off-by: Koen De Schepper Co-developed-by: Olga Albisser Signed-off-by: Olga Albisser Co-developed-by: Olivier Tilmans Signed-off-by: Olivier Tilmans Co-developed-by: Henrik Steen Signed-off-by: Henrik Steen Co-developed-by: Chia-Yu Chang Signed-off-by: Chia-Yu Chang Signed-off-by: Bob Briscoe Signed-off-by: Ilpo Järvinen Acked-by: Dave Taht Link: https://patch.msgid.link/20250722095915.24485-4-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index e19184dd1b0f..d8ff24a33459 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -126,6 +126,7 @@ FN(CANFD_RX_INVALID_FRAME) \ FN(CANXL_RX_INVALID_FRAME) \ FN(PFMEMALLOC) \ + FN(DUALPI2_STEP_DROP) \ FNe(MAX) /** @@ -604,6 +605,11 @@ enum skb_drop_reason { * reached a path or socket not eligible for use of memory reserves */ SKB_DROP_REASON_PFMEMALLOC, + /** + * @SKB_DROP_REASON_DUALPI2_STEP_DROP: dropped by the step drop + * threshold of DualPI2 qdisc. + */ + SKB_DROP_REASON_DUALPI2_STEP_DROP, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen -- cgit v1.2.3 From 28517c8b6275a9cd25a4974d0e4d58eaba465a67 Mon Sep 17 00:00:00 2001 From: Dimitri Fedrau Date: Wed, 23 Jul 2025 19:34:56 +0200 Subject: pwm: mc33xs2410: add hwmon support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support for hwmon is provided by a separate driver residing in hwmon subsystem which is implemented as auxiliary device. Add handling of this device. Signed-off-by: Dimitri Fedrau Link: https://lore.kernel.org/r/20250723-mc33xs2410-hwmon-v5-1-f62aab71cd59@liebherr.com Signed-off-by: Uwe Kleine-König --- include/linux/mc33xs2410.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/mc33xs2410.h (limited to 'include') diff --git a/include/linux/mc33xs2410.h b/include/linux/mc33xs2410.h new file mode 100644 index 000000000000..31c0edf10dd7 --- /dev/null +++ b/include/linux/mc33xs2410.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Liebherr-Electronics and Drives GmbH + */ +#ifndef _MC33XS2410_H +#define _MC33XS2410_H + +#include + +MODULE_IMPORT_NS("PWM_MC33XS2410"); + +int mc33xs2410_read_reg_ctrl(struct spi_device *spi, u8 reg, u16 *val); +int mc33xs2410_read_reg_diag(struct spi_device *spi, u8 reg, u16 *val); +int mc33xs2410_modify_reg(struct spi_device *spi, u8 reg, u8 mask, u8 val); + +#endif /* _MC33XS2410_H */ -- cgit v1.2.3 From e038d985c9823a12cd64fa077d0c5aca2c644b67 Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Tue, 10 Jun 2025 15:29:46 +0000 Subject: mfd: Add Apple Silicon System Management Controller The System Management Controller (SMC) on Apple Silicon machines is a piece of hardware that exposes various functionalities such as temperature sensors, voltage/power meters, shutdown/reboot handling, GPIOs and more. Communication happens via a shared mailbox using the RTKit protocol which is also used for other co-processors. The SMC protocol then allows reading and writing many different keys which implement the various features. The MFD core device handles this protocol and exposes it to the sub-devices. Some of the sub-devices are potentially also useful on pre-M1 Apple machines and support for SMCs on these machines can be added at a later time. Co-developed-by: Hector Martin Signed-off-by: Hector Martin Reviewed-by: Alyssa Rosenzweig Reviewed-by: Neal Gompa Signed-off-by: Sven Peter Link: https://lore.kernel.org/r/20250610-smc-6-15-v7-5-556cafd771d3@kernel.org Signed-off-by: Lee Jones --- include/linux/mfd/macsmc.h | 279 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 279 insertions(+) create mode 100644 include/linux/mfd/macsmc.h (limited to 'include') diff --git a/include/linux/mfd/macsmc.h b/include/linux/mfd/macsmc.h new file mode 100644 index 000000000000..6b13f01a8592 --- /dev/null +++ b/include/linux/mfd/macsmc.h @@ -0,0 +1,279 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* + * Apple SMC (System Management Controller) core definitions + * + * Copyright (C) The Asahi Linux Contributors + */ + +#ifndef _LINUX_MFD_MACSMC_H +#define _LINUX_MFD_MACSMC_H + +#include + +/** + * typedef smc_key - Alias for u32 to be used for SMC keys + * + * SMC keys are 32bit integers containing packed ASCII characters in natural + * integer order, i.e. 0xAABBCCDD, which represent the FourCC ABCD. + * The SMC driver is designed with this assumption and ensures the right + * endianness is used when these are stored to memory and sent to or received + * from the actual SMC firmware (which can be done in either shared memory or + * as 64bit mailbox message on Apple Silicon). + * Internally, SMC stores these keys in a table sorted lexicographically and + * allows resolving an index into this table to the corresponding SMC key. + * Thus, storing keys as u32 is very convenient as it allows to e.g. use + * normal comparison operators which directly map to the natural order used + * by SMC firmware. + * + * This simple type alias is introduced to allow easy recognition of SMC key + * variables and arguments. + */ +typedef u32 smc_key; + +/** + * SMC_KEY - Convert FourCC SMC keys in source code to smc_key + * + * This macro can be used to easily define FourCC SMC keys in source code + * and convert these to u32 / smc_key, e.g. SMC_KEY(NTAP) will expand to + * 0x4e544150. + * + * @s: FourCC SMC key to be converted + */ +#define SMC_KEY(s) (smc_key)(_SMC_KEY(#s)) +#define _SMC_KEY(s) (((s)[0] << 24) | ((s)[1] << 16) | ((s)[2] << 8) | (s)[3]) + +#define APPLE_SMC_READABLE BIT(7) +#define APPLE_SMC_WRITABLE BIT(6) +#define APPLE_SMC_FUNCTION BIT(4) + +/** + * struct apple_smc_key_info - Information for a SMC key as returned by SMC + * @type_code: FourCC code indicating the type for this key. + * Known types: + * ch8*: ASCII string + * flag: Boolean, 1 or 0 + * flt: 32-bit single-precision IEEE 754 float + * hex: Binary data + * ioft: 64bit Unsigned fixed-point intger (48.16) + * {si,ui}{8,16,32,64}: Signed/Unsigned 8-/16-/32-/64-bit integer + * @size: Size of the buffer associated with this key + * @flags: Bitfield encoding flags (APPLE_SMC_{READABLE,WRITABLE,FUNCTION}) + */ +struct apple_smc_key_info { + u32 type_code; + u8 size; + u8 flags; +}; + +/** + * enum apple_smc_boot_stage - SMC boot stage + * @APPLE_SMC_BOOTING: SMC is booting + * @APPLE_SMC_INITIALIZED: SMC is initialized and ready to use + * @APPLE_SMC_ERROR_NO_SHMEM: Shared memory could not be initialized during boot + * @APPLE_SMC_ERROR_CRASHED: SMC has crashed + */ +enum apple_smc_boot_stage { + APPLE_SMC_BOOTING, + APPLE_SMC_INITIALIZED, + APPLE_SMC_ERROR_NO_SHMEM, + APPLE_SMC_ERROR_CRASHED +}; + +/** + * struct apple_smc + * @dev: Underlying device struct for the physical backend device + * @key_count: Number of available SMC keys + * @first_key: First valid SMC key + * @last_key: Last valid SMC key + * @event_handlers: Notifier call chain for events received from SMC + * @rtk: Pointer to Apple RTKit instance + * @init_done: Completion for initialization + * @boot_stage: Current boot stage of SMC + * @sram: Pointer to SRAM resource + * @sram_base: SRAM base address + * @shmem: RTKit shared memory structure for SRAM + * @msg_id: Current message id for commands, will be incremented for each command + * @atomic_mode: Flag set when atomic mode is entered + * @atomic_pending: Flag indicating pending atomic command + * @cmd_done: Completion for command execution in non-atomic mode + * @cmd_ret: Return value from SMC for last command + * @mutex: Mutex for non-atomic mode + * @lock: Spinlock for atomic mode + */ +struct apple_smc { + struct device *dev; + + u32 key_count; + smc_key first_key; + smc_key last_key; + + struct blocking_notifier_head event_handlers; + + struct apple_rtkit *rtk; + + struct completion init_done; + enum apple_smc_boot_stage boot_stage; + + struct resource *sram; + void __iomem *sram_base; + struct apple_rtkit_shmem shmem; + + unsigned int msg_id; + + bool atomic_mode; + bool atomic_pending; + struct completion cmd_done; + u64 cmd_ret; + + struct mutex mutex; + spinlock_t lock; +}; + +/** + * apple_smc_read - Read size bytes from given SMC key into buf + * @smc: Pointer to apple_smc struct + * @key: smc_key to be read + * @buf: Buffer into which size bytes of data will be read from SMC + * @size: Number of bytes to be read into buf + * + * Return: Zero on success, negative errno on error + */ +int apple_smc_read(struct apple_smc *smc, smc_key key, void *buf, size_t size); + +/** + * apple_smc_write - Write size bytes into given SMC key from buf + * @smc: Pointer to apple_smc struct + * @key: smc_key data will be written to + * @buf: Buffer from which size bytes of data will be written to SMC + * @size: Number of bytes to be written + * + * Return: Zero on success, negative errno on error + */ +int apple_smc_write(struct apple_smc *smc, smc_key key, void *buf, size_t size); + +/** + * apple_smc_enter_atomic - Enter atomic mode to be able to use apple_smc_write_atomic + * @smc: Pointer to apple_smc struct + * + * This function switches the SMC backend to atomic mode which allows the + * use of apple_smc_write_atomic while disabling *all* other functions. + * This is only used for shutdown/reboot which requires writing to a SMC + * key from atomic context. + * + * Return: Zero on success, negative errno on error + */ +int apple_smc_enter_atomic(struct apple_smc *smc); + +/** + * apple_smc_write_atomic - Write size bytes into given SMC key from buf without sleeping + * @smc: Pointer to apple_smc struct + * @key: smc_key data will be written to + * @buf: Buffer from which size bytes of data will be written to SMC + * @size: Number of bytes to be written + * + * Note that this function will fail if apple_smc_enter_atomic hasn't been + * called before. + * + * Return: Zero on success, negative errno on error + */ +int apple_smc_write_atomic(struct apple_smc *smc, smc_key key, void *buf, size_t size); + +/** + * apple_smc_rw - Write and then read using the given SMC key + * @smc: Pointer to apple_smc struct + * @key: smc_key data will be written to + * @wbuf: Buffer from which size bytes of data will be written to SMC + * @wsize: Number of bytes to be written + * @rbuf: Buffer to which size bytes of data will be read from SMC + * @rsize: Number of bytes to be read + * + * Return: Zero on success, negative errno on error + */ +int apple_smc_rw(struct apple_smc *smc, smc_key key, void *wbuf, size_t wsize, + void *rbuf, size_t rsize); + +/** + * apple_smc_get_key_by_index - Given an index return the corresponding SMC key + * @smc: Pointer to apple_smc struct + * @index: Index to be resolved + * @key: Buffer for SMC key to be returned + * + * Return: Zero on success, negative errno on error + */ +int apple_smc_get_key_by_index(struct apple_smc *smc, int index, smc_key *key); + +/** + * apple_smc_get_key_info - Get key information from SMC + * @smc: Pointer to apple_smc struct + * @key: Key to acquire information for + * @info: Pointer to struct apple_smc_key_info which will be filled + * + * Return: Zero on success, negative errno on error + */ +int apple_smc_get_key_info(struct apple_smc *smc, smc_key key, struct apple_smc_key_info *info); + +/** + * apple_smc_key_exists - Check if the given SMC key exists + * @smc: Pointer to apple_smc struct + * @key: smc_key to be checked + * + * Return: True if the key exists, false otherwise + */ +static inline bool apple_smc_key_exists(struct apple_smc *smc, smc_key key) +{ + return apple_smc_get_key_info(smc, key, NULL) >= 0; +} + +#define APPLE_SMC_TYPE_OPS(type) \ + static inline int apple_smc_read_##type(struct apple_smc *smc, smc_key key, type *p) \ + { \ + int ret = apple_smc_read(smc, key, p, sizeof(*p)); \ + return (ret < 0) ? ret : ((ret != sizeof(*p)) ? -EINVAL : 0); \ + } \ + static inline int apple_smc_write_##type(struct apple_smc *smc, smc_key key, type p) \ + { \ + return apple_smc_write(smc, key, &p, sizeof(p)); \ + } \ + static inline int apple_smc_write_##type##_atomic(struct apple_smc *smc, smc_key key, type p) \ + { \ + return apple_smc_write_atomic(smc, key, &p, sizeof(p)); \ + } \ + static inline int apple_smc_rw_##type(struct apple_smc *smc, smc_key key, \ + type w, type *r) \ + { \ + int ret = apple_smc_rw(smc, key, &w, sizeof(w), r, sizeof(*r)); \ + return (ret < 0) ? ret : ((ret != sizeof(*r)) ? -EINVAL : 0); \ + } + +APPLE_SMC_TYPE_OPS(u64) +APPLE_SMC_TYPE_OPS(u32) +APPLE_SMC_TYPE_OPS(u16) +APPLE_SMC_TYPE_OPS(u8) +APPLE_SMC_TYPE_OPS(s64) +APPLE_SMC_TYPE_OPS(s32) +APPLE_SMC_TYPE_OPS(s16) +APPLE_SMC_TYPE_OPS(s8) + +static inline int apple_smc_read_flag(struct apple_smc *smc, smc_key key, bool *flag) +{ + u8 val; + int ret = apple_smc_read_u8(smc, key, &val); + + if (ret < 0) + return ret; + + *flag = val ? true : false; + return ret; +} + +static inline int apple_smc_write_flag(struct apple_smc *smc, smc_key key, bool state) +{ + return apple_smc_write_u8(smc, key, state ? 1 : 0); +} + +static inline int apple_smc_write_flag_atomic(struct apple_smc *smc, smc_key key, bool state) +{ + return apple_smc_write_u8_atomic(smc, key, state ? 1 : 0); +} + +#endif -- cgit v1.2.3 From 897e8601b9cff1d054cdd53047f568b0e1995726 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Tue, 22 Jul 2025 18:18:17 +0200 Subject: s390/ism: fix concurrency management in ism_cmd() The s390x ISM device data sheet clearly states that only one request-response sequence is allowable per ISM function at any point in time. Unfortunately as of today the s390/ism driver in Linux does not honor that requirement. This patch aims to rectify that. This problem was discovered based on Aliaksei's bug report which states that for certain workloads the ISM functions end up entering error state (with PEC 2 as seen from the logs) after a while and as a consequence connections handled by the respective function break, and for future connection requests the ISM device is not considered -- given it is in a dysfunctional state. During further debugging PEC 3A was observed as well. A kernel message like [ 1211.244319] zpci: 061a:00:00.0: Event 0x2 reports an error for PCI function 0x61a is a reliable indicator of the stated function entering error state with PEC 2. Let me also point out that a kernel message like [ 1211.244325] zpci: 061a:00:00.0: The ism driver bound to the device does not support error recovery is a reliable indicator that the ISM function won't be auto-recovered because the ISM driver currently lacks support for it. On a technical level, without this synchronization, commands (inputs to the FW) may be partially or fully overwritten (corrupted) by another CPU trying to issue commands on the same function. There is hard evidence that this can lead to DMB token values being used as DMB IOVAs, leading to PEC 2 PCI events indicating invalid DMA. But this is only one of the failure modes imaginable. In theory even completely losing one command and executing another one twice and then trying to interpret the outputs as if the command we intended to execute was actually executed and not the other one is also possible. Frankly, I don't feel confident about providing an exhaustive list of possible consequences. Fixes: 684b89bc39ce ("s390/ism: add device driver for internal shared memory") Reported-by: Aliaksei Makarau Tested-by: Mahanta Jambigi Tested-by: Aliaksei Makarau Signed-off-by: Halil Pasic Reviewed-by: Alexandra Winter Signed-off-by: Alexandra Winter Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250722161817.1298473-1-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/ism.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ism.h b/include/linux/ism.h index 5428edd90982..8358b4cd7ba6 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -28,6 +28,7 @@ struct ism_dmb { struct ism_dev { spinlock_t lock; /* protects the ism device */ + spinlock_t cmd_lock; /* serializes cmds */ struct list_head list; struct pci_dev *pdev; -- cgit v1.2.3 From fe473fba6435f631a4c7459c02057bf57457e128 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 7 Jun 2025 21:22:32 +0100 Subject: mfd: twl6030-irq: Remove unused twl6030_mmc_card_detect* twl6030_mmc_card_detect() and twl6030_mmc_card_detect_config() have been unused since 2013's commit b2ff4790612b ("ARM: OMAP2+: Remove legacy omap4_twl6030_hsmmc_init") Remove them. Signed-off-by: "Dr. David Alan Gilbert" Link: https://lore.kernel.org/r/20250607202232.265344-1-linux@treblig.org Signed-off-by: Lee Jones --- include/linux/mfd/twl.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/twl.h b/include/linux/mfd/twl.h index 85dc406173db..b31e07fa4d51 100644 --- a/include/linux/mfd/twl.h +++ b/include/linux/mfd/twl.h @@ -205,27 +205,6 @@ int twl_get_hfclk_rate(void); int twl6030_interrupt_unmask(u8 bit_mask, u8 offset); int twl6030_interrupt_mask(u8 bit_mask, u8 offset); -/* Card detect Configuration for MMC1 Controller on OMAP4 */ -#ifdef CONFIG_TWL4030_CORE -int twl6030_mmc_card_detect_config(void); -#else -static inline int twl6030_mmc_card_detect_config(void) -{ - pr_debug("twl6030_mmc_card_detect_config not supported\n"); - return 0; -} -#endif - -/* MMC1 Controller on OMAP4 uses Phoenix irq for Card detect */ -#ifdef CONFIG_TWL4030_CORE -int twl6030_mmc_card_detect(struct device *dev, int slot); -#else -static inline int twl6030_mmc_card_detect(struct device *dev, int slot) -{ - pr_debug("Call back twl6030_mmc_card_detect not supported\n"); - return -EIO; -} -#endif /*----------------------------------------------------------------------*/ /* -- cgit v1.2.3 From ea39dd2638ff6920c342313db98dbc152e815ecd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 21 Jun 2025 20:30:51 +0200 Subject: mfd: tps65219: Remove an unused field from 'struct tps65219' Since commit 3df4c6367520 ("mfd: tps65219: Add support for soft shutdown via sys-off API"), the 'nb' field from 'struct tps65219' is unused. Remove it. Also remove the now useless #include for the same reason. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/8a264c3a92b8e62c1dadd374f2685030e042eb08.1750530460.git.christophe.jaillet@wanadoo.fr Signed-off-by: Lee Jones --- include/linux/mfd/tps65219.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/tps65219.h b/include/linux/mfd/tps65219.h index 3e8d29189267..690002932377 100644 --- a/include/linux/mfd/tps65219.h +++ b/include/linux/mfd/tps65219.h @@ -10,7 +10,6 @@ #define MFD_TPS65219_H #include -#include #include #include @@ -440,7 +439,6 @@ enum tps65219_irqs { * @regmap: Regmap for accessing the device registers * @chip_id: Chip ID * @irq_data: Regmap irq data used for the irq chip - * @nb: notifier block for the restart handler */ struct tps65219 { struct device *dev; @@ -448,7 +446,6 @@ struct tps65219 { unsigned int chip_id; struct regmap_irq_chip_data *irq_data; - struct notifier_block nb; }; #endif /* MFD_TPS65219_H */ -- cgit v1.2.3 From 83f9afe4689d96279d9ade6c1cce36fa86e8ac63 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 21 Jun 2025 20:30:52 +0200 Subject: mfd: tps65219: Remove another unused field from 'struct tps65219' The 'chip_id' field from 'struct tps65219' is unused. Remove it. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/f20443e6e13b0b101648a41010a19ee56589fa0b.1750530460.git.christophe.jaillet@wanadoo.fr Signed-off-by: Lee Jones --- include/linux/mfd/tps65219.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/tps65219.h b/include/linux/mfd/tps65219.h index 690002932377..55234e771ba7 100644 --- a/include/linux/mfd/tps65219.h +++ b/include/linux/mfd/tps65219.h @@ -437,14 +437,12 @@ enum tps65219_irqs { * * @dev: MFD device * @regmap: Regmap for accessing the device registers - * @chip_id: Chip ID * @irq_data: Regmap irq data used for the irq chip */ struct tps65219 { struct device *dev; struct regmap *regmap; - unsigned int chip_id; struct regmap_irq_chip_data *irq_data; }; -- cgit v1.2.3 From e403cdf0704b7b1fedaf4ed5f05cc814dffbd6d5 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 1 Jul 2025 15:56:25 +0100 Subject: mfd: pcf50633: Remove the header file core.h The patches to remove all of the pieces of the pcf50633 have gone in and we're left with the header. Remove it. The pcf50633 was used as part of the OpenMoko devices but the support for its main chip was recently removed in: commit 61b7f8920b17 ("ARM: s3c: remove all s3c24xx support") See https://lore.kernel.org/all/Z8z236h4B5A6Ki3D@gallifrey/ Signed-off-by: "Dr. David Alan Gilbert" Link: https://lore.kernel.org/r/20250701145625.204048-1-linux@treblig.org Signed-off-by: Lee Jones --- include/linux/mfd/pcf50633/core.h | 229 -------------------------------------- 1 file changed, 229 deletions(-) delete mode 100644 include/linux/mfd/pcf50633/core.h (limited to 'include') diff --git a/include/linux/mfd/pcf50633/core.h b/include/linux/mfd/pcf50633/core.h deleted file mode 100644 index 42d2b0e4884e..000000000000 --- a/include/linux/mfd/pcf50633/core.h +++ /dev/null @@ -1,229 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * core.h -- Core driver for NXP PCF50633 - * - * (C) 2006-2008 by Openmoko, Inc. - * All rights reserved. - */ - -#ifndef __LINUX_MFD_PCF50633_CORE_H -#define __LINUX_MFD_PCF50633_CORE_H - -#include -#include -#include -#include -#include -#include - -struct pcf50633; -struct regmap; - -#define PCF50633_NUM_REGULATORS 11 - -struct pcf50633_platform_data { - struct regulator_init_data reg_init_data[PCF50633_NUM_REGULATORS]; - - char **batteries; - int num_batteries; - - /* - * Should be set accordingly to the reference resistor used, see - * I_{ch(ref)} charger reference current in the pcf50633 User - * Manual. - */ - int charger_reference_current_ma; - - /* Callbacks */ - void (*probe_done)(struct pcf50633 *); - void (*mbc_event_callback)(struct pcf50633 *, int); - void (*regulator_registered)(struct pcf50633 *, int); - void (*force_shutdown)(struct pcf50633 *); - - u8 resumers[5]; -}; - -struct pcf50633_irq { - void (*handler) (int, void *); - void *data; -}; - -int pcf50633_register_irq(struct pcf50633 *pcf, int irq, - void (*handler) (int, void *), void *data); -int pcf50633_free_irq(struct pcf50633 *pcf, int irq); - -int pcf50633_irq_mask(struct pcf50633 *pcf, int irq); -int pcf50633_irq_unmask(struct pcf50633 *pcf, int irq); -int pcf50633_irq_mask_get(struct pcf50633 *pcf, int irq); - -int pcf50633_read_block(struct pcf50633 *, u8 reg, - int nr_regs, u8 *data); -int pcf50633_write_block(struct pcf50633 *pcf, u8 reg, - int nr_regs, u8 *data); -u8 pcf50633_reg_read(struct pcf50633 *, u8 reg); -int pcf50633_reg_write(struct pcf50633 *pcf, u8 reg, u8 val); - -int pcf50633_reg_set_bit_mask(struct pcf50633 *pcf, u8 reg, u8 mask, u8 val); -int pcf50633_reg_clear_bits(struct pcf50633 *pcf, u8 reg, u8 bits); - -/* Interrupt registers */ - -#define PCF50633_REG_INT1 0x02 -#define PCF50633_REG_INT2 0x03 -#define PCF50633_REG_INT3 0x04 -#define PCF50633_REG_INT4 0x05 -#define PCF50633_REG_INT5 0x06 - -#define PCF50633_REG_INT1M 0x07 -#define PCF50633_REG_INT2M 0x08 -#define PCF50633_REG_INT3M 0x09 -#define PCF50633_REG_INT4M 0x0a -#define PCF50633_REG_INT5M 0x0b - -enum { - /* Chip IRQs */ - PCF50633_IRQ_ADPINS, - PCF50633_IRQ_ADPREM, - PCF50633_IRQ_USBINS, - PCF50633_IRQ_USBREM, - PCF50633_IRQ_RESERVED1, - PCF50633_IRQ_RESERVED2, - PCF50633_IRQ_ALARM, - PCF50633_IRQ_SECOND, - PCF50633_IRQ_ONKEYR, - PCF50633_IRQ_ONKEYF, - PCF50633_IRQ_EXTON1R, - PCF50633_IRQ_EXTON1F, - PCF50633_IRQ_EXTON2R, - PCF50633_IRQ_EXTON2F, - PCF50633_IRQ_EXTON3R, - PCF50633_IRQ_EXTON3F, - PCF50633_IRQ_BATFULL, - PCF50633_IRQ_CHGHALT, - PCF50633_IRQ_THLIMON, - PCF50633_IRQ_THLIMOFF, - PCF50633_IRQ_USBLIMON, - PCF50633_IRQ_USBLIMOFF, - PCF50633_IRQ_ADCRDY, - PCF50633_IRQ_ONKEY1S, - PCF50633_IRQ_LOWSYS, - PCF50633_IRQ_LOWBAT, - PCF50633_IRQ_HIGHTMP, - PCF50633_IRQ_AUTOPWRFAIL, - PCF50633_IRQ_DWN1PWRFAIL, - PCF50633_IRQ_DWN2PWRFAIL, - PCF50633_IRQ_LEDPWRFAIL, - PCF50633_IRQ_LEDOVP, - PCF50633_IRQ_LDO1PWRFAIL, - PCF50633_IRQ_LDO2PWRFAIL, - PCF50633_IRQ_LDO3PWRFAIL, - PCF50633_IRQ_LDO4PWRFAIL, - PCF50633_IRQ_LDO5PWRFAIL, - PCF50633_IRQ_LDO6PWRFAIL, - PCF50633_IRQ_HCLDOPWRFAIL, - PCF50633_IRQ_HCLDOOVL, - - /* Always last */ - PCF50633_NUM_IRQ, -}; - -struct pcf50633 { - struct device *dev; - struct regmap *regmap; - - struct pcf50633_platform_data *pdata; - int irq; - struct pcf50633_irq irq_handler[PCF50633_NUM_IRQ]; - struct work_struct irq_work; - struct workqueue_struct *work_queue; - struct mutex lock; - - u8 mask_regs[5]; - - u8 suspend_irq_masks[5]; - u8 resume_reason[5]; - int is_suspended; - - int onkey1s_held; - - struct platform_device *rtc_pdev; - struct platform_device *mbc_pdev; - struct platform_device *adc_pdev; - struct platform_device *input_pdev; - struct platform_device *bl_pdev; - struct platform_device *regulator_pdev[PCF50633_NUM_REGULATORS]; -}; - -enum pcf50633_reg_int1 { - PCF50633_INT1_ADPINS = 0x01, /* Adapter inserted */ - PCF50633_INT1_ADPREM = 0x02, /* Adapter removed */ - PCF50633_INT1_USBINS = 0x04, /* USB inserted */ - PCF50633_INT1_USBREM = 0x08, /* USB removed */ - /* reserved */ - PCF50633_INT1_ALARM = 0x40, /* RTC alarm time is reached */ - PCF50633_INT1_SECOND = 0x80, /* RTC periodic second interrupt */ -}; - -enum pcf50633_reg_int2 { - PCF50633_INT2_ONKEYR = 0x01, /* ONKEY rising edge */ - PCF50633_INT2_ONKEYF = 0x02, /* ONKEY falling edge */ - PCF50633_INT2_EXTON1R = 0x04, /* EXTON1 rising edge */ - PCF50633_INT2_EXTON1F = 0x08, /* EXTON1 falling edge */ - PCF50633_INT2_EXTON2R = 0x10, /* EXTON2 rising edge */ - PCF50633_INT2_EXTON2F = 0x20, /* EXTON2 falling edge */ - PCF50633_INT2_EXTON3R = 0x40, /* EXTON3 rising edge */ - PCF50633_INT2_EXTON3F = 0x80, /* EXTON3 falling edge */ -}; - -enum pcf50633_reg_int3 { - PCF50633_INT3_BATFULL = 0x01, /* Battery full */ - PCF50633_INT3_CHGHALT = 0x02, /* Charger halt */ - PCF50633_INT3_THLIMON = 0x04, - PCF50633_INT3_THLIMOFF = 0x08, - PCF50633_INT3_USBLIMON = 0x10, - PCF50633_INT3_USBLIMOFF = 0x20, - PCF50633_INT3_ADCRDY = 0x40, /* ADC result ready */ - PCF50633_INT3_ONKEY1S = 0x80, /* ONKEY pressed 1 second */ -}; - -enum pcf50633_reg_int4 { - PCF50633_INT4_LOWSYS = 0x01, - PCF50633_INT4_LOWBAT = 0x02, - PCF50633_INT4_HIGHTMP = 0x04, - PCF50633_INT4_AUTOPWRFAIL = 0x08, - PCF50633_INT4_DWN1PWRFAIL = 0x10, - PCF50633_INT4_DWN2PWRFAIL = 0x20, - PCF50633_INT4_LEDPWRFAIL = 0x40, - PCF50633_INT4_LEDOVP = 0x80, -}; - -enum pcf50633_reg_int5 { - PCF50633_INT5_LDO1PWRFAIL = 0x01, - PCF50633_INT5_LDO2PWRFAIL = 0x02, - PCF50633_INT5_LDO3PWRFAIL = 0x04, - PCF50633_INT5_LDO4PWRFAIL = 0x08, - PCF50633_INT5_LDO5PWRFAIL = 0x10, - PCF50633_INT5_LDO6PWRFAIL = 0x20, - PCF50633_INT5_HCLDOPWRFAIL = 0x40, - PCF50633_INT5_HCLDOOVL = 0x80, -}; - -/* misc. registers */ -#define PCF50633_REG_OOCSHDWN 0x0c - -/* LED registers */ -#define PCF50633_REG_LEDOUT 0x28 -#define PCF50633_REG_LEDENA 0x29 -#define PCF50633_REG_LEDCTL 0x2a -#define PCF50633_REG_LEDDIM 0x2b - -static inline struct pcf50633 *dev_to_pcf50633(struct device *dev) -{ - return dev_get_drvdata(dev); -} - -int pcf50633_irq_init(struct pcf50633 *pcf, int irq); -void pcf50633_irq_free(struct pcf50633 *pcf); -extern const struct dev_pm_ops pcf50633_pm; - -#endif -- cgit v1.2.3 From c371040f31ab63de992c7d811ffc4c7d450b46a5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 26 Jun 2025 18:43:54 +0300 Subject: mfd: davinci_voicecodec: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Note that kernel.h is discouraged to be included as it's written at the top of that file. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250626154354.324439-1-andriy.shevchenko@linux.intel.com Signed-off-by: Lee Jones --- include/linux/mfd/davinci_voicecodec.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/davinci_voicecodec.h b/include/linux/mfd/davinci_voicecodec.h index 556375b91316..9acd703dd5ca 100644 --- a/include/linux/mfd/davinci_voicecodec.h +++ b/include/linux/mfd/davinci_voicecodec.h @@ -10,11 +10,13 @@ #ifndef __LINUX_MFD_DAVINCI_VOICECODEC_H_ #define __LINUX_MFD_DAVINCI_VOICECODEC_H_ -#include -#include +#include #include -#include +#include +struct clk; +struct device; +struct platform_device; struct regmap; /* -- cgit v1.2.3 From dd1902b6e90508b4243af930db933ea3d26d2981 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 26 Jun 2025 18:59:51 +0300 Subject: mfd: wm8350-core: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Note that kernel.h is discouraged to be included as it's written at the top of that file. Signed-off-by: Andy Shevchenko Reviewed-by: Charles Keepax Link: https://lore.kernel.org/r/20250626155951.325683-1-andriy.shevchenko@linux.intel.com Signed-off-by: Lee Jones --- include/linux/mfd/wm8350/core.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h index a3241e4d7548..5f70d3b5d1b1 100644 --- a/include/linux/mfd/wm8350/core.h +++ b/include/linux/mfd/wm8350/core.h @@ -8,11 +8,12 @@ #ifndef __LINUX_MFD_WM8350_CORE_H_ #define __LINUX_MFD_WM8350_CORE_H_ -#include -#include -#include #include +#include +#include +#include #include +#include #include #include @@ -21,6 +22,9 @@ #include #include +struct device; +struct platform_device; + /* * Register values. */ -- cgit v1.2.3 From dd394515d18aedd379e8dc886cb8286e1714f735 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 26 Jun 2025 18:45:44 +0300 Subject: mfd: madera: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Note that kernel.h is discouraged to be included as it's written at the top of that file. Signed-off-by: Andy Shevchenko Reviewed-by: Charles Keepax Link: https://lore.kernel.org/r/20250626154544.324724-1-andriy.shevchenko@linux.intel.com Signed-off-by: Lee Jones --- include/linux/mfd/madera/pdata.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/madera/pdata.h b/include/linux/mfd/madera/pdata.h index 32e3470708ed..7e84738cbb20 100644 --- a/include/linux/mfd/madera/pdata.h +++ b/include/linux/mfd/madera/pdata.h @@ -8,10 +8,11 @@ #ifndef MADERA_PDATA_H #define MADERA_PDATA_H -#include #include #include #include +#include + #include #define MADERA_MAX_MICBIAS 4 -- cgit v1.2.3 From b9ec71fbd572042770df16c9b65bbf91cbd556cf Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 27 Jun 2025 19:43:58 +0300 Subject: mfd: syscon: atmel-smc: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Note that kernel.h is discouraged to be included as it's written at the top of that file. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250627164414.1043434-1-andriy.shevchenko@linux.intel.com Signed-off-by: Lee Jones --- include/linux/mfd/syscon/atmel-smc.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/syscon/atmel-smc.h b/include/linux/mfd/syscon/atmel-smc.h index e9e24f4c4578..9b9119c742a2 100644 --- a/include/linux/mfd/syscon/atmel-smc.h +++ b/include/linux/mfd/syscon/atmel-smc.h @@ -11,9 +11,11 @@ #ifndef _LINUX_MFD_SYSCON_ATMEL_SMC_H_ #define _LINUX_MFD_SYSCON_ATMEL_SMC_H_ -#include -#include -#include +#include +#include + +struct device_node; +struct regmap; #define ATMEL_SMC_SETUP(cs) (((cs) * 0x10)) #define ATMEL_HSMC_SETUP(layout, cs) \ -- cgit v1.2.3 From db8db85cff331eb5a520a18a606692ff85405c3d Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Fri, 27 Jun 2025 12:53:54 +0200 Subject: mfd: rk8xx-core: Allow to customize RK806 reset mode The RK806 PMIC has a bitfield for configuring the restart/reset behavior (which I assume Rockchip calls "function") whenever the PMIC is reset either programmatically (c.f. DEV_RST in the datasheet) or via PWRCTRL or RESETB pins. For RK806, the following values are possible for RST_FUN: 0b00 means "Restart PMU" 0b01 means "Reset all the power off reset registers, forcing the state to switch to ACTIVE mode" 0b10 means "Reset all the power off reset registers, forcing the state to switch to ACTIVE mode, and simultaneously pull down the RESETB PIN for 5mS before releasing" 0b11 means the same as for 0b10 just above. This adds the appropriate logic in the driver to parse the new rockchip,reset-mode DT property to pass this information. It just happens that the values in the binding match the values to write in the bitfield so no mapping is necessary. If it is missing, the register is left untouched and relies either on the silicon default or on whatever was set earlier in the boot stages (e.g. the bootloader). Signed-off-by: Quentin Schulz Link: https://lore.kernel.org/r/20250627-rk8xx-rst-fun-v4-2-ce05d041b45f@cherry.de Signed-off-by: Lee Jones --- include/linux/mfd/rk808.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h index 69cbea78b430..28170ee08898 100644 --- a/include/linux/mfd/rk808.h +++ b/include/linux/mfd/rk808.h @@ -812,6 +812,8 @@ enum rk806_pin_dr_sel { #define RK806_INT_POL_H BIT(1) #define RK806_INT_POL_L 0 +/* SYS_CFG3 */ +#define RK806_RST_FUN_MSK GENMASK(7, 6) #define RK806_SLAVE_RESTART_FUN_MSK BIT(1) #define RK806_SLAVE_RESTART_FUN_EN BIT(1) #define RK806_SLAVE_RESTART_FUN_OFF 0 -- cgit v1.2.3 From 50a479527ef01f9b36dde1803a7e81741a222509 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 18 Jul 2025 14:54:32 +0100 Subject: ASoC: SDCA: Add support for -cn- value properties Many of the DisCo properties that specify Control values have an additional variant that specifies a separate value for each Control Number. Add support for these. Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20250718135432.1048566-3-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/sdca_function.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h index 90d77fc46416..06ec126cdcc3 100644 --- a/include/sound/sdca_function.h +++ b/include/sound/sdca_function.h @@ -742,14 +742,14 @@ struct sdca_control_range { * struct sdca_control - information for one SDCA Control * @label: Name for the Control, from SDCA Specification v1.0, section 7.1.7. * @sel: Identifier used for addressing. - * @value: Holds the Control value for constants and defaults. * @nbits: Number of bits used in the Control. - * @interrupt_position: SCDA interrupt line that will alert to changes on this - * Control. + * @values: Holds the Control value for constants and defaults. * @cn_list: A bitmask showing the valid Control Numbers within this Control, * Control Numbers typically represent channels. - * @range: Buffer describing valid range of values for the Control. + * @interrupt_position: SCDA interrupt line that will alert to changes on this + * Control. * @type: Format of the data in the Control. + * @range: Buffer describing valid range of values for the Control. * @mode: Access mode of the Control. * @layers: Bitmask of access layers of the Control. * @deferrable: Indicates if the access to the Control can be deferred. @@ -760,13 +760,13 @@ struct sdca_control { const char *label; int sel; - int value; int nbits; - int interrupt_position; + int *values; u64 cn_list; + int interrupt_position; - struct sdca_control_range range; enum sdca_control_datatype type; + struct sdca_control_range range; enum sdca_access_mode mode; u8 layers; -- cgit v1.2.3 From 678bae2eaa812662929a83b3de399645e9de93ad Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 22 Jul 2025 17:35:43 +0200 Subject: gpiolib: make legacy interfaces optional The traditional interfaces are only used on a small number of ancient boards. Make these optional now so they can be disabled by default. Signed-off-by: Arnd Bergmann Reviewed-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20250722153634.3683927-1-arnd@kernel.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index ff99ed76fdc3..8f85ddb26429 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -13,6 +13,11 @@ #define __LINUX_GPIO_H #include +#ifdef CONFIG_GPIOLIB +#include +#endif + +#ifdef CONFIG_GPIOLIB_LEGACY struct device; @@ -22,9 +27,6 @@ struct device; #define GPIOF_OUT_INIT_HIGH ((0 << 0) | (1 << 1)) #ifdef CONFIG_GPIOLIB - -#include - /* * "valid" GPIO numbers are nonnegative and may be passed to * setup routines like gpio_request(). Only some valid numbers @@ -170,5 +172,5 @@ static inline int devm_gpio_request_one(struct device *dev, unsigned gpio, } #endif /* ! CONFIG_GPIOLIB */ - +#endif /* CONFIG_GPIOLIB_LEGACY */ #endif /* __LINUX_GPIO_H */ -- cgit v1.2.3 From fdb7f139864aa332ea8f161beb636dc0599c64f2 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Mon, 16 Jun 2025 13:29:56 -0700 Subject: ice, libie: move generic adminq descriptors to lib The descriptor structure is the same in ice, ixgbe and i40e. Move it to common libie header to use it across different driver. Leave device specific adminq commands in separate folders. This lead to a change that need to be done in filling/getting descriptor: - previous: struct specific_desc *cmd; cmd = &desc.params.specific_desc; - now: struct specific_desc *cmd; cmd = libie_aq_raw(&desc); Do this changes across the driver to allow clean build. The casting only have to be done in case of specific descriptors, for generic one union can still be used. Changes beside code moving: - change ICE_ prefix to LIBIE_ prefix (ice_ and libie_ too) - remove shift variables not otherwise needed (in libie_aq_flags) - fill/get descriptor data based on desc.params.raw whenever the descriptor isn't defined in libie - move defines from the libie_aq_sth structure outside - add libie_aq_raw helper and use it instead of explicit casting Reviewed by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Signed-off-by: Michal Swiatkowski Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- include/linux/net/intel/libie/adminq.h | 273 +++++++++++++++++++++++++++++++++ 1 file changed, 273 insertions(+) create mode 100644 include/linux/net/intel/libie/adminq.h (limited to 'include') diff --git a/include/linux/net/intel/libie/adminq.h b/include/linux/net/intel/libie/adminq.h new file mode 100644 index 000000000000..3676adc33d3e --- /dev/null +++ b/include/linux/net/intel/libie/adminq.h @@ -0,0 +1,273 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2025 Intel Corporation */ + +#ifndef __LIBIE_ADMINQ_H +#define __LIBIE_ADMINQ_H + +#include +#include + +#define LIBIE_CHECK_STRUCT_LEN(n, X) \ + static_assert((n) == sizeof(struct X)) + +/** + * struct libie_aqc_generic - Generic structure used in adminq communication + * @param0: generic parameter high 32bit + * @param1: generic parameter lower 32bit + * @addr_high: generic address high 32bit + * @addr_low: generic address lower 32bit + */ +struct libie_aqc_generic { + __le32 param0; + __le32 param1; + __le32 addr_high; + __le32 addr_low; +}; +LIBIE_CHECK_STRUCT_LEN(16, libie_aqc_generic); + +/** + * struct libie_aqc_get_ver - Used in command get version (direct 0x0001) + * @rom_ver: rom version + * @fw_build: number coressponding to firmware build + * @fw_branch: branch identifier of firmware version + * @fw_major: major number of firmware version + * @fw_minor: minor number of firmware version + * @fw_patch: patch of firmware version + * @api_branch: brancch identifier of API version + * @api_major: major number of API version + * @api_minor: minor number of API version + * @api_patch: patch of API version + */ +struct libie_aqc_get_ver { + __le32 rom_ver; + __le32 fw_build; + u8 fw_branch; + u8 fw_major; + u8 fw_minor; + u8 fw_patch; + u8 api_branch; + u8 api_major; + u8 api_minor; + u8 api_patch; +}; +LIBIE_CHECK_STRUCT_LEN(16, libie_aqc_get_ver); + +/** + * struct libie_aqc_driver_ver - Used in command send driver version + * (indirect 0x0002) + * @major_ver: driver major version + * @minor_ver: driver minor version + * @build_ver: driver build version + * @subbuild_ver: driver subbuild version + * @reserved: for feature use + * @addr_high: high part of response address buff + * @addr_low: low part of response address buff + */ +struct libie_aqc_driver_ver { + u8 major_ver; + u8 minor_ver; + u8 build_ver; + u8 subbuild_ver; + u8 reserved[4]; + __le32 addr_high; + __le32 addr_low; +}; +LIBIE_CHECK_STRUCT_LEN(16, libie_aqc_driver_ver); + +enum libie_aq_res_id { + LIBIE_AQC_RES_ID_NVM = 1, + LIBIE_AQC_RES_ID_SDP = 2, + LIBIE_AQC_RES_ID_CHNG_LOCK = 3, + LIBIE_AQC_RES_ID_GLBL_LOCK = 4, +}; + +enum libie_aq_res_access_type { + LIBIE_AQC_RES_ACCESS_READ = 1, + LIBIE_AQC_RES_ACCESS_WRITE = 2, +}; + +#define LIBIE_AQ_RES_NVM_READ_DFLT_TIMEOUT_MS 3000 +#define LIBIE_AQ_RES_NVM_WRITE_DFLT_TIMEOUT_MS 180000 +#define LIBIE_AQ_RES_CHNG_LOCK_DFLT_TIMEOUT_MS 1000 +#define LIBIE_AQ_RES_GLBL_LOCK_DFLT_TIMEOUT_MS 3000 + +#define LIBIE_AQ_RES_GLBL_SUCCESS 0 +#define LIBIE_AQ_RES_GLBL_IN_PROG 1 +#define LIBIE_AQ_RES_GLBL_DONE 2 + +/** + * struct libie_aqc_req_res - Request resource ownership + * @res_id: resource ID (look at enum definition above) + * @access_type: read or write (enum definition above) + * @timeout: Upon successful completion, FW writes this value and driver is + * expected to release resource before timeout. This value is provided in + * milliseconds. + * @res_number: for SDP, this is the pin ID of the SDP + * @status: status only used for LIBIE_AQC_RES_ID_GLBL_LOCK, for others reserved + * @reserved: reserved for future use + * + * Used in commands: + * request resource ownership (direct 0x0008) + * request resource ownership (direct 0x0009) + */ +struct libie_aqc_req_res { + __le16 res_id; + __le16 access_type; + + __le32 timeout; + __le32 res_number; + __le16 status; + u8 reserved[2]; +}; +LIBIE_CHECK_STRUCT_LEN(16, libie_aqc_req_res); + +/** + * struct libie_aqc_list_caps - Getting capabilities + * @cmd_flags: command flags + * @pf_index: index of PF to get caps from + * @reserved: reserved for future use + * @count: number of capabilities records + * @addr_high: high part of response address buff + * @addr_low: low part of response address buff + * + * Used in commands: + * get function capabilities (indirect 0x000A) + * get device capabilities (indirect 0x000B) + */ +struct libie_aqc_list_caps { + u8 cmd_flags; + u8 pf_index; + u8 reserved[2]; + __le32 count; + __le32 addr_high; + __le32 addr_low; +}; +LIBIE_CHECK_STRUCT_LEN(16, libie_aqc_list_caps); + +/* Device/Function buffer entry, repeated per reported capability */ +#define LIBIE_AQC_CAPS_VALID_FUNCTIONS 0x0005 +#define LIBIE_AQC_CAPS_SRIOV 0x0012 +#define LIBIE_AQC_CAPS_VF 0x0013 +#define LIBIE_AQC_CAPS_VSI 0x0017 +#define LIBIE_AQC_CAPS_DCB 0x0018 +#define LIBIE_AQC_CAPS_RSS 0x0040 +#define LIBIE_AQC_CAPS_RXQS 0x0041 +#define LIBIE_AQC_CAPS_TXQS 0x0042 +#define LIBIE_AQC_CAPS_MSIX 0x0043 +#define LIBIE_AQC_CAPS_FD 0x0045 +#define LIBIE_AQC_CAPS_1588 0x0046 +#define LIBIE_AQC_CAPS_MAX_MTU 0x0047 +#define LIBIE_AQC_CAPS_NVM_VER 0x0048 +#define LIBIE_AQC_CAPS_PENDING_NVM_VER 0x0049 +#define LIBIE_AQC_CAPS_OROM_VER 0x004A +#define LIBIE_AQC_CAPS_PENDING_OROM_VER 0x004B +#define LIBIE_AQC_CAPS_NET_VER 0x004C +#define LIBIE_AQC_CAPS_PENDING_NET_VER 0x004D +#define LIBIE_AQC_CAPS_RDMA 0x0051 +#define LIBIE_AQC_CAPS_SENSOR_READING 0x0067 +#define LIBIE_AQC_CAPS_PCIE_RESET_AVOIDANCE 0x0076 +#define LIBIE_AQC_CAPS_POST_UPDATE_RESET_RESTRICT 0x0077 +#define LIBIE_AQC_CAPS_NVM_MGMT 0x0080 +#define LIBIE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE 0x0085 +#define LIBIE_AQC_CAPS_NAC_TOPOLOGY 0x0087 +#define LIBIE_AQC_CAPS_FW_LAG_SUPPORT 0x0092 +#define LIBIE_AQC_BIT_ROCEV2_LAG 0x01 +#define LIBIE_AQC_BIT_SRIOV_LAG 0x02 + +/** + * struct libie_aqc_list_caps_elem - Getting list of caps elements + * @cap: one from the defines list above + * @major_ver: major version + * @minor_ver: minor version + * @number: number of resources described by this capability + * @logical_id: logical ID, only meaningful for some types of resources + * @phys_id: physical ID, only meaningful for some types of resources + * @rsvd1: reserved for future use + * @rsvd2: reserved for future use + */ +struct libie_aqc_list_caps_elem { + __le16 cap; + + u8 major_ver; + u8 minor_ver; + __le32 number; + __le32 logical_id; + __le32 phys_id; + __le64 rsvd1; + __le64 rsvd2; +}; +LIBIE_CHECK_STRUCT_LEN(32, libie_aqc_list_caps_elem); + +/** + * struct libie_aq_desc - Admin Queue (AQ) descriptor + * @flags: LIBIE_AQ_FLAG_* flags + * @opcode: AQ command opcode + * @datalen: length in bytes of indirect/external data buffer + * @retval: return value from firmware + * @cookie_high: opaque data high-half + * @cookie_low: opaque data low-half + * @params: command-specific parameters + * + * Descriptor format for commands the driver posts on the Admin Transmit Queue + * (ATQ). The firmware writes back onto the command descriptor and returns + * the result of the command. Asynchronous events that are not an immediate + * result of the command are written to the Admin Receive Queue (ARQ) using + * the same descriptor format. Descriptors are in little-endian notation with + * 32-bit words. + */ +struct libie_aq_desc { + __le16 flags; + __le16 opcode; + __le16 datalen; + __le16 retval; + __le32 cookie_high; + __le32 cookie_low; + union { + u8 raw[16]; + struct libie_aqc_generic generic; + struct libie_aqc_get_ver get_ver; + struct libie_aqc_driver_ver driver_ver; + struct libie_aqc_req_res res_owner; + struct libie_aqc_list_caps get_cap; + } params; +}; +LIBIE_CHECK_STRUCT_LEN(32, libie_aq_desc); + +/* FW defined boundary for a large buffer, 4k >= Large buffer > 512 bytes */ +#define LIBIE_AQ_LG_BUF 512 + +#define LIBIE_AQ_FLAG_DD BIT(0) /* 0x1 */ +#define LIBIE_AQ_FLAG_CMP BIT(1) /* 0x2 */ +#define LIBIE_AQ_FLAG_ERR BIT(2) /* 0x4 */ +#define LIBIE_AQ_FLAG_LB BIT(9) /* 0x200 */ +#define LIBIE_AQ_FLAG_RD BIT(10) /* 0x400 */ +#define LIBIE_AQ_FLAG_BUF BIT(12) /* 0x1000 */ +#define LIBIE_AQ_FLAG_SI BIT(13) /* 0x2000 */ + +/* error codes */ +enum libie_aq_err { + LIBIE_AQ_RC_OK = 0, /* Success */ + LIBIE_AQ_RC_EPERM = 1, /* Operation not permitted */ + LIBIE_AQ_RC_ENOENT = 2, /* No such element */ + LIBIE_AQ_RC_ESRCH = 3, /* Bad opcode */ + LIBIE_AQ_RC_EAGAIN = 8, /* Try again */ + LIBIE_AQ_RC_ENOMEM = 9, /* Out of memory */ + LIBIE_AQ_RC_EBUSY = 12, /* Device or resource busy */ + LIBIE_AQ_RC_EEXIST = 13, /* Object already exists */ + LIBIE_AQ_RC_EINVAL = 14, /* Invalid argument */ + LIBIE_AQ_RC_ENOSPC = 16, /* No space left or allocation failure */ + LIBIE_AQ_RC_ENOSYS = 17, /* Function not implemented */ + LIBIE_AQ_RC_EMODE = 21, /* Op not allowed in current dev mode */ + LIBIE_AQ_RC_ENOSEC = 24, /* Missing security manifest */ + LIBIE_AQ_RC_EBADSIG = 25, /* Bad RSA signature */ + LIBIE_AQ_RC_ESVN = 26, /* SVN number prohibits this package */ + LIBIE_AQ_RC_EBADMAN = 27, /* Manifest hash mismatch */ + LIBIE_AQ_RC_EBADBUF = 28, /* Buffer hash mismatches manifest */ +}; + +static inline void *libie_aq_raw(struct libie_aq_desc *desc) +{ + return &desc->params.raw; +} + +#endif /* __LIBIE_ADMINQ_H */ -- cgit v1.2.3 From 5b36bef444432b75e7285e33338eb8bad53fe152 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 25 Apr 2025 08:08:03 +0200 Subject: ixgbe: use libie adminq descriptors Use libie_aq_desc instead of ixgbe_aci_desc. Do needed changes to allow clean build. Move additional caps used in ixgbe to libie. Reviewed-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Signed-off-by: Michal Swiatkowski Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- include/linux/net/intel/libie/adminq.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/net/intel/libie/adminq.h b/include/linux/net/intel/libie/adminq.h index 3676adc33d3e..b8079e7d842a 100644 --- a/include/linux/net/intel/libie/adminq.h +++ b/include/linux/net/intel/libie/adminq.h @@ -146,8 +146,10 @@ LIBIE_CHECK_STRUCT_LEN(16, libie_aqc_list_caps); /* Device/Function buffer entry, repeated per reported capability */ #define LIBIE_AQC_CAPS_VALID_FUNCTIONS 0x0005 +#define LIBIE_AQC_MAX_VALID_FUNCTIONS 0x8 #define LIBIE_AQC_CAPS_SRIOV 0x0012 #define LIBIE_AQC_CAPS_VF 0x0013 +#define LIBIE_AQC_CAPS_VMDQ 0x0014 #define LIBIE_AQC_CAPS_VSI 0x0017 #define LIBIE_AQC_CAPS_DCB 0x0018 #define LIBIE_AQC_CAPS_RSS 0x0040 @@ -165,9 +167,15 @@ LIBIE_CHECK_STRUCT_LEN(16, libie_aqc_list_caps); #define LIBIE_AQC_CAPS_PENDING_NET_VER 0x004D #define LIBIE_AQC_CAPS_RDMA 0x0051 #define LIBIE_AQC_CAPS_SENSOR_READING 0x0067 +#define LIBIE_AQC_INLINE_IPSEC 0x0070 +#define LIBIE_AQC_CAPS_NUM_ENABLED_PORTS 0x0072 #define LIBIE_AQC_CAPS_PCIE_RESET_AVOIDANCE 0x0076 #define LIBIE_AQC_CAPS_POST_UPDATE_RESET_RESTRICT 0x0077 #define LIBIE_AQC_CAPS_NVM_MGMT 0x0080 +#define LIBIE_AQC_CAPS_EXT_TOPO_DEV_IMG0 0x0081 +#define LIBIE_AQC_CAPS_EXT_TOPO_DEV_IMG1 0x0082 +#define LIBIE_AQC_CAPS_EXT_TOPO_DEV_IMG2 0x0083 +#define LIBIE_AQC_CAPS_EXT_TOPO_DEV_IMG3 0x0084 #define LIBIE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE 0x0085 #define LIBIE_AQC_CAPS_NAC_TOPOLOGY 0x0087 #define LIBIE_AQC_CAPS_FW_LAG_SUPPORT 0x0092 @@ -236,13 +244,21 @@ LIBIE_CHECK_STRUCT_LEN(32, libie_aq_desc); /* FW defined boundary for a large buffer, 4k >= Large buffer > 512 bytes */ #define LIBIE_AQ_LG_BUF 512 +/* Flags sub-structure + * |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 | + * |DD |CMP|ERR|VFE| * * RESERVED * * |LB |RD |VFC|BUF|SI |EI |FE | + */ #define LIBIE_AQ_FLAG_DD BIT(0) /* 0x1 */ #define LIBIE_AQ_FLAG_CMP BIT(1) /* 0x2 */ #define LIBIE_AQ_FLAG_ERR BIT(2) /* 0x4 */ +#define LIBIE_AQ_FLAG_VFE BIT(3) /* 0x8 */ #define LIBIE_AQ_FLAG_LB BIT(9) /* 0x200 */ #define LIBIE_AQ_FLAG_RD BIT(10) /* 0x400 */ +#define LIBIE_AQ_FLAG_VFC BIT(11) /* 0x800 */ #define LIBIE_AQ_FLAG_BUF BIT(12) /* 0x1000 */ #define LIBIE_AQ_FLAG_SI BIT(13) /* 0x2000 */ +#define LIBIE_AQ_FLAG_EI BIT(14) /* 0x4000 */ +#define LIBIE_AQ_FLAG_FE BIT(15) /* 0x8000 */ /* error codes */ enum libie_aq_err { -- cgit v1.2.3 From b46012a20006a689529b6b51e05a8ad5320f7e7c Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 25 Apr 2025 08:08:04 +0200 Subject: i40e: use libie adminq descriptors Use libie_aq_desc instead of i40e_aq_desc. Do needed changes to allow clean build. Get version descriptor is a little less detailed on i40e. To not mess up with shifting or union inside libie desc use get version descriptor from i40e. Move additional caps for i40e to libie. Fix RCT in declaration that is using libie_aq_desc; Use libie_aq_raw() wherever it can be used. The libie aq error is extended, cover it in ice driver just to clean build. In next patches the libie code for that will be used in each of intel driver. Reviewed-by: Przemek Kitszel Signed-off-by: Michal Swiatkowski Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- include/linux/net/intel/libie/adminq.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/net/intel/libie/adminq.h b/include/linux/net/intel/libie/adminq.h index b8079e7d842a..bab7cecc657f 100644 --- a/include/linux/net/intel/libie/adminq.h +++ b/include/linux/net/intel/libie/adminq.h @@ -145,17 +145,26 @@ struct libie_aqc_list_caps { LIBIE_CHECK_STRUCT_LEN(16, libie_aqc_list_caps); /* Device/Function buffer entry, repeated per reported capability */ +#define LIBIE_AQC_CAPS_SWITCH_MODE 0x0001 +#define LIBIE_AQC_CAPS_MNG_MODE 0x0002 +#define LIBIE_AQC_CAPS_NPAR_ACTIVE 0x0003 +#define LIBIE_AQC_CAPS_OS2BMC_CAP 0x0004 #define LIBIE_AQC_CAPS_VALID_FUNCTIONS 0x0005 #define LIBIE_AQC_MAX_VALID_FUNCTIONS 0x8 #define LIBIE_AQC_CAPS_SRIOV 0x0012 #define LIBIE_AQC_CAPS_VF 0x0013 #define LIBIE_AQC_CAPS_VMDQ 0x0014 +#define LIBIE_AQC_CAPS_8021QBG 0x0015 +#define LIBIE_AQC_CAPS_8021QBR 0x0016 #define LIBIE_AQC_CAPS_VSI 0x0017 #define LIBIE_AQC_CAPS_DCB 0x0018 +#define LIBIE_AQC_CAPS_FCOE 0x0021 +#define LIBIE_AQC_CAPS_ISCSI 0x0022 #define LIBIE_AQC_CAPS_RSS 0x0040 #define LIBIE_AQC_CAPS_RXQS 0x0041 #define LIBIE_AQC_CAPS_TXQS 0x0042 #define LIBIE_AQC_CAPS_MSIX 0x0043 +#define LIBIE_AQC_CAPS_VF_MSIX 0x0044 #define LIBIE_AQC_CAPS_FD 0x0045 #define LIBIE_AQC_CAPS_1588 0x0046 #define LIBIE_AQC_CAPS_MAX_MTU 0x0047 @@ -166,6 +175,10 @@ LIBIE_CHECK_STRUCT_LEN(16, libie_aqc_list_caps); #define LIBIE_AQC_CAPS_NET_VER 0x004C #define LIBIE_AQC_CAPS_PENDING_NET_VER 0x004D #define LIBIE_AQC_CAPS_RDMA 0x0051 +#define LIBIE_AQC_CAPS_LED 0x0061 +#define LIBIE_AQC_CAPS_SDP 0x0062 +#define LIBIE_AQC_CAPS_MDIO 0x0063 +#define LIBIE_AQC_CAPS_WSR_PROT 0x0064 #define LIBIE_AQC_CAPS_SENSOR_READING 0x0067 #define LIBIE_AQC_INLINE_IPSEC 0x0070 #define LIBIE_AQC_CAPS_NUM_ENABLED_PORTS 0x0072 @@ -181,6 +194,8 @@ LIBIE_CHECK_STRUCT_LEN(16, libie_aqc_list_caps); #define LIBIE_AQC_CAPS_FW_LAG_SUPPORT 0x0092 #define LIBIE_AQC_BIT_ROCEV2_LAG 0x01 #define LIBIE_AQC_BIT_SRIOV_LAG 0x02 +#define LIBIE_AQC_CAPS_FLEX10 0x00F1 +#define LIBIE_AQC_CAPS_CEM 0x00F2 /** * struct libie_aqc_list_caps_elem - Getting list of caps elements @@ -266,8 +281,10 @@ enum libie_aq_err { LIBIE_AQ_RC_EPERM = 1, /* Operation not permitted */ LIBIE_AQ_RC_ENOENT = 2, /* No such element */ LIBIE_AQ_RC_ESRCH = 3, /* Bad opcode */ + LIBIE_AQ_RC_EIO = 5, /* I/O error */ LIBIE_AQ_RC_EAGAIN = 8, /* Try again */ LIBIE_AQ_RC_ENOMEM = 9, /* Out of memory */ + LIBIE_AQ_RC_EACCES = 10, /* Permission denied */ LIBIE_AQ_RC_EBUSY = 12, /* Device or resource busy */ LIBIE_AQ_RC_EEXIST = 13, /* Object already exists */ LIBIE_AQ_RC_EINVAL = 14, /* Invalid argument */ -- cgit v1.2.3 From 5feaa7a07b85ebbef418ba4b80e4e0d23dc379f5 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 25 Apr 2025 08:08:06 +0200 Subject: libie: add adminq helper for converting err to str Add a new module for common handling of Admin Queue related logic. Start by a helper for error to string conversion. This lives inside libie/, but is a separate module what follows our logic of splitting into topical modules, to avoid pulling in not needed stuff, and have better organization in general. Olek suggested how to better solve the error to string conversion. It will be used in follow-up patches in ice, i40e and iavf. Reviewed-by: Przemek Kitszel Suggested-by: Alexander Lobakin Signed-off-by: Michal Swiatkowski Signed-off-by: Tony Nguyen --- include/linux/net/intel/libie/adminq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/net/intel/libie/adminq.h b/include/linux/net/intel/libie/adminq.h index bab7cecc657f..012b5d499c1a 100644 --- a/include/linux/net/intel/libie/adminq.h +++ b/include/linux/net/intel/libie/adminq.h @@ -303,4 +303,6 @@ static inline void *libie_aq_raw(struct libie_aq_desc *desc) return &desc->params.raw; } +const char *libie_aq_str(enum libie_aq_err err); + #endif /* __LIBIE_ADMINQ_H */ -- cgit v1.2.3 From ca7be9c0a148cbfe38df95a0285339c532ca6e17 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 8 Jun 2025 17:15:35 +0100 Subject: mtd: ubi: Remove unused ubi_flush ubi_flush() was added in 2012 as part of commit 62f384552b67 ("UBI: modify ubi_wl_flush function to clear work queue for a lnum") but has remained unused. (It's friend ubi_wl_flush() is still used) Remove it. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Richard Weinberger --- include/linux/mtd/ubi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h index 562f92504f2b..c3f79c4be1cc 100644 --- a/include/linux/mtd/ubi.h +++ b/include/linux/mtd/ubi.h @@ -250,7 +250,6 @@ int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum); int ubi_leb_map(struct ubi_volume_desc *desc, int lnum); int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum); int ubi_sync(int ubi_num); -int ubi_flush(int ubi_num, int vol_id, int lnum); /* * This function is the same as the 'ubi_leb_read()' function, but it does not -- cgit v1.2.3 From 1c3b002c6bf684b445a7107609979bca5f21bc03 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 10 Jul 2025 15:13:49 -0400 Subject: PCI: endpoint: Add RC-to-EP doorbell support using platform MSI controller Implement the doorbell feature by mapping the EP's MSI interrupt controller message address to a dedicated BAR. The EPF driver should pass the actual message data to be written to the message address by the host through implementation-specific logic. Signed-off-by: Frank Li [mani: minor code cleanups and reworded commit message] Signed-off-by: Manivannan Sadhasivam [bhelgaas: fix kernel-doc] Signed-off-by: Bjorn Helgaas Tested-by: Niklas Cassel Link: https://patch.msgid.link/20250710-ep-msi-v21-3-57683fc7fb25@nxp.com --- include/linux/pci-ep-msi.h | 28 ++++++++++++++++++++++++++++ include/linux/pci-epf.h | 15 +++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 include/linux/pci-ep-msi.h (limited to 'include') diff --git a/include/linux/pci-ep-msi.h b/include/linux/pci-ep-msi.h new file mode 100644 index 000000000000..7c5db90f9620 --- /dev/null +++ b/include/linux/pci-ep-msi.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * PCI Endpoint *Function* side MSI header file + * + * Copyright (C) 2024 NXP + * Author: Frank Li + */ + +#ifndef __PCI_EP_MSI__ +#define __PCI_EP_MSI__ + +struct pci_epf; + +#ifdef CONFIG_PCI_ENDPOINT_MSI_DOORBELL +int pci_epf_alloc_doorbell(struct pci_epf *epf, u16 nums); +void pci_epf_free_doorbell(struct pci_epf *epf); +#else +static inline int pci_epf_alloc_doorbell(struct pci_epf *epf, u16 nums) +{ + return -ENODATA; +} + +static inline void pci_epf_free_doorbell(struct pci_epf *epf) +{ +} +#endif /* CONFIG_GENERIC_MSI_IRQ */ + +#endif /* __PCI_EP_MSI__ */ diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 749cee0bcf2c..52e07602f08e 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -12,6 +12,7 @@ #include #include #include +#include #include struct pci_epf; @@ -128,6 +129,16 @@ struct pci_epf_bar { int flags; }; +/** + * struct pci_epf_doorbell_msg - represents doorbell message + * @msg: MSI message + * @virq: IRQ number of this doorbell MSI message + */ +struct pci_epf_doorbell_msg { + struct msi_msg msg; + int virq; +}; + /** * struct pci_epf - represents the PCI EPF device * @dev: the PCI EPF device @@ -155,6 +166,8 @@ struct pci_epf_bar { * @vfunction_num_map: bitmap to manage virtual function number * @pci_vepf: list of virtual endpoint functions associated with this function * @event_ops: callbacks for capturing the EPC events + * @db_msg: data for MSI from RC side + * @num_db: number of doorbells */ struct pci_epf { struct device dev; @@ -185,6 +198,8 @@ struct pci_epf { unsigned long vfunction_num_map; struct list_head pci_vepf; const struct pci_epc_event_ops *event_ops; + struct pci_epf_doorbell_msg *db_msg; + u16 num_db; }; /** -- cgit v1.2.3 From 4ff4252a2355f585c5cad8dc959ff1097300aa47 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 10 Jul 2025 15:13:51 -0400 Subject: PCI: endpoint: Add pci_epf_align_inbound_addr() helper for inbound address alignment Add pci_epf_align_inbound_addr() to align the inbound addresses according to PCI BAR alignment requirements. The aligned base address and offset are returned via 'base' and 'off' parameters. Signed-off-by: Frank Li [mani: reworded kernel-doc and commit message] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Tested-by: Niklas Cassel Link: https://patch.msgid.link/20250710-ep-msi-v21-5-57683fc7fb25@nxp.com --- include/linux/pci-epf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 52e07602f08e..2e85504ba2ba 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -241,6 +241,9 @@ void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar, enum pci_epc_interface_type type); void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar, enum pci_epc_interface_type type); + +int pci_epf_align_inbound_addr(struct pci_epf *epf, enum pci_barno bar, + u64 addr, dma_addr_t *base, size_t *off); int pci_epf_bind(struct pci_epf *epf); void pci_epf_unbind(struct pci_epf *epf); int pci_epf_add_vepf(struct pci_epf *epf_pf, struct pci_epf *epf_vf); -- cgit v1.2.3 From eefb83790a0dda112d1755e4f5e213738d717e76 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 10 Jul 2025 15:13:53 -0400 Subject: misc: pci_endpoint_test: Add doorbell test case Add doorbell support with the help of three new registers: PCIE_ENDPOINT_TEST_DB_BAR, PCIE_ENDPOINT_TEST_DB_ADDR, and PCIE_ENDPOINT_TEST_DB_DATA. The testcase works by triggering the doorbell in Endpoint by writing the value from PCI_ENDPOINT_TEST_DB_DATA register to the address provided by PCI_ENDPOINT_TEST_DB_OFFSET register of the BAR indicated by the PCIE_ENDPOINT_TEST_DB_BAR register and waiting for the completion status from the Endpoint. Signed-off-by: Frank Li [mani: removed one spurious change and reworded the commit message] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Tested-by: Niklas Cassel Link: https://patch.msgid.link/20250710-ep-msi-v21-7-57683fc7fb25@nxp.com --- include/uapi/linux/pcitest.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h index d3aa8715a525..d6023a45a9d0 100644 --- a/include/uapi/linux/pcitest.h +++ b/include/uapi/linux/pcitest.h @@ -21,6 +21,7 @@ #define PCITEST_SET_IRQTYPE _IOW('P', 0x8, int) #define PCITEST_GET_IRQTYPE _IO('P', 0x9) #define PCITEST_BARS _IO('P', 0xa) +#define PCITEST_DOORBELL _IO('P', 0xb) #define PCITEST_CLEAR_IRQ _IO('P', 0x10) #define PCITEST_IRQ_TYPE_UNDEFINED -1 -- cgit v1.2.3 From 8245d47cfaba8a38337a447230b4d01f9946f5e1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 23 Jul 2025 22:50:26 -0700 Subject: x86: Handle KCOV __init vs inline mismatches GCC appears to have kind of fragile inlining heuristics, in the sense that it can change whether or not it inlines something based on optimizations. It looks like the kcov instrumentation being added (or in this case, removed) from a function changes the optimization results, and some functions marked "inline" are _not_ inlined. In that case, we end up with __init code calling a function not marked __init, and we get the build warnings I'm trying to eliminate in the coming patch that adds __no_sanitize_coverage to __init functions: WARNING: modpost: vmlinux: section mismatch in reference: xbc_exit+0x8 (section: .text.unlikely) -> _xbc_exit (section: .init.text) WARNING: modpost: vmlinux: section mismatch in reference: real_mode_size_needed+0x15 (section: .text.unlikely) -> real_mode_blob_end (section: .init.data) WARNING: modpost: vmlinux: section mismatch in reference: __set_percpu_decrypted+0x16 (section: .text.unlikely) -> early_set_memory_decrypted (section: .init.text) WARNING: modpost: vmlinux: section mismatch in reference: memblock_alloc_from+0x26 (section: .text.unlikely) -> memblock_alloc_try_nid (section: .init.text) WARNING: modpost: vmlinux: section mismatch in reference: acpi_arch_set_root_pointer+0xc (section: .text.unlikely) -> x86_init (section: .init.data) WARNING: modpost: vmlinux: section mismatch in reference: acpi_arch_get_root_pointer+0x8 (section: .text.unlikely) -> x86_init (section: .init.data) WARNING: modpost: vmlinux: section mismatch in reference: efi_config_table_is_usable+0x16 (section: .text.unlikely) -> xen_efi_config_table_is_usable (section: .init.text) This problem is somewhat fragile (though using either __always_inline or __init will deterministically solve it), but we've tripped over this before with GCC and the solution has usually been to just use __always_inline and move on. For x86 this means forcing several functions to be inline with __always_inline. Link: https://lore.kernel.org/r/20250724055029.3623499-2-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/acpi.h | 4 ++-- include/linux/bootconfig.h | 2 +- include/linux/efi.h | 2 +- include/linux/memblock.h | 2 +- include/linux/smp.h | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index f102c0fe3431..fc7f5e039074 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -759,13 +759,13 @@ int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count) #endif #ifndef ACPI_HAVE_ARCH_SET_ROOT_POINTER -static inline void acpi_arch_set_root_pointer(u64 addr) +static __always_inline void acpi_arch_set_root_pointer(u64 addr) { } #endif #ifndef ACPI_HAVE_ARCH_GET_ROOT_POINTER -static inline u64 acpi_arch_get_root_pointer(void) +static __always_inline u64 acpi_arch_get_root_pointer(void) { return 0; } diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index 3f4b4ac527ca..25df9260d206 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -290,7 +290,7 @@ int __init xbc_get_info(int *node_size, size_t *data_size); /* XBC cleanup data structures */ void __init _xbc_exit(bool early); -static inline void xbc_exit(void) +static __always_inline void xbc_exit(void) { _xbc_exit(false); } diff --git a/include/linux/efi.h b/include/linux/efi.h index 7d63d1d75f22..e3776d9cad07 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1334,7 +1334,7 @@ struct linux_efi_initrd { bool xen_efi_config_table_is_usable(const efi_guid_t *guid, unsigned long table); -static inline +static __always_inline bool efi_config_table_is_usable(const efi_guid_t *guid, unsigned long table) { if (!IS_ENABLED(CONFIG_XEN_EFI)) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index bb19a2534224..b96746376e17 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -463,7 +463,7 @@ static inline void *memblock_alloc_raw(phys_addr_t size, NUMA_NO_NODE); } -static inline void *memblock_alloc_from(phys_addr_t size, +static __always_inline void *memblock_alloc_from(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr) { diff --git a/include/linux/smp.h b/include/linux/smp.h index f1aa0952e8c3..84e948eb1c20 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -221,7 +221,7 @@ static inline void wake_up_all_idle_cpus(void) { } #ifdef CONFIG_UP_LATE_INIT extern void __init up_late_init(void); -static inline void smp_init(void) { up_late_init(); } +static __always_inline void smp_init(void) { up_late_init(); } #else static inline void smp_init(void) { } #endif -- cgit v1.2.3 From 0dec7201788b9152f06321d0dab46eed93834cda Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 21 Jul 2025 16:15:57 +1000 Subject: sprintf.h requires stdarg.h In file included from drivers/crypto/intel/qat/qat_common/adf_pm_dbgfs_utils.c:4: include/linux/sprintf.h:11:54: error: unknown type name 'va_list' 11 | __printf(2, 0) int vsprintf(char *buf, const char *, va_list); | ^~~~~~~ include/linux/sprintf.h:1:1: note: 'va_list' is defined in header ''; this is probably fixable by adding '#include ' Link: https://lkml.kernel.org/r/20250721173754.42865913@canb.auug.org.au Fixes: 39ced19b9e60 ("lib/vsprintf: split out sprintf() and friends") Signed-off-by: Stephen Rothwell Cc: Andriy Shevchenko Cc: Herbert Xu Cc: Petr Mladek Cc: Steven Rostedt Cc: Rasmus Villemoes Cc: Sergey Senozhatsky Cc: Signed-off-by: Andrew Morton --- include/linux/sprintf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h index 51cab2def9ec..876130091384 100644 --- a/include/linux/sprintf.h +++ b/include/linux/sprintf.h @@ -4,6 +4,7 @@ #include #include +#include int num_to_str(char *buf, int size, unsigned long long num, unsigned int width); -- cgit v1.2.3 From 71c52411c51bf4f0869c572294ce8123b26528d5 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Wed, 23 Jul 2025 01:30:29 +0000 Subject: net: Create separate gro_flush_normal function Move multiple copies of same code snippet doing `gro_flush` and `gro_normal_list` into separate helper function. Signed-off-by: Samiullah Khawaja Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250723013031.2911384-2-skhawaja@google.com Signed-off-by: Jakub Kicinski --- include/net/gro.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/gro.h b/include/net/gro.h index 22d3a69e4404..a0fca7ac6e7e 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -534,6 +534,12 @@ static inline void gro_normal_list(struct gro_node *gro) gro->rx_count = 0; } +static inline void gro_flush_normal(struct gro_node *gro, bool flush_old) +{ + gro_flush(gro, flush_old); + gro_normal_list(gro); +} + /* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, * pass the whole batch up to the stack. */ -- cgit v1.2.3 From 78afdadafe6fe0c74c08fda156e7be0a0b402b90 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Wed, 23 Jul 2025 01:30:30 +0000 Subject: net: Use netif_threaded_enable instead of netif_set_threaded in drivers Prepare for adding an enum type for NAPI threaded states by adding netif_threaded_enable API. De-export the existing netif_set_threaded API and only use it internally. Update existing drivers to use netif_threaded_enable instead of the de-exported netif_set_threaded. Note that dev_set_threaded used by mt76 debugfs file is unchanged. Signed-off-by: Samiullah Khawaja Link: https://patch.msgid.link/20250723013031.2911384-3-skhawaja@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5aee8d3895f4..a97c9a337d6b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -589,7 +589,7 @@ static inline bool napi_complete(struct napi_struct *n) return napi_complete_done(n, 0); } -int netif_set_threaded(struct net_device *dev, bool threaded); +void netif_threaded_enable(struct net_device *dev); int dev_set_threaded(struct net_device *dev, bool threaded); void napi_disable(struct napi_struct *n); -- cgit v1.2.3 From 8e7583a4f65f3dbf3e8deb4e60f3679c276bef62 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Wed, 23 Jul 2025 01:30:31 +0000 Subject: net: define an enum for the napi threaded state Instead of using '0' and '1' for napi threaded state use an enum with 'disabled' and 'enabled' states. Tested: ./tools/testing/selftests/net/nl_netdev.py TAP version 13 1..7 ok 1 nl_netdev.empty_check ok 2 nl_netdev.lo_check ok 3 nl_netdev.page_pool_check ok 4 nl_netdev.napi_list_check ok 5 nl_netdev.dev_set_threaded ok 6 nl_netdev.napi_set_threaded ok 7 nl_netdev.nsim_rxq_reset_down # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Samiullah Khawaja Link: https://patch.msgid.link/20250723013031.2911384-4-skhawaja@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 10 +++++----- include/uapi/linux/netdev.h | 5 +++++ 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a97c9a337d6b..5e5de4b0a433 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -369,7 +369,7 @@ struct napi_config { u64 irq_suspend_timeout; u32 defer_hard_irqs; cpumask_t affinity_mask; - bool threaded; + u8 threaded; unsigned int napi_id; }; @@ -590,7 +590,8 @@ static inline bool napi_complete(struct napi_struct *n) } void netif_threaded_enable(struct net_device *dev); -int dev_set_threaded(struct net_device *dev, bool threaded); +int dev_set_threaded(struct net_device *dev, + enum netdev_napi_threaded threaded); void napi_disable(struct napi_struct *n); void napi_disable_locked(struct napi_struct *n); @@ -1872,6 +1873,7 @@ enum netdev_reg_state { * @addr_len: Hardware address length * @upper_level: Maximum depth level of upper devices. * @lower_level: Maximum depth level of lower devices. + * @threaded: napi threaded state. * @neigh_priv_len: Used in neigh_alloc() * @dev_id: Used to differentiate devices that share * the same link layer address @@ -2011,8 +2013,6 @@ enum netdev_reg_state { * switch driver and used to set the phys state of the * switch port. * - * @threaded: napi threaded mode is enabled - * * @irq_affinity_auto: driver wants the core to store and re-assign the IRQ * affinity. Set by netif_enable_irq_affinity(), then * the driver must create a persistent napi by @@ -2248,6 +2248,7 @@ struct net_device { unsigned char addr_len; unsigned char upper_level; unsigned char lower_level; + u8 threaded; unsigned short neigh_priv_len; unsigned short dev_id; @@ -2429,7 +2430,6 @@ struct net_device { struct sfp_bus *sfp_bus; struct lock_class_key *qdisc_tx_busylock; bool proto_down; - bool threaded; bool irq_affinity_auto; bool rx_cpu_rmap_auto; diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 1f3719a9a0eb..48eb49aa03d4 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -77,6 +77,11 @@ enum netdev_qstats_scope { NETDEV_QSTATS_SCOPE_QUEUE = 1, }; +enum netdev_napi_threaded { + NETDEV_NAPI_THREADED_DISABLED, + NETDEV_NAPI_THREADED_ENABLED, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, -- cgit v1.2.3 From 3865301dc58aec2ab77651bdbf1e55352f50a608 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 13 Jul 2025 12:57:18 -0700 Subject: mm: optimize lru_note_cost() by adding lru_note_cost_unlock_irq() Dropping a lock, just to demand it again for an afterthought, cannot be good if contended: convert lru_note_cost() to lru_note_cost_unlock_irq(). [hughd@google.com: delete unneeded comment] Link: https://lkml.kernel.org/r/dbf9352a-1ed9-a021-c0c7-9309ac73e174@google.com Link: https://lkml.kernel.org/r/21100102-51b6-79d5-03db-1bb7f97fa94c@google.com Signed-off-by: Hugh Dickins Acked-by: Johannes Weiner Reviewed-by: Roman Gushchin Tested-by: Roman Gushchin Reviewed-by: Shakeel Butt Cc: David Hildenbrand Cc: Lorenzo Stoakes Cc: Michal Hocko Signed-off-by: Andrew Morton --- include/linux/swap.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 95c6061fa1dc..2fe6ed2cc3fd 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -376,8 +376,9 @@ extern unsigned long totalreserve_pages; /* linux/mm/swap.c */ -void lru_note_cost(struct lruvec *lruvec, bool file, - unsigned int nr_io, unsigned int nr_rotated); +void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file, + unsigned int nr_io, unsigned int nr_rotated) + __releases(lruvec->lru_lock); void lru_note_cost_refault(struct folio *); void folio_add_lru(struct folio *); void folio_add_lru_vma(struct folio *, struct vm_area_struct *); -- cgit v1.2.3 From f9f11398d4dac3c85507f31192e318b20b19af61 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 17 Jul 2025 17:55:55 +0100 Subject: mm/mremap: use an explicit uffd failure path for mremap Right now it appears that the code is relying upon the returned destination address having bits outside PAGE_MASK to indicate whether an error value is specified, and decrementing the increased refcount on the uffd ctx if so. This is not a safe means of determining an error value, so instead, be specific. It makes far more sense to do so in a dedicated error path, so add mremap_userfaultfd_fail() for this purpose and use this when an error arises. A vm_userfaultfd_ctx is not established until we are at the point where mremap_userfaultfd_prep() is invoked in copy_vma_and_data(), so this is a no-op until this happens. That is - uffd remap notification only occurs if the VMA is actually moved - at which point a UFFD_EVENT_REMAP event is raised. No errors can occur after this point currently, though it's certainly not guaranteed this will always remain the case, and we mustn't rely on this. However, the reason for needing to handle this case is that, when an error arises on a VMA move at the point of adjusting page tables, we revert this operation, and propagate the error. At this point, it is not correct to raise a uffd remap event, and we must handle it. This refactoring makes it abundantly clear what we are doing. We assume vrm->new_addr is always valid, which a prior change made the case even for mremap() invocations which don't move the VMA, however given no uffd context would be set up in this case it's immaterial to this change anyway. No functional change intended. Link: https://lkml.kernel.org/r/a70e8a1f7bce9f43d1431065b414e0f212297297.1752770784.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Cc: Al Viro Cc: Christian Brauner Cc: Jan Kara Cc: Jann Horn Cc: Liam Howlett Cc: Peter Xu Cc: Rik van Riel Signed-off-by: Andrew Morton --- include/linux/userfaultfd_k.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index df85330bcfa6..c0e716aec26a 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -259,6 +259,7 @@ extern void mremap_userfaultfd_prep(struct vm_area_struct *, extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *, unsigned long from, unsigned long to, unsigned long len); +void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *); extern bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, @@ -371,6 +372,10 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx, { } +static inline void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *ctx) +{ +} + static inline bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end) -- cgit v1.2.3 From 441413d2a99d1d23bea2df2497493024b00ace57 Mon Sep 17 00:00:00 2001 From: Anthony Yznaga Date: Tue, 15 Jul 2025 18:26:11 -0700 Subject: mm: drop hugetlb_free_pgd_range() There are no longer any callers of hugetlb_free_pgd_range(). Link: https://lkml.kernel.org/r/20250716012611.10369-4-anthony.yznaga@oracle.com Signed-off-by: Anthony Yznaga Acked-by: Mike Rapoport (Microsoft) Acked-by: Oscar Salvador Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anshuman Khandual Cc: Arnd Bergmann Cc: Christophe Leroy Cc: David Hildenbrand Cc: David S. Miller Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Muchun Song Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton --- include/asm-generic/hugetlb.h | 9 --------- include/linux/hugetlb.h | 7 ------- 2 files changed, 16 deletions(-) (limited to 'include') diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index 4bce4f07f44f..dcb8727f2b82 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -66,15 +66,6 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, } #endif -#ifndef __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE -static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, - unsigned long addr, unsigned long end, - unsigned long floor, unsigned long ceiling) -{ - free_pgd_range(tlb, addr, end, floor, ceiling); -} -#endif - #ifndef __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 474de8e2a8f2..526d27e88b3b 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -390,13 +390,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, return 0; } -static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, - unsigned long addr, unsigned long end, - unsigned long floor, unsigned long ceiling) -{ - BUG(); -} - #ifdef CONFIG_USERFAULTFD static inline int hugetlb_mfill_atomic_pte(pte_t *dst_pte, struct vm_area_struct *dst_vma, -- cgit v1.2.3 From 378bdb97405a00bf03d8d993435c83add1688e36 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Jul 2025 19:46:43 +0000 Subject: memcg: convert memcg->socket_pressure to u64 memcg->socket_pressure is initialised with jiffies when the memcg is created. Once vmpressure detects that the cgroup is under memory pressure, the field is updated with jiffies + HZ to signal the fact to the socket layer and suppress memory allocation for one second. Otherwise, the field is not updated. mem_cgroup_under_socket_pressure() uses time_before() to check if jiffies is less than memcg->socket_pressure, and this has a bug on 32-bit kernel. if (time_before(jiffies, memcg->socket_pressure)) return true; As time_before() casts the final result to long, the acceptable delta between two timestamps is 2 ^ (BITS_PER_LONG - 1). On 32-bit kernel with CONFIG_HZ=1000, this is about 24 days. >>> (2 ** 31) / 1000 / 60 / 60 / 24 24.855134814814818 Once 24 days have passed since the last update of socket_pressure, mem_cgroup_under_socket_pressure() starts to lie until the next 24 days pass. We don't need to worry about this on 64-bit machines unless they serve for 300 million years. >>> (2 ** 63) / 1000 / 60 / 60 / 24 / 365 292471208.6775361 Let's convert memcg->socket_pressure to u64. Performance teting: I don't have a real 32-bit machine so this is a result on QEMU, but with/without the u64 jiffie patch, the time spent in mem_cgroup_under_socket_pressure() was 1~5us and I didn't see any measurable delta. no patch applied: iperf3 273 [000] 137.296248: probe:mem_cgroup_under_socket_pressure: (c13660d0) c13660d1 mem_cgroup_under_socket_pressure+0x1 ([kernel.kallsyms]) iperf3 273 [000] 137.296249: probe:mem_cgroup_under_socket_pressure__return: (c13660d0 <- c1d8fd7f) iperf3 273 [000] 137.296251: probe:mem_cgroup_under_socket_pressure: (c13660d0) c13660d1 mem_cgroup_under_socket_pressure+0x1 ([kernel.kallsyms]) iperf3 273 [000] 137.296253: probe:mem_cgroup_under_socket_pressure__return: (c13660d0 <- c1d8fd7f) u64 jiffies patch applied: iperf3 308 [001] 330.669370: probe:mem_cgroup_under_socket_pressure: (c12ddba0) c12ddba1 mem_cgroup_under_socket_pressure+0x1 ([kernel.kallsyms]) iperf3 308 [001] 330.669371: probe:mem_cgroup_under_socket_pressure__return: (c12ddba0 <- c1ce98bf) iperf3 308 [001] 330.669382: probe:mem_cgroup_under_socket_pressure: (c12ddba0) c12ddba1 mem_cgroup_under_socket_pressure+0x1 ([kernel.kallsyms]) iperf3 308 [001] 330.669384: probe:mem_cgroup_under_socket_pressure__return: (c12ddba0 <- c1ce98bf) So the u64 approach is good enough. Link: https://lkml.kernel.org/r/20250717194645.1096500-1-kuniyu@google.com Fixes: 8e8ae645249b ("mm: memcontrol: hook up vmpressure to socket pressure") Signed-off-by: Kuniyuki Iwashima Reported-by: Neal Cardwell Suggested-by: Andrew Morton Acked-by: Shakeel Butt Acked-by: Johannes Weiner Cc: David S. Miller Cc: Eric Dumazet Cc: Johannes Weiner Cc: Vladimir Davydov Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 87b6688f124a..785173aa0739 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -251,8 +251,10 @@ struct mem_cgroup { * that this indicator should NOT be used in legacy cgroup mode * where socket memory is accounted/charged separately. */ - unsigned long socket_pressure; - + u64 socket_pressure; +#if BITS_PER_LONG < 64 + seqlock_t socket_pressure_seqlock; +#endif int kmemcg_id; /* * memcg->objcg is wiped out as a part of the objcg repaprenting @@ -1602,6 +1604,42 @@ extern struct static_key_false memcg_sockets_enabled_key; #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) void mem_cgroup_sk_alloc(struct sock *sk); void mem_cgroup_sk_free(struct sock *sk); + +#if BITS_PER_LONG < 64 +static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg) +{ + u64 val = get_jiffies_64() + HZ; + unsigned long flags; + + write_seqlock_irqsave(&memcg->socket_pressure_seqlock, flags); + memcg->socket_pressure = val; + write_sequnlock_irqrestore(&memcg->socket_pressure_seqlock, flags); +} + +static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg) +{ + unsigned int seq; + u64 val; + + do { + seq = read_seqbegin(&memcg->socket_pressure_seqlock); + val = memcg->socket_pressure; + } while (read_seqretry(&memcg->socket_pressure_seqlock, seq)); + + return val; +} +#else +static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg) +{ + WRITE_ONCE(memcg->socket_pressure, jiffies + HZ); +} + +static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg) +{ + return READ_ONCE(memcg->socket_pressure); +} +#endif + static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { #ifdef CONFIG_MEMCG_V1 @@ -1609,7 +1647,7 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) return !!memcg->tcpmem_pressure; #endif /* CONFIG_MEMCG_V1 */ do { - if (time_before(jiffies, READ_ONCE(memcg->socket_pressure))) + if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg))) return true; } while ((memcg = parent_mem_cgroup(memcg))); return false; -- cgit v1.2.3 From 92c99fc614737eaa99125283c306d2ebb13b101a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 14 Jul 2025 09:16:51 -0400 Subject: mm/memory: introduce is_huge_zero_pfn() and use it in vm_normal_page_pmd() Patch series "mm: introduce snapshot_page()", v3. This series introduces snapshot_page(), a helper function that can be used to create a snapshot of a struct page and its associated struct folio. This function is intended to help callers with a consistent view of a a folio while reducing the chance of encountering partially updated or inconsistent state, such as during folio splitting which could lead to crashes and BUG_ON()s being triggered. This patch (of 4): Let's avoid working with the PMD when not required. If vm_normal_page_pmd() would be called on something that is not a present pmd, it would already be a bug (pfn possibly garbage). While at it, let's support passing in any pfn covered by the huge zero folio by masking off PFN bits -- which should be rather cheap. Link: https://lkml.kernel.org/r/cover.1752499009.git.luizcap@redhat.com Link: https://lkml.kernel.org/r/4940826e99f0c709a7cf7beb94f53288320aea5a.1752499009.git.luizcap@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Signed-off-by: Luiz Capitulino Reviewed-by: Shivank Garg Tested-by: Harry Yoo Cc: Matthew Wilcox (Oracle) Cc: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 4d5bb67dc4ec..7748489fde1b 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -482,9 +482,14 @@ static inline bool is_huge_zero_folio(const struct folio *folio) return READ_ONCE(huge_zero_folio) == folio; } +static inline bool is_huge_zero_pfn(unsigned long pfn) +{ + return READ_ONCE(huge_zero_pfn) == (pfn & ~(HPAGE_PMD_NR - 1)); +} + static inline bool is_huge_zero_pmd(pmd_t pmd) { - return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd); + return pmd_present(pmd) && is_huge_zero_pfn(pmd_pfn(pmd)); } struct folio *mm_get_huge_zero_folio(struct mm_struct *mm); @@ -632,6 +637,11 @@ static inline bool is_huge_zero_folio(const struct folio *folio) return false; } +static inline bool is_huge_zero_pfn(unsigned long pfn) +{ + return false; +} + static inline bool is_huge_zero_pmd(pmd_t pmd) { return false; -- cgit v1.2.3 From d863a12108f24c5f0b49f99f328e33371bd7c69d Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Mon, 14 Jul 2025 09:16:52 -0400 Subject: mm/util: introduce snapshot_page() This commit refactors __dump_page() into snapshot_page(). snapshot_page() tries to take a faithful snapshot of a page and its folio representation. The snapshot is returned in the struct page_snapshot parameter along with additional flags that are best retrieved at snapshot creation time to reduce race windows. This function is intended to be used by callers that need a stable representation of a struct page and struct folio so that pointers or page information doesn't change while working on a page. The idea and original implementation of snapshot_page() comes from Matthew Wilcox with suggestions for improvements from David Hildenbrand. All bugs and misconceptions are mine. [luizcap@redhat.com: fix set_ps_flags() commentary] Link: https://lkml.kernel.org/r/d5c75701-b353-4536-a306-187fab0655b3@redhat.com Link: https://lkml.kernel.org/r/637a03a05cb2e3df88f84ff9e9f9642374ef813a.1752499009.git.luizcap@redhat.com Signed-off-by: Luiz Capitulino Reviewed-by: Shivank Garg Tested-by: Harry Yoo Acked-by: David Hildenbrand Cc: Matthew Wilcox (Oracle) Cc: Oscar Salvador Cc: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/mm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 805108d7bbc3..8e3a4c5b78ff 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4199,4 +4199,23 @@ static inline bool page_pool_page_is_pp(struct page *page) } #endif +#define PAGE_SNAPSHOT_FAITHFUL (1 << 0) +#define PAGE_SNAPSHOT_PG_BUDDY (1 << 1) +#define PAGE_SNAPSHOT_PG_IDLE (1 << 2) + +struct page_snapshot { + struct folio folio_snapshot; + struct page page_snapshot; + unsigned long pfn; + unsigned long idx; + unsigned long flags; +}; + +static inline bool snapshot_page_is_faithful(const struct page_snapshot *ps) +{ + return ps->flags & PAGE_SNAPSHOT_FAITHFUL; +} + +void snapshot_page(struct page_snapshot *ps, const struct page *page); + #endif /* _LINUX_MM_H */ -- cgit v1.2.3 From 5631da56c9a87ea41d69d1bbbc1cee327eb9354b Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sat, 19 Jul 2025 11:28:54 -0700 Subject: fs/proc/task_mmu: read proc/pid/maps under per-vma lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With maple_tree supporting vma tree traversal under RCU and per-vma locks, /proc/pid/maps can be read while holding individual vma locks instead of locking the entire address space. A completely lockless approach (walking vma tree under RCU) would be quite complex with the main issue being get_vma_name() using callbacks which might not work correctly with a stable vma copy, requiring original (unstable) vma - see special_mapping_name() for example. When per-vma lock acquisition fails, we take the mmap_lock for reading, lock the vma, release the mmap_lock and continue. This fallback to mmap read lock guarantees the reader to make forward progress even during lock contention. This will interfere with the writer but for a very short time while we are acquiring the per-vma lock and only when there was contention on the vma reader is interested in. We shouldn't see a repeated fallback to mmap read locks in practice, as this require a very unlikely series of lock contentions (for instance due to repeated vma split operations). However even if this did somehow happen, we would still progress. One case requiring special handling is when a vma changes between the time it was found and the time it got locked. A problematic case would be if a vma got shrunk so that its vm_start moved higher in the address space and a new vma was installed at the beginning: reader found: |--------VMA A--------| VMA is modified: |-VMA B-|----VMA A----| reader locks modified VMA A reader reports VMA A: | gap |----VMA A----| This would result in reporting a gap in the address space that does not exist. To prevent this we retry the lookup after locking the vma, however we do that only when we identify a gap and detect that the address space was changed after we found the vma. This change is designed to reduce mmap_lock contention and prevent a process reading /proc/pid/maps files (often a low priority task, such as monitoring/data collection services) from blocking address space updates. Note that this change has a userspace visible disadvantage: it allows for sub-page data tearing as opposed to the previous mechanism where data tearing could happen only between pages of generated output data. Since current userspace considers data tearing between pages to be acceptable, we assume is will be able to handle sub-page data tearing as well. Link: https://lkml.kernel.org/r/20250719182854.3166724-7-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Vlastimil Babka Cc: Alexey Dobriyan Cc: Andrii Nakryiko Cc: Christian Brauner Cc: Christophe Leroy Cc: David Hildenbrand Cc: Jann Horn Cc: Jeongjun Park Cc: Johannes Weiner Cc: Josef Bacik Cc: Kalesh Singh Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Oscar Salvador Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Ryan Roberts Cc: Shuah Khan Cc: Thomas Weißschuh Cc: T.J. Mercier Cc: Ye Bin Signed-off-by: Andrew Morton --- include/linux/mmap_lock.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 5da384bd0a26..1f4f44951abe 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -309,6 +309,17 @@ void vma_mark_detached(struct vm_area_struct *vma); struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, unsigned long address); +/* + * Locks next vma pointed by the iterator. Confirms the locked vma has not + * been modified and will retry under mmap_lock protection if modification + * was detected. Should be called from read RCU section. + * Returns either a valid locked VMA, NULL if no more VMAs or -EINTR if the + * process was interrupted. + */ +struct vm_area_struct *lock_next_vma(struct mm_struct *mm, + struct vma_iterator *iter, + unsigned long address); + #else /* CONFIG_PER_VMA_LOCK */ static inline void mm_lock_seqcount_init(struct mm_struct *mm) {} -- cgit v1.2.3 From 0aa3657df3ec713fca1f00a57a063b28f2a78147 Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Fri, 18 Jul 2025 14:32:40 +0530 Subject: mm: add batched versions of ptep_modify_prot_start/commit Batch ptep_modify_prot_start/commit in preparation for optimizing mprotect, implementing them as a simple loop over the corresponding single pte helpers. Architecture may override these helpers. Link: https://lkml.kernel.org/r/20250718090244.21092-4-dev.jain@arm.com Signed-off-by: Dev Jain Reviewed-by: Lorenzo Stoakes Reviewed-by: Barry Song Reviewed-by: Ryan Roberts Reviewed-by: Zi Yan Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Christophe Leroy Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jann Horn Cc: Joey Gouly Cc: Kevin Brodsky Cc: Lance Yang Cc: Liam Howlett Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Cc: Vlastimil Babka Cc: Will Deacon Cc: Yang Shi Cc: Yicong Yang Cc: Zhenhua Huang Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 84 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index cf1515c163e2..e3b99920be05 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1331,7 +1331,9 @@ static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma, /* * Commit an update to a pte, leaving any hardware-controlled bits in - * the PTE unmodified. + * the PTE unmodified. The pte returned from ptep_modify_prot_start() may + * additionally have young and/or dirty bits set where previously they were not, + * so the updated pte may have these additional changes. */ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, @@ -1340,6 +1342,86 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, __ptep_modify_prot_commit(vma, addr, ptep, pte); } #endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */ + +/** + * modify_prot_start_ptes - Start a pte protection read-modify-write transaction + * over a batch of ptes, which protects against asynchronous hardware + * modifications to the ptes. The intention is not to prevent the hardware from + * making pte updates, but to prevent any updates it may make from being lost. + * Please see the comment above ptep_modify_prot_start() for full description. + * + * @vma: The virtual memory area the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries. + * + * May be overridden by the architecture; otherwise, implemented as a simple + * loop over ptep_modify_prot_start(), collecting the a/d bits from each pte + * in the batch. + * + * Note that PTE bits in the PTE batch besides the PFN can differ. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. All other PTE bits must be identical for + * all PTEs in the batch except for young and dirty bits. The PTEs are all in + * the same PMD. + */ +#ifndef modify_prot_start_ptes +static inline pte_t modify_prot_start_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, unsigned int nr) +{ + pte_t pte, tmp_pte; + + pte = ptep_modify_prot_start(vma, addr, ptep); + while (--nr) { + ptep++; + addr += PAGE_SIZE; + tmp_pte = ptep_modify_prot_start(vma, addr, ptep); + if (pte_dirty(tmp_pte)) + pte = pte_mkdirty(pte); + if (pte_young(tmp_pte)) + pte = pte_mkyoung(pte); + } + return pte; +} +#endif + +/** + * modify_prot_commit_ptes - Commit an update to a batch of ptes, leaving any + * hardware-controlled bits in the PTE unmodified. + * + * @vma: The virtual memory area the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @old_pte: Old page table entry (for the first entry) which is now cleared. + * @pte: New page table entry to be set. + * @nr: Number of entries. + * + * May be overridden by the architecture; otherwise, implemented as a simple + * loop over ptep_modify_prot_commit(). + * + * Context: The caller holds the page table lock. The PTEs are all in the same + * PMD. On exit, the set ptes in the batch map the same folio. The ptes set by + * ptep_modify_prot_start() may additionally have young and/or dirty bits set + * where previously they were not, so the updated ptes may have these + * additional changes. + */ +#ifndef modify_prot_commit_ptes +static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep, pte_t old_pte, pte_t pte, unsigned int nr) +{ + int i; + + for (i = 0; i < nr; ++i, ++ptep, addr += PAGE_SIZE) { + ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte); + + /* Advance PFN only, set same prot */ + old_pte = pte_next_pfn(old_pte); + pte = pte_next_pfn(pte); + } +} +#endif + #endif /* CONFIG_MMU */ /* -- cgit v1.2.3 From 45cd52c44e85453c9147d41b715cd03c53325cf4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 21 Jul 2025 21:46:18 +0100 Subject: mm: remove grab_cache_page() All callers have been converted to use filemap_grab_folio(). Link: https://lkml.kernel.org/r/20250721204619.163883-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 10a222e68b85..a5ef64a15f96 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -878,7 +878,8 @@ static inline struct page *find_or_create_page(struct address_space *mapping, * @mapping: target address_space * @index: the page index * - * Same as grab_cache_page(), but do not wait if the page is unavailable. + * Returns locked page at given index in given cache, creating it if + * needed, but do not wait if the page is locked or to reclaim memory. * This is intended for speculative data generators, where the data can * be regenerated if the page couldn't be grabbed. This routine should * be safe to call while holding the lock for another page. @@ -942,15 +943,6 @@ unsigned filemap_get_folios_contig(struct address_space *mapping, unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start, pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch); -/* - * Returns locked page at given index in given cache, creating it if needed. - */ -static inline struct page *grab_cache_page(struct address_space *mapping, - pgoff_t index) -{ - return find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); -} - struct folio *read_cache_folio(struct address_space *, pgoff_t index, filler_t *filler, struct file *file); struct folio *mapping_read_folio_gfp(struct address_space *, pgoff_t index, -- cgit v1.2.3 From ffc72771ff6ec9f5b431a86c4b00d8ef0fea958b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 25 Jul 2025 13:03:27 +0200 Subject: regmap: Annotate that MMIO implies fast IO Document that using the MMIO helpers will automatically enable the 'fast_io' parameter. This makes the used locking scheme more transparent and avoids superfluous setting of this parameter in drivers. Signed-off-by: Wolfram Sang Link: https://patch.msgid.link/20250725110337.4303-2-wsa+renesas@sang-engineering.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 02b83f5499b8..4e1ac1fbcec4 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -913,7 +913,7 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, * @config: Configuration for register map * * The return value will be an ERR_PTR() on error or a valid pointer to - * a struct regmap. + * a struct regmap. Implies 'fast_io'. */ #define regmap_init_mmio_clk(dev, clk_id, regs, config) \ __regmap_lockdep_wrapper(__regmap_init_mmio_clk, #config, \ @@ -927,7 +927,7 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, * @config: Configuration for register map * * The return value will be an ERR_PTR() on error or a valid pointer to - * a struct regmap. + * a struct regmap. Implies 'fast_io'. */ #define regmap_init_mmio(dev, regs, config) \ regmap_init_mmio_clk(dev, NULL, regs, config) @@ -1138,7 +1138,7 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); * * The return value will be an ERR_PTR() on error or a valid pointer * to a struct regmap. The regmap will be automatically freed by the - * device management code. + * device management code. Implies 'fast_io'. */ #define devm_regmap_init_mmio_clk(dev, clk_id, regs, config) \ __regmap_lockdep_wrapper(__devm_regmap_init_mmio_clk, #config, \ @@ -1153,7 +1153,7 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); * * The return value will be an ERR_PTR() on error or a valid pointer * to a struct regmap. The regmap will be automatically freed by the - * device management code. + * device management code. Implies 'fast_io'. */ #define devm_regmap_init_mmio(dev, regs, config) \ devm_regmap_init_mmio_clk(dev, NULL, regs, config) -- cgit v1.2.3 From 43cf0e05089afe23dac74fa6e1e109d49f2903c4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 12 Jun 2025 10:12:59 -0400 Subject: powerpc/thp: tracing: Hide hugepage events under CONFIG_PPC_BOOK3S_64 The events hugepage_set_pmd, hugepage_set_pud, hugepage_update_pmd and hugepage_update_pud are only called when CONFIG_PPC_BOOK3S_64 is defined. As each event can take up to 5K regardless if they are used or not, it's best not to define them when they are not used. Add #ifdef around these events when they are not used. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Michael Ellerman Link: https://lore.kernel.org/20250612101259.0ad43e48@batman.local.home Acked-by: David Hildenbrand Acked-by: Madhavan Srinivasan Signed-off-by: Steven Rostedt (Google) --- include/trace/events/thp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/trace/events/thp.h b/include/trace/events/thp.h index f50048af5fcc..c8fe879d5828 100644 --- a/include/trace/events/thp.h +++ b/include/trace/events/thp.h @@ -8,6 +8,7 @@ #include #include +#ifdef CONFIG_PPC_BOOK3S_64 DECLARE_EVENT_CLASS(hugepage_set, TP_PROTO(unsigned long addr, unsigned long pte), @@ -66,6 +67,7 @@ DEFINE_EVENT(hugepage_update, hugepage_update_pud, TP_PROTO(unsigned long addr, unsigned long pud, unsigned long clr, unsigned long set), TP_ARGS(addr, pud, clr, set) ); +#endif /* CONFIG_PPC_BOOK3S_64 */ DECLARE_EVENT_CLASS(migration_pmd, -- cgit v1.2.3 From 54091eee08acebfb5e971611c3f189e7577a1058 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 25 Jul 2025 10:58:14 +0900 Subject: scsi: libsas: Refactor dev_is_sata() Use a switch statement in dev_is_sata() to make the code more readable (and probably slightly better than a series of or conditions). Also have this inline function return a boolean instead of an integer. No functional changes. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20250725015818.171252-2-dlemoal@kernel.org Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Martin K. Petersen --- include/scsi/sas_ata.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h index 92e27e7bf088..8dddd0036f99 100644 --- a/include/scsi/sas_ata.h +++ b/include/scsi/sas_ata.h @@ -15,10 +15,17 @@ #ifdef CONFIG_SCSI_SAS_ATA -static inline int dev_is_sata(struct domain_device *dev) +static inline bool dev_is_sata(struct domain_device *dev) { - return dev->dev_type == SAS_SATA_DEV || dev->dev_type == SAS_SATA_PM || - dev->dev_type == SAS_SATA_PM_PORT || dev->dev_type == SAS_SATA_PENDING; + switch (dev->dev_type) { + case SAS_SATA_DEV: + case SAS_SATA_PENDING: + case SAS_SATA_PM: + case SAS_SATA_PM_PORT: + return true; + default: + return false; + } } int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy); @@ -49,9 +56,9 @@ static inline void sas_ata_disabled_notice(void) pr_notice_once("ATA device seen but CONFIG_SCSI_SAS_ATA=N\n"); } -static inline int dev_is_sata(struct domain_device *dev) +static inline bool dev_is_sata(struct domain_device *dev) { - return 0; + return false; } static inline int sas_ata_init(struct domain_device *dev) { -- cgit v1.2.3 From bd31394aabf36ee18781c6371e02d789484ffda3 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 25 Jul 2025 10:58:16 +0900 Subject: scsi: libsas: Make sas_get_ata_info() static The function sas_get_ata_info() is used only in drivers/scsi/libsas/sas_ata.c. Remove its definition from include/scsi/sas_ata.h and make this function static. No functional changes. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20250725015818.171252-4-dlemoal@kernel.org Reviewed-by: Johannes Thumshirn Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Martin K. Petersen --- include/scsi/sas_ata.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h index 8dddd0036f99..5e3475975aee 100644 --- a/include/scsi/sas_ata.h +++ b/include/scsi/sas_ata.h @@ -28,7 +28,6 @@ static inline bool dev_is_sata(struct domain_device *dev) } } -int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy); int sas_ata_init(struct domain_device *dev); void sas_ata_task_abort(struct sas_task *task); void sas_ata_strategy_handler(struct Scsi_Host *shost); @@ -96,11 +95,6 @@ static inline void sas_resume_sata(struct asd_sas_port *port) { } -static inline int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy) -{ - return 0; -} - static inline void sas_ata_end_eh(struct ata_port *ap) { } -- cgit v1.2.3 From 704ed03abf6b1c2752a8b16446a5ebf18694fefe Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 25 Jul 2025 10:58:17 +0900 Subject: scsi: libsas: Move declarations of internal functions to sas_internal.h Move the declaration of all functions used only within libsas from include/scsi/sas_ata.h to drivers/scsi/libsas/sas_internal.h. No functional changes. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20250725015818.171252-5-dlemoal@kernel.org Reviewed-by: Johannes Thumshirn Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Martin K. Petersen --- include/scsi/sas_ata.h | 68 +------------------------------------------------- 1 file changed, 1 insertion(+), 67 deletions(-) (limited to 'include') diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h index 5e3475975aee..a161c0222931 100644 --- a/include/scsi/sas_ata.h +++ b/include/scsi/sas_ata.h @@ -28,77 +28,24 @@ static inline bool dev_is_sata(struct domain_device *dev) } } -int sas_ata_init(struct domain_device *dev); -void sas_ata_task_abort(struct sas_task *task); -void sas_ata_strategy_handler(struct Scsi_Host *shost); -void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q); void sas_ata_schedule_reset(struct domain_device *dev); -void sas_ata_wait_eh(struct domain_device *dev); -void sas_probe_sata(struct asd_sas_port *port); -void sas_suspend_sata(struct asd_sas_port *port); -void sas_resume_sata(struct asd_sas_port *port); -void sas_ata_end_eh(struct ata_port *ap); void sas_ata_device_link_abort(struct domain_device *dev, bool force_reset); -int sas_execute_ata_cmd(struct domain_device *device, u8 *fis, - int force_phy_id); +int sas_execute_ata_cmd(struct domain_device *device, u8 *fis, int force_phy_id); int smp_ata_check_ready_type(struct ata_link *link); -int sas_discover_sata(struct domain_device *dev); -int sas_ata_add_dev(struct domain_device *parent, struct ex_phy *phy, - struct domain_device *child, int phy_id); extern const struct attribute_group sas_ata_sdev_attr_group; #else -static inline void sas_ata_disabled_notice(void) -{ - pr_notice_once("ATA device seen but CONFIG_SCSI_SAS_ATA=N\n"); -} - static inline bool dev_is_sata(struct domain_device *dev) { return false; } -static inline int sas_ata_init(struct domain_device *dev) -{ - return 0; -} -static inline void sas_ata_task_abort(struct sas_task *task) -{ -} - -static inline void sas_ata_strategy_handler(struct Scsi_Host *shost) -{ -} - -static inline void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q) -{ -} static inline void sas_ata_schedule_reset(struct domain_device *dev) { } -static inline void sas_ata_wait_eh(struct domain_device *dev) -{ -} - -static inline void sas_probe_sata(struct asd_sas_port *port) -{ -} - -static inline void sas_suspend_sata(struct asd_sas_port *port) -{ -} - -static inline void sas_resume_sata(struct asd_sas_port *port) -{ -} - -static inline void sas_ata_end_eh(struct ata_port *ap) -{ -} - static inline void sas_ata_device_link_abort(struct domain_device *dev, bool force_reset) { @@ -115,19 +62,6 @@ static inline int smp_ata_check_ready_type(struct ata_link *link) return 0; } -static inline int sas_discover_sata(struct domain_device *dev) -{ - sas_ata_disabled_notice(); - return -ENXIO; -} - -static inline int sas_ata_add_dev(struct domain_device *parent, struct ex_phy *phy, - struct domain_device *child, int phy_id) -{ - sas_ata_disabled_notice(); - return -ENODEV; -} - #define sas_ata_sdev_attr_group ((struct attribute_group) {}) #endif -- cgit v1.2.3 From 24cbfe18d55a6866bc2e27fda74306f4a1b5cb01 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 24 Jul 2025 19:33:27 +0200 Subject: rv: Merge struct rv_monitor_def into struct rv_monitor Each struct rv_monitor has a unique struct rv_monitor_def associated with it. struct rv_monitor is statically allocated, while struct rv_monitor_def is dynamically allocated. This makes the code more complicated than it should be: - Lookup is required to get the associated rv_monitor_def from rv_monitor - Dynamic memory allocation is required for rv_monitor_def. This is harder to get right compared to static memory. For instance, there is an existing mistake: rv_unregister_monitor() does not free the memory allocated by rv_register_monitor(). This is fortunately not a real memory leak problem, as rv_unregister_monitor() is never called. Simplify and merge rv_monitor_def into rv_monitor. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/194449c00f87945c207aab4c96920c75796a4f53.1753378331.git.namcao@linutronix.de Reviewed-by: Gabriele Monaco Signed-off-by: Nam Cao Signed-off-by: Steven Rostedt (Google) --- include/linux/rv.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/rv.h b/include/linux/rv.h index 97baf58d88b2..dba53aecdfab 100644 --- a/include/linux/rv.h +++ b/include/linux/rv.h @@ -7,6 +7,9 @@ #ifndef _LINUX_RV_H #define _LINUX_RV_H +#include +#include + #define MAX_DA_NAME_LEN 32 #ifdef CONFIG_RV @@ -98,8 +101,13 @@ struct rv_monitor { void (*disable)(void); void (*reset)(void); #ifdef CONFIG_RV_REACTORS + struct rv_reactor_def *rdef; __printf(1, 2) void (*react)(const char *msg, ...); + bool reacting; #endif + struct list_head list; + struct rv_monitor *parent; + struct dentry *root_d; }; bool rv_monitoring_on(void); -- cgit v1.2.3 From 3d3c376118b5f7ed7723c2b4fd7a0a1c1893d63e Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 24 Jul 2025 19:33:28 +0200 Subject: rv: Merge struct rv_reactor_def into struct rv_reactor Each struct rv_reactor has a unique struct rv_reactor_def associated with it. struct rv_reactor is statically allocated, while struct rv_reactor_def is dynamically allocated. This makes the code more complicated than it should be: - Lookup is required to get the associated rv_reactor_def from rv_reactor - Dynamic memory allocation is required for rv_reactor_def. This is harder to get right compared to static memory. For instance, there is an existing mistake: rv_unregister_reactor() does not free the memory allocated by rv_register_reactor(). This is fortunately not a real memory leak problem as rv_unregister_reactor() is never called. Simplify and merge rv_reactor_def into rv_reactor. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/71cb91c86cd40df5b8c492b788787f2a73c3eaa3.1753378331.git.namcao@linutronix.de Reviewed-by: Gabriele Monaco Signed-off-by: Nam Cao Signed-off-by: Steven Rostedt (Google) --- include/linux/rv.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rv.h b/include/linux/rv.h index dba53aecdfab..c22c9b8c1567 100644 --- a/include/linux/rv.h +++ b/include/linux/rv.h @@ -90,6 +90,9 @@ struct rv_reactor { const char *name; const char *description; __printf(1, 2) void (*react)(const char *msg, ...); + struct list_head list; + /* protected by the monitor interface lock */ + int counter; }; #endif @@ -101,7 +104,7 @@ struct rv_monitor { void (*disable)(void); void (*reset)(void); #ifdef CONFIG_RV_REACTORS - struct rv_reactor_def *rdef; + struct rv_reactor *reactor; __printf(1, 2) void (*react)(const char *msg, ...); bool reacting; #endif -- cgit v1.2.3 From 3d3800b4f7f4b1472a0ec2cffd535c05603f8f60 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 24 Jul 2025 19:33:29 +0200 Subject: rv: Remove rv_reactor's reference counter rv_reactor has a reference counter to ensure it is not removed while monitors are still using it. However, this is futile, as __exit functions are not expected to fail and will proceed normally despite rv_unregister_reactor() returning an error. At the moment, reactors do not support being built as modules, therefore they are never removed and the reference counters are not necessary. If we support building RV reactors as modules in the future, kernel module's centralized facilities such as try_module_get(), module_put() or MODULE_SOFTDEP should be used instead of this custom implementation. Remove this reference counter. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/bb946398436a5e17fb0f5b842ef3313c02291852.1753378331.git.namcao@linutronix.de Reviewed-by: Gabriele Monaco Signed-off-by: Nam Cao Signed-off-by: Steven Rostedt (Google) --- include/linux/rv.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/rv.h b/include/linux/rv.h index c22c9b8c1567..2f867d6f72ba 100644 --- a/include/linux/rv.h +++ b/include/linux/rv.h @@ -91,8 +91,6 @@ struct rv_reactor { const char *description; __printf(1, 2) void (*react)(const char *msg, ...); struct list_head list; - /* protected by the monitor interface lock */ - int counter; }; #endif -- cgit v1.2.3 From b8a7fba39cd49eab343bfe561d85bb5dc57541af Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 24 Jul 2025 19:33:30 +0200 Subject: rv: Remove struct rv_monitor::reacting The field 'reacting' in struct rv_monitor is set but never used. Delete it. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/a6c16f845d2f1a09c4d0934ab83f3cb14478a71d.1753378331.git.namcao@linutronix.de Reviewed-by: Gabriele Monaco Signed-off-by: Nam Cao Signed-off-by: Steven Rostedt (Google) --- include/linux/rv.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/rv.h b/include/linux/rv.h index 2f867d6f72ba..80731242fe60 100644 --- a/include/linux/rv.h +++ b/include/linux/rv.h @@ -104,7 +104,6 @@ struct rv_monitor { #ifdef CONFIG_RV_REACTORS struct rv_reactor *reactor; __printf(1, 2) void (*react)(const char *msg, ...); - bool reacting; #endif struct list_head list; struct rv_monitor *parent; -- cgit v1.2.3 From 4d18a0b98259c2fa62f04ce5f94a7ec6e840f220 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Mon, 14 Jul 2025 21:03:16 +0800 Subject: ext4: get rid of some obsolete EXT4_MB_HINT flags Since nobody has used these EXT4_MB_HINT flags for ages, let's remove them. Signed-off-by: Baokun Li Reviewed-by: Ojaswin Mujoo Reviewed-by: Jan Kara Reviewed-by: Zhang Yi Link: https://patch.msgid.link/20250714130327.1830534-7-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 845451077c41..53dd2cc28fc5 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -23,10 +23,7 @@ struct partial_cluster; #define show_mballoc_flags(flags) __print_flags(flags, "|", \ { EXT4_MB_HINT_MERGE, "HINT_MERGE" }, \ - { EXT4_MB_HINT_RESERVED, "HINT_RESV" }, \ - { EXT4_MB_HINT_METADATA, "HINT_MDATA" }, \ { EXT4_MB_HINT_FIRST, "HINT_FIRST" }, \ - { EXT4_MB_HINT_BEST, "HINT_BEST" }, \ { EXT4_MB_HINT_DATA, "HINT_DATA" }, \ { EXT4_MB_HINT_NOPREALLOC, "HINT_NOPREALLOC" }, \ { EXT4_MB_HINT_GROUP_ALLOC, "HINT_GRP_ALLOC" }, \ -- cgit v1.2.3 From e89a68046687fe9913ce3bfad82f7ccbb65687e0 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Mon, 26 May 2025 16:59:02 +0800 Subject: netfilter: load nf_log_syslog on enabling nf_conntrack_log_invalid When no logger is registered, nf_conntrack_log_invalid fails to log invalid packets, leaving users unaware of actual invalid traffic. Improve this by loading nf_log_syslog, similar to how 'iptables -I FORWARD 1 -m conntrack --ctstate INVALID -j LOG' triggers it. Suggested-by: Florian Westphal Signed-off-by: Zi Li Signed-off-by: Lance Yang Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_log.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index e55eedc84ed7..00506792a06d 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -59,6 +59,9 @@ extern int sysctl_nf_log_all_netns; int nf_log_register(u_int8_t pf, struct nf_logger *logger); void nf_log_unregister(struct nf_logger *logger); +/* Check if any logger is registered for a given protocol family. */ +bool nf_log_is_registered(u_int8_t pf); + int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger); void nf_log_unset(struct net *net, const struct nf_logger *logger); -- cgit v1.2.3 From 031a712471943ce780a7fc56e35b68cf77243e1e Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 24 Jun 2025 09:44:32 +0800 Subject: netfilter: x_tables: Remove unused functions xt_{in|out}name() Since commit 2173c519d5e9 ("audit: normalize NETFILTER_PKT") these are unused, so can be removed. Signed-off-by: Yue Haibing Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index f39f688d7285..77c778d84d4c 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -51,21 +51,11 @@ static inline struct net_device *xt_in(const struct xt_action_param *par) return par->state->in; } -static inline const char *xt_inname(const struct xt_action_param *par) -{ - return par->state->in->name; -} - static inline struct net_device *xt_out(const struct xt_action_param *par) { return par->state->out; } -static inline const char *xt_outname(const struct xt_action_param *par) -{ - return par->state->out->name; -} - static inline unsigned int xt_hooknum(const struct xt_action_param *par) { return par->state->hook; -- cgit v1.2.3 From bf6788742b8d6c73de441e088a71de7154f0d4aa Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 24 Jun 2025 09:48:18 +0800 Subject: netfilter: nf_tables: Remove unused nft_reduce_is_readonly() Since commit 9e539c5b6d9c ("netfilter: nf_tables: disable expression reduction infra") this is unused. Signed-off-by: Yue Haibing Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5e49619ae49c..b092e57d3c75 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1939,11 +1939,6 @@ static inline u64 nft_net_tstamp(const struct net *net) #define __NFT_REDUCE_READONLY 1UL #define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY -static inline bool nft_reduce_is_readonly(const struct nft_expr *expr) -{ - return expr->ops->reduce == NFT_REDUCE_READONLY; -} - void nft_reg_track_update(struct nft_regs_track *track, const struct nft_expr *expr, u8 dreg, u8 len); void nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg, u8 len); -- cgit v1.2.3 From bc8c43adfdc57c8253884fc1853cb6679cd5953d Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 8 Jul 2025 15:04:02 +0200 Subject: netfilter: nfnetlink_hook: Dump flowtable info Introduce NFNL_HOOK_TYPE_NFT_FLOWTABLE to distinguish flowtable hooks from base chain ones. Nested attributes are shared with the old NFTABLES hook info type since they fit apart from their misleading name. Old nftables in user space will ignore this new hook type and thus continue to print flowtable hooks just like before, e.g.: | family netdev { | hook ingress device test0 { | 0000000000 nf_flow_offload_ip_hook [nf_flow_table] | } | } With this patch in place and support for the new hook info type, output becomes more useful: | family netdev { | hook ingress device test0 { | 0000000000 flowtable ip mytable myft [nf_flow_table] | } | } Suggested-by: Florian Westphal Signed-off-by: Phil Sutter Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 1 + include/uapi/linux/netfilter/nfnetlink_hook.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 5f896fcc074d..efbbfa770d66 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -92,6 +92,7 @@ enum nf_hook_ops_type { NF_HOOK_OP_UNDEFINED, NF_HOOK_OP_NF_TABLES, NF_HOOK_OP_BPF, + NF_HOOK_OP_NFT_FT, }; struct nf_hook_ops { diff --git a/include/uapi/linux/netfilter/nfnetlink_hook.h b/include/uapi/linux/netfilter/nfnetlink_hook.h index 84a561a74b98..1a2c4d6424b5 100644 --- a/include/uapi/linux/netfilter/nfnetlink_hook.h +++ b/include/uapi/linux/netfilter/nfnetlink_hook.h @@ -61,10 +61,12 @@ enum nfnl_hook_chain_desc_attributes { * * @NFNL_HOOK_TYPE_NFTABLES: nf_tables base chain * @NFNL_HOOK_TYPE_BPF: bpf program + * @NFNL_HOOK_TYPE_NFT_FLOWTABLE: nf_tables flowtable */ enum nfnl_hook_chaintype { NFNL_HOOK_TYPE_NFTABLES = 0x1, NFNL_HOOK_TYPE_BPF, + NFNL_HOOK_TYPE_NFT_FLOWTABLE, }; /** -- cgit v1.2.3 From 17a20e09f086f2c574ac87f3cf6e14c4377f65f6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 9 Jul 2025 19:05:13 +0200 Subject: netfilter: nft_set: remove one argument from lookup and update functions Return the extension pointer instead of passing it as a function argument to be filled in by the callee. As-is, whenever false is returned, the extension pointer is not used. For all set types, when true is returned, the extension pointer was set to the matching element. Only exception: nft_set_bitmap doesn't support extensions. Return a pointer to a static const empty element extension container. return false -> return NULL return true -> return the elements' extension pointer. This saves one function argument. Signed-off-by: Florian Westphal Reviewed-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 10 +++----- include/net/netfilter/nf_tables_core.h | 47 +++++++++++++++++++--------------- 2 files changed, 31 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b092e57d3c75..5b6725475906 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -459,19 +459,17 @@ struct nft_set_ext; * control plane functions. */ struct nft_set_ops { - bool (*lookup)(const struct net *net, + const struct nft_set_ext * (*lookup)(const struct net *net, const struct nft_set *set, - const u32 *key, - const struct nft_set_ext **ext); - bool (*update)(struct nft_set *set, + const u32 *key); + const struct nft_set_ext * (*update)(struct nft_set *set, const u32 *key, struct nft_elem_priv * (*new)(struct nft_set *, const struct nft_expr *, struct nft_regs *), const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_set_ext **ext); + struct nft_regs *regs); bool (*delete)(const struct nft_set *set, const u32 *key); diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 03b6165756fc..6a52fb97b844 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -94,34 +94,41 @@ extern const struct nft_set_type nft_set_pipapo_type; extern const struct nft_set_type nft_set_pipapo_avx2_type; #ifdef CONFIG_MITIGATION_RETPOLINE -bool nft_rhash_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext); -bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext); -bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext); -bool nft_hash_lookup_fast(const struct net *net, - const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext); -bool nft_hash_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext); -bool nft_set_do_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext); +const struct nft_set_ext * +nft_rhash_lookup(const struct net *net, const struct nft_set *set, + const u32 *key); +const struct nft_set_ext * +nft_rbtree_lookup(const struct net *net, const struct nft_set *set, + const u32 *key); +const struct nft_set_ext * +nft_bitmap_lookup(const struct net *net, const struct nft_set *set, + const u32 *key); +const struct nft_set_ext * +nft_hash_lookup_fast(const struct net *net, const struct nft_set *set, + const u32 *key); +const struct nft_set_ext * +nft_hash_lookup(const struct net *net, const struct nft_set *set, + const u32 *key); +const struct nft_set_ext * +nft_set_do_lookup(const struct net *net, const struct nft_set *set, + const u32 *key); #else -static inline bool +static inline const struct nft_set_ext * nft_set_do_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext) + const u32 *key) { - return set->ops->lookup(net, set, key, ext); + return set->ops->lookup(net, set, key); } #endif /* called from nft_pipapo_avx2.c */ -bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext); +const struct nft_set_ext * +nft_pipapo_lookup(const struct net *net, const struct nft_set *set, + const u32 *key); /* called from nft_set_pipapo.c */ -bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext); +const struct nft_set_ext * +nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, + const u32 *key); void nft_counter_init_seqcount(void); -- cgit v1.2.3 From 531e61312104d991459af73c838396db26aa3550 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 9 Jul 2025 19:05:14 +0200 Subject: netfilter: nft_set: remove indirection from update API call This stems from a time when sets and nft_dynset resided in different kernel modules. We can replace this with a direct call. We could even remove both ->update and ->delete, given its only supported by rhashtable, but on the off-chance we'll see runtime add/delete for other types or a new set type keep that as-is for now. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ---- include/net/netfilter/nf_tables_core.h | 3 +++ 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5b6725475906..891e43a01bdc 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -464,10 +464,6 @@ struct nft_set_ops { const u32 *key); const struct nft_set_ext * (*update)(struct nft_set *set, const u32 *key, - struct nft_elem_priv * - (*new)(struct nft_set *, - const struct nft_expr *, - struct nft_regs *), const struct nft_expr *expr, struct nft_regs *regs); bool (*delete)(const struct nft_set *set, diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 6a52fb97b844..6c2f483d9828 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -188,4 +188,7 @@ void nft_objref_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); void nft_objref_map_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); +struct nft_elem_priv *nft_dynset_new(struct nft_set *set, + const struct nft_expr *expr, + struct nft_regs *regs); #endif /* _NET_NF_TABLES_CORE_H */ -- cgit v1.2.3 From 0d9cfc9b8cb17dbc29a98792d36ec39a1cf1395f Mon Sep 17 00:00:00 2001 From: John Ernberg Date: Wed, 23 Jul 2025 10:25:35 +0000 Subject: net: usbnet: Avoid potential RCU stall on LINK_CHANGE event The Gemalto Cinterion PLS83-W modem (cdc_ether) is emitting confusing link up and down events when the WWAN interface is activated on the modem-side. Interrupt URBs will in consecutive polls grab: * Link Connected * Link Disconnected * Link Connected Where the last Connected is then a stable link state. When the system is under load this may cause the unlink_urbs() work in __handle_link_change() to not complete before the next usbnet_link_change() call turns the carrier on again, allowing rx_submit() to queue new SKBs. In that event the URB queue is filled faster than it can drain, ending up in a RCU stall: rcu: INFO: rcu_sched detected expedited stalls on CPUs/tasks: { 0-.... } 33108 jiffies s: 201 root: 0x1/. rcu: blocking rcu_node structures (internal RCU debug): Sending NMI from CPU 1 to CPUs 0: NMI backtrace for cpu 0 Call trace: arch_local_irq_enable+0x4/0x8 local_bh_enable+0x18/0x20 __netdev_alloc_skb+0x18c/0x1cc rx_submit+0x68/0x1f8 [usbnet] rx_alloc_submit+0x4c/0x74 [usbnet] usbnet_bh+0x1d8/0x218 [usbnet] usbnet_bh_tasklet+0x10/0x18 [usbnet] tasklet_action_common+0xa8/0x110 tasklet_action+0x2c/0x34 handle_softirqs+0x2cc/0x3a0 __do_softirq+0x10/0x18 ____do_softirq+0xc/0x14 call_on_irq_stack+0x24/0x34 do_softirq_own_stack+0x18/0x20 __irq_exit_rcu+0xa8/0xb8 irq_exit_rcu+0xc/0x30 el1_interrupt+0x34/0x48 el1h_64_irq_handler+0x14/0x1c el1h_64_irq+0x68/0x6c _raw_spin_unlock_irqrestore+0x38/0x48 xhci_urb_dequeue+0x1ac/0x45c [xhci_hcd] unlink1+0xd4/0xdc [usbcore] usb_hcd_unlink_urb+0x70/0xb0 [usbcore] usb_unlink_urb+0x24/0x44 [usbcore] unlink_urbs.constprop.0.isra.0+0x64/0xa8 [usbnet] __handle_link_change+0x34/0x70 [usbnet] usbnet_deferred_kevent+0x1c0/0x320 [usbnet] process_scheduled_works+0x2d0/0x48c worker_thread+0x150/0x1dc kthread+0xd8/0xe8 ret_from_fork+0x10/0x20 Get around the problem by delaying the carrier on to the scheduled work. This needs a new flag to keep track of the necessary action. The carrier ok check cannot be removed as it remains required for the LINK_RESET event flow. Fixes: 4b49f58fff00 ("usbnet: handle link change") Cc: stable@vger.kernel.org Signed-off-by: John Ernberg Link: https://patch.msgid.link/20250723102526.1305339-1-john.ernberg@actia.se Signed-off-by: Jakub Kicinski --- include/linux/usb/usbnet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 0b9f1e598e3a..4bc6bb01a0eb 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -76,6 +76,7 @@ struct usbnet { # define EVENT_LINK_CHANGE 11 # define EVENT_SET_RX_MODE 12 # define EVENT_NO_IP_ALIGN 13 +# define EVENT_LINK_CARRIER_ON 14 /* This one is special, as it indicates that the device is going away * there are cyclic dependencies between tasklet, timer and bh * that must be broken -- cgit v1.2.3 From f24987ef6959a7efaf79bffd265522c3df18d431 Mon Sep 17 00:00:00 2001 From: Gabriel Goller Date: Tue, 22 Jul 2025 10:18:45 +0200 Subject: ipv6: add `force_forwarding` sysctl to enable per-interface forwarding It is currently impossible to enable ipv6 forwarding on a per-interface basis like in ipv4. To enable forwarding on an ipv6 interface we need to enable it on all interfaces and disable it on the other interfaces using a netfilter rule. This is especially cumbersome if you have lots of interfaces and only want to enable forwarding on a few. According to the sysctl docs [0] the `net.ipv6.conf.all.forwarding` enables forwarding for all interfaces, while the interface-specific `net.ipv6.conf..forwarding` configures the interface Host/Router configuration. Introduce a new sysctl flag `force_forwarding`, which can be set on every interface. The ip6_forwarding function will then check if the global forwarding flag OR the force_forwarding flag is active and forward the packet. To preserve backwards-compatibility reset the flag (on all interfaces) to 0 if the net.ipv6.conf.all.forwarding flag is set to 0. Add a short selftest that checks if a packet gets forwarded with and without `force_forwarding`. [0]: https://www.kernel.org/doc/Documentation/networking/ip-sysctl.txt Acked-by: Nicolas Dichtel Signed-off-by: Gabriel Goller Link: https://patch.msgid.link/20250722081847.132632-1-g.goller@proxmox.com Signed-off-by: Jakub Kicinski --- include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + include/uapi/linux/netconf.h | 1 + include/uapi/linux/sysctl.h | 1 + 4 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index db0eb0d86b64..bc6ec2959173 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -17,6 +17,7 @@ struct ipv6_devconf { __s32 hop_limit; __s32 mtu6; __s32 forwarding; + __s32 force_forwarding; __s32 disable_policy; __s32 proxy_ndp; __cacheline_group_end(ipv6_devconf_read_txrx); diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index cf592d7b630f..d4d3ae774b26 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -199,6 +199,7 @@ enum { DEVCONF_NDISC_EVICT_NOCARRIER, DEVCONF_ACCEPT_UNTRACKED_NA, DEVCONF_ACCEPT_RA_MIN_LFT, + DEVCONF_FORCE_FORWARDING, DEVCONF_MAX }; diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h index fac4edd55379..1c8c84d65ae3 100644 --- a/include/uapi/linux/netconf.h +++ b/include/uapi/linux/netconf.h @@ -19,6 +19,7 @@ enum { NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, NETCONFA_INPUT, NETCONFA_BC_FORWARDING, + NETCONFA_FORCE_FORWARDING, __NETCONFA_MAX }; #define NETCONFA_MAX (__NETCONFA_MAX - 1) diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 8981f00204db..63d1464cb71c 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -573,6 +573,7 @@ enum { NET_IPV6_ACCEPT_RA_FROM_LOCAL=26, NET_IPV6_ACCEPT_RA_RT_INFO_MIN_PLEN=27, NET_IPV6_RA_DEFRTR_METRIC=28, + NET_IPV6_FORCE_FORWARDING=29, __NET_IPV6_MAX }; -- cgit v1.2.3 From 33360f2508e07b07bb926ea75f11744dcc1cde07 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 23 Jul 2025 10:20:29 -0700 Subject: netpoll: Remove unused fields from inet_addr union Clean up the inet_addr union by removing unused fields that are redundant with existing members: This simplifies the union structure while maintaining all necessary functionality for both IPv4 and IPv6 address handling. Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250723-netconsole_ref-v3-1-8be9b24e4a99@debian.org Signed-off-by: Jakub Kicinski --- include/linux/netpoll.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 735e65c3cc11..b5ea9882eda8 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -15,10 +15,7 @@ #include union inet_addr { - __u32 all[4]; __be32 ip; - __be32 ip6[4]; - struct in_addr in; struct in6_addr in6; }; -- cgit v1.2.3 From c2dbaf0af05193fa69f267c37d1f825c1c3a59ab Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 12 Jun 2025 10:05:52 -0400 Subject: tracing: sched: Hide numa events under CONFIG_NUMA_BALANCING The events sched_move_numa, sched_stick_numa and sched_swap_numa are only called when CONFIG_NUMA_BALANCING is configured. As each event can take up to 5K of memory in text and meta data regardless if they are used or not, they should not be defined when unused. Move the #ifdef CONFIG_NUMA_BALANCING to hide these events as well. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Link: https://lore.kernel.org/20250612100552.39672cf9@batman.local.home Reviewed-by: Shrikanth Hegde Signed-off-by: Steven Rostedt (Google) --- include/trace/events/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 4e6b2910cec3..0243f32e068a 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -628,6 +628,7 @@ TRACE_EVENT(sched_process_hang, ); #endif /* CONFIG_DETECT_HUNG_TASK */ +#ifdef CONFIG_NUMA_BALANCING /* * Tracks migration of tasks from one runqueue to another. Can be used to * detect if automatic NUMA balancing is bouncing between nodes. @@ -720,7 +721,6 @@ DEFINE_EVENT(sched_numa_pair_template, sched_swap_numa, TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu) ); -#ifdef CONFIG_NUMA_BALANCING #define NUMAB_SKIP_REASON \ EM( NUMAB_SKIP_UNSUITABLE, "unsuitable" ) \ EM( NUMAB_SKIP_SHARED_RO, "shared_ro" ) \ -- cgit v1.2.3 From 463deed51796fd0995d08d8b6aa793d7ab5a2059 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 22 Jul 2025 10:18:31 -0700 Subject: ipv6: Add sockaddr_inet unified address structure There are cases in networking (e.g. wireguard, sctp) where a union is used to provide coverage for either IPv4 or IPv6 network addresses, and they include an embedded "struct sockaddr" as well (for "sa_family" and raw "sa_data" access). The current struct sockaddr contains a flexible array, which means these unions should not be further embedded in other structs because they do not technically have a fixed size (and are generating warnings for the coming -Wflexible-array-not-at-end flag addition). But the future changes to make struct sockaddr a fixed size (i.e. with a 14 byte sa_data member) make the "sa_data" uses with an IPv6 address a potential place for the compiler to get upset about object size mismatches. Therefore, we need a sockaddr that cleanly provides both an sa_family member and an appropriately fixed-sized sa_data member that does not bloat member usage via the potential alternative of sockaddr_storage to cover both IPv4 and IPv6, to avoid unseemly churn in the affected code bases. Introduce sockaddr_inet as a unified structure for holding both IPv4 and IPv6 addresses (i.e. large enough to accommodate sockaddr_in6). The structure is defined in linux/in6.h since its max size is sized based on sockaddr_in6 and provides a more specific alternative to the generic sockaddr_storage for IPv4 with IPv6 address family handling. The "sa_family" member doesn't use the sa_family_t type to avoid needing layer violating header inclusions. Signed-off-by: Kees Cook Link: https://patch.msgid.link/20250722171836.1078436-1-kees@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/in6.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/in6.h b/include/linux/in6.h index 0777a21cbf86..403f926d33d8 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -18,6 +18,13 @@ #include +/* Large enough to hold both sockaddr_in and sockaddr_in6. */ +struct sockaddr_inet { + unsigned short sa_family; + char sa_data[sizeof(struct sockaddr_in6) - + sizeof(unsigned short)]; +}; + /* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC2553 * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined * in network byte order, not in host byte order as are the IPv4 equivalents -- cgit v1.2.3 From 511d10b4c2f91fb6aa676006b2bdff4df5d6e270 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 22 Jul 2025 10:18:33 -0700 Subject: sctp: Replace sockaddr with sockaddr_inet in sctp_addr union As part of the removal of the variably-sized sockaddr for kernel internals, replace struct sockaddr with sockaddr_inet in the sctp_addr union. No binary changes; the union size remains unchanged due to sockaddr_inet matching the size of sockaddr_in6. Signed-off-by: Kees Cook Link: https://patch.msgid.link/20250722171836.1078436-3-kees@kernel.org Signed-off-by: Jakub Kicinski --- include/net/sctp/structs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 1ad7ce71d0a7..8a540ad9b509 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -51,9 +51,9 @@ * We should wean ourselves off this. */ union sctp_addr { + struct sockaddr_inet sa; /* Large enough for both address families */ struct sockaddr_in v4; struct sockaddr_in6 v6; - struct sockaddr sa; }; /* Forward declarations for data structures. */ -- cgit v1.2.3 From 84c47bfc5b3b40b50b798b6b6c15e8a1442d936d Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Thu, 24 Jul 2025 17:17:49 -0700 Subject: net: dsa: microchip: Add KSZ8463 switch support to KSZ DSA driver KSZ8463 switch is a 3-port switch based from KSZ8863. Its major difference from other KSZ SPI switches is its register access is not a simple continual 8-bit transfer with automatic address increase but uses a byte-enable mechanism specifying 8-bit, 16-bit, or 32-bit access. Its registers are also defined in 16-bit format because it shares a design with a MAC controller using 16-bit access. As a result some common register accesses need to be re-arranged. This patch adds the basic structure for using KSZ8463. It cannot use the same regmap table for other KSZ switches as it interprets the 16-bit value as little-endian and its SPI commands are different. KSZ8463 uses a byte-enable mechanism to specify 8-bit, 16-bit, and 32-bit access. The register is first shifted right by 2 then left by 4. Extra 4 bits are added. If the access is 8-bit one of the 4 bits is set. If the access is 16-bit two of the 4 bits are set. If the access is 32-bit all 4 bits are set. The SPI command for read or write is then added. Because of this register transformation separate SPI read and write functions are provided for KSZ8463. KSZ8463's internal PHYs use standard PHY register definitions so there is no need to remap things. However, the hardware has a bug that the high word and low word of the PHY id are swapped. In addition the port registers are arranged differently so KSZ8463 has its own mapping for port registers and PHY registers. Therefore the PORT_CTRL_ADDR macro is replaced with the get_port_addr helper function. Signed-off-by: Tristram Ha Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250725001753.6330-3-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski --- include/linux/platform_data/microchip-ksz.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/platform_data/microchip-ksz.h b/include/linux/platform_data/microchip-ksz.h index 0e0e8fe6975f..028781ad4059 100644 --- a/include/linux/platform_data/microchip-ksz.h +++ b/include/linux/platform_data/microchip-ksz.h @@ -23,6 +23,7 @@ #include enum ksz_chip_id { + KSZ8463_CHIP_ID = 0x8463, KSZ8563_CHIP_ID = 0x8563, KSZ8795_CHIP_ID = 0x8795, KSZ8794_CHIP_ID = 0x8794, -- cgit v1.2.3 From c652887a92887b9a2d48ee40f49f8013449b9a50 Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Wed, 23 Jul 2025 23:28:03 -0700 Subject: KVM: arm64: vgic-v3: Allow userspace to write GICD_TYPER2.nASSGIcap KVM unconditionally advertises GICD_TYPER2.nASSGIcap (which internally implies vSGIs) on GICv4.1 systems. Allow userspace to change whether a VM supports the feature. Only allow changes prior to VGIC initialization as at that point vPEs need to be allocated for the VM. For convenience, bundle support for vLPIs and vSGIs behind this feature, allowing userspace to control vPE allocation for VMs in environments that may be constrained on vPE IDs. Signed-off-by: Raghavendra Rao Ananta Reviewed-by: Eric Auger Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20250724062805.2658919-5-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- include/kvm/arm_vgic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 4a34f7f0a864..1b4886f3fb20 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -264,6 +264,9 @@ struct vgic_dist { /* distributor enabled */ bool enabled; + /* Supports SGIs without active state */ + bool nassgicap; + /* Wants SGIs without active state */ bool nassgireq; -- cgit v1.2.3 From b7b3500bd4eef2c3b5124ed195f26eb048407d9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 21 Jul 2025 11:04:42 +0200 Subject: umd: Remove usermode driver framework MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code is unused since 98e20e5e13d2 ("bpfilter: remove bpfilter"), therefore remove it. Signed-off-by: Thomas Weißschuh Signed-off-by: Daniel Borkmann Reviewed-by: Christoph Hellwig Reviewed-by: Christian Brauner Acked-by: "Eric W. Biederman" Link: https://lore.kernel.org/bpf/20250721-remove-usermode-driver-v1-2-0d0083334382@linutronix.de --- include/linux/usermode_driver.h | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 include/linux/usermode_driver.h (limited to 'include') diff --git a/include/linux/usermode_driver.h b/include/linux/usermode_driver.h deleted file mode 100644 index ad970416260d..000000000000 --- a/include/linux/usermode_driver.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef __LINUX_USERMODE_DRIVER_H__ -#define __LINUX_USERMODE_DRIVER_H__ - -#include -#include - -struct umd_info { - const char *driver_name; - struct file *pipe_to_umh; - struct file *pipe_from_umh; - struct path wd; - struct pid *tgid; -}; -int umd_load_blob(struct umd_info *info, const void *data, size_t len); -int umd_unload_blob(struct umd_info *info); -int fork_usermode_driver(struct umd_info *info); -void umd_cleanup_helper(struct umd_info *info); - -#endif /* __LINUX_USERMODE_DRIVER_H__ */ -- cgit v1.2.3 From 3ba58312e65665e5b9097c7969a51fa49914d85d Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 24 Jul 2025 12:02:53 +0000 Subject: bpf: Move bpf_jit_get_prog_name() to core.c bpf_jit_get_prog_name() will be used by all JITs when enabling support for private stack. This function is currently implemented in the x86 JIT. Move the function to core.c so that other JITs can easily use it in their implementation of private stack. Signed-off-by: Puranjay Mohan Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20250724120257.7299-2-puranjay@kernel.org --- include/linux/filter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index eca229752cbe..5cc7a82ec832 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1278,6 +1278,8 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog, const struct bpf_insn *insn, bool extra_pass, u64 *func_addr, bool *func_addr_fixed); +const char *bpf_jit_get_prog_name(struct bpf_prog *prog); + struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp); void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other); -- cgit v1.2.3 From 381a38ea53d25ed6f93ba007b021db86c2a36bc6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 23 Jul 2025 22:50:27 -0700 Subject: init.h: Disable sanitizer coverage for __init and __head While __noinstr already contained __no_sanitize_coverage, it needs to be added to __init and __head section markings to support the Clang implementation of CONFIG_KSTACK_ERASE. This is to make sure the stack depth tracking callback is not executed in unsupported contexts. The other sanitizer coverage options (trace-pc and trace-cmp) aren't needed in __head nor __init either ("We are interested in code coverage as a function of a syscall inputs"[1]), so this is fine to disable for them as well. Link: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/kcov.c?h=v6.14#n179 [1] Acked-by: Marco Elver Link: https://lore.kernel.org/r/20250724055029.3623499-3-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/init.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/init.h b/include/linux/init.h index ee1309473bc6..c65a050d52a7 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -49,7 +49,9 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(".init.text") __cold __latent_entropy __noinitretpoline +#define __init __section(".init.text") __cold __latent_entropy \ + __noinitretpoline \ + __no_sanitize_coverage #define __initdata __section(".init.data") #define __initconst __section(".init.rodata") #define __exitdata __section(".exit.data") -- cgit v1.2.3 From 32e42ab9fc88a884435c27527a433f61c4d2b61b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 26 Jul 2025 00:29:54 -0700 Subject: sched/task_stack: Add missing const qualifier to end_of_stack() Add missing const qualifier to the non-CONFIG_THREAD_INFO_IN_TASK version of end_of_stack() to match the CONFIG_THREAD_INFO_IN_TASK version. Fixes a warning with CONFIG_KSTACK_ERASE=y on archs that don't select THREAD_INFO_IN_TASK (such as LoongArch): error: passing 'const struct task_struct *' to parameter of type 'struct task_struct *' discards qualifiers The stackleak_task_low_bound() function correctly uses a const task parameter, but the legacy end_of_stack() prototype didn't like that. Build tested on loongarch (with CONFIG_KSTACK_ERASE=y) and m68k (with CONFIG_DEBUG_STACK_USAGE=y). Fixes: a45728fd4120 ("LoongArch: Enable HAVE_ARCH_STACKLEAK") Reported-by: Nathan Chancellor Closes: https://lore.kernel.org/all/20250726004313.GA3650901@ax162 Cc: Youling Tang Cc: Huacai Chen Tested-by: Nathan Chancellor Signed-off-by: Kees Cook --- include/linux/sched/task_stack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index 85c5a6392e02..1fab7e9043a3 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -53,7 +53,7 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct * When the stack grows up, this is the highest address. * Beyond that position, we corrupt data on the next page. */ -static inline unsigned long *end_of_stack(struct task_struct *p) +static inline unsigned long *end_of_stack(const struct task_struct *p) { #ifdef CONFIG_STACK_GROWSUP return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1; -- cgit v1.2.3 From 48e6561b667e7f0623da3ca34e2b93b7ae2a5d8d Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Tue, 22 Jul 2025 15:46:49 -0400 Subject: mm/page_alloc: remove trace_mm_alloc_contig_migrate_range_info() The trace event has not recorded the right data since it was introduced at commit c8b360031218 ("mm: add alloc_contig_migrate_range allocation statistics"). Remove it. Link: https://lkml.kernel.org/r/20250722194649.4135191-1-ziy@nvidia.com Signed-off-by: Zi Yan Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507220742.P3SaKlI6-lkp@intel.com/ Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Brendan Jackman Cc: David Rientjes Cc: Johannes Weiner Cc: Martin Liu Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Cc: Michal Hocko Cc: Richard Chang Cc: Steven Rostedt Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/trace/events/kmem.h | 40 ---------------------------------------- 1 file changed, 40 deletions(-) (limited to 'include') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index efffcf578217..474358773abe 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -304,46 +304,6 @@ TRACE_EVENT(mm_page_alloc_extfrag, __entry->change_ownership) ); -#ifdef CONFIG_CONTIG_ALLOC -TRACE_EVENT(mm_alloc_contig_migrate_range_info, - - TP_PROTO(unsigned long start, - unsigned long end, - unsigned long nr_migrated, - unsigned long nr_reclaimed, - unsigned long nr_mapped, - acr_flags_t alloc_flags), - - TP_ARGS(start, end, nr_migrated, nr_reclaimed, nr_mapped, alloc_flags), - - TP_STRUCT__entry( - __field(unsigned long, start) - __field(unsigned long, end) - __field(unsigned long, nr_migrated) - __field(unsigned long, nr_reclaimed) - __field(unsigned long, nr_mapped) - __field(acr_flags_t, alloc_flags) - ), - - TP_fast_assign( - __entry->start = start; - __entry->end = end; - __entry->nr_migrated = nr_migrated; - __entry->nr_reclaimed = nr_reclaimed; - __entry->nr_mapped = nr_mapped; - __entry->alloc_flags = alloc_flags; - ), - - TP_printk("start=0x%lx end=0x%lx alloc_flags=%d nr_migrated=%lu nr_reclaimed=%lu nr_mapped=%lu", - __entry->start, - __entry->end, - __entry->alloc_flags, - __entry->nr_migrated, - __entry->nr_reclaimed, - __entry->nr_mapped) -); -#endif - TRACE_EVENT(mm_setup_per_zone_wmarks, TP_PROTO(struct zone *zone), -- cgit v1.2.3 From 97c01e65ef4c1878532be245b2899fc4363cc453 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Sun, 27 Jul 2025 01:15:17 -0700 Subject: Input: Add and document BTN_GRIP* Many controllers these days have started including grip buttons. As there has been no particular assigned BTN_* constants for these, they've been haphazardly assigned to BTN_TRIGGER_HAPPY*. Unfortunately, the assignment of these has varied significantly between drivers. Add and document new constants for these grip buttons. Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20250702040102.125432-2-vi@endrift.com Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 5a199f3d4a26..5426297d93fd 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -601,6 +601,11 @@ #define BTN_DPAD_LEFT 0x222 #define BTN_DPAD_RIGHT 0x223 +#define BTN_GRIPL 0x224 +#define BTN_GRIPR 0x225 +#define BTN_GRIPL2 0x226 +#define BTN_GRIPR2 0x227 + #define KEY_ALS_TOGGLE 0x230 /* Ambient light sensor */ #define KEY_ROTATE_LOCK_TOGGLE 0x231 /* Display rotation lock */ #define KEY_REFRESH_RATE_TOGGLE 0x232 /* Display refresh rate toggle */ -- cgit v1.2.3 From ea4d331050b4cd43e6a900937db88b01ef75e1f2 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Wed, 16 Oct 2024 06:02:41 +0200 Subject: Input: touch-overlay - add touchscreen overlay handling Some touch devices provide mechanical overlays with different objects like buttons or clipped touchscreen surfaces. In order to support these objects, add a series of helper functions to the input subsystem to transform them into overlay objects via device tree nodes. These overlay objects consume the raw touch events and report the expected input events depending on the object properties. Note that the current implementation allows for multiple definitions of touchscreen areas (regions that report touch events), but only the first one will be used for the touchscreen device that the consumers typically provide. Should the need for multiple touchscreen areas arise, additional touchscreen devices would be required at the consumer side. There is no limitation in the number of touch areas defined as buttons. Reviewed-by: Jeff LaBundy Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/20241016-feature-ts_virtobj_patch-v11-2-b292a1bbb0a1@wolfvision.net Signed-off-by: Dmitry Torokhov --- include/linux/input/touch-overlay.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/linux/input/touch-overlay.h (limited to 'include') diff --git a/include/linux/input/touch-overlay.h b/include/linux/input/touch-overlay.h new file mode 100644 index 000000000000..0253e554d3cd --- /dev/null +++ b/include/linux/input/touch-overlay.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023 Javier Carrasco + */ + +#ifndef _TOUCH_OVERLAY +#define _TOUCH_OVERLAY + +#include + +struct input_dev; + +int touch_overlay_map(struct list_head *list, struct input_dev *input); + +void touch_overlay_get_touchscreen_abs(struct list_head *list, u16 *x, u16 *y); + +bool touch_overlay_mapped_touchscreen(struct list_head *list); + +bool touch_overlay_process_contact(struct list_head *list, + struct input_dev *input, + struct input_mt_pos *pos, int slot); + +void touch_overlay_sync_frame(struct list_head *list, struct input_dev *input); + +#endif -- cgit v1.2.3 From 56eb7c13b97c6f9e2fed9e9899b01d1a6a595f28 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 26 Jun 2025 19:08:12 +0300 Subject: mtd: map: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Note that kernel.h is discouraged to be included as it's written at the top of that file. Signed-off-by: Andy Shevchenko Signed-off-by: Miquel Raynal --- include/linux/mtd/map.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 1b56796f6cb3..288ef765a44e 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -8,15 +8,15 @@ #ifndef __LINUX_MTD_MAP_H__ #define __LINUX_MTD_MAP_H__ -#include -#include -#include #include -#include #include - +#include +#include +#include #include -#include + +struct device_node; +struct module; #ifdef CONFIG_MTD_MAP_BANK_WIDTH_1 #define map_bankwidth(map) 1 @@ -188,6 +188,7 @@ typedef union { of living. */ +struct mtd_chip_driver; struct map_info { const char *name; unsigned long size; -- cgit v1.2.3 From 08da98e1b2f76cdbacf84b9affaa75960dbce515 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 8 Jul 2025 16:36:40 +0200 Subject: fsnotify: merge file_set_fsnotify_mode_from_watchers() with open perm hook Create helper fsnotify_open_perm_and_set_mode() that moves the fsnotify_open_perm() hook into file_set_fsnotify_mode_from_watchers(). This will allow some more optimizations. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250708143641.418603-2-amir73il@gmail.com --- include/linux/fsnotify.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 454d8e466958..8c1fa617d375 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -129,7 +129,7 @@ static inline int fsnotify_file(struct file *file, __u32 mask) #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS -void file_set_fsnotify_mode_from_watchers(struct file *file); +int fsnotify_open_perm_and_set_mode(struct file *file); /* * fsnotify_file_area_perm - permission hook before access to file range @@ -215,9 +215,6 @@ static inline int fsnotify_open_perm(struct file *file) { int ret; - if (likely(!FMODE_FSNOTIFY_PERM(file->f_mode))) - return 0; - if (file->f_flags & __FMODE_EXEC) { ret = fsnotify_path(&file->f_path, FS_OPEN_EXEC_PERM); if (ret) @@ -228,8 +225,9 @@ static inline int fsnotify_open_perm(struct file *file) } #else -static inline void file_set_fsnotify_mode_from_watchers(struct file *file) +static inline int fsnotify_open_perm_and_set_mode(struct file *file) { + return 0; } static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, -- cgit v1.2.3 From 0d4c4d4ea443babab6ec1a79f481260963fc969a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 8 Jul 2025 16:36:41 +0200 Subject: fsnotify: optimize FMODE_NONOTIFY_PERM for the common cases The most unlikely watched permission event is FAN_ACCESS_PERM, because at the time that it was introduced there were no evictable ignore mark, so subscribing to FAN_ACCESS_PERM would have incured a very high overhead. Yet, when we set the fmode to FMODE_NOTIFY_HSM(), we never skip trying to send FAN_ACCESS_PERM, which is almost always a waste of cycles. We got to this logic because of bundling FAN_OPEN*_PERM and FAN_ACCESS_PERM in the same category and because FAN_OPEN_PERM is a commonly used event. By open coding fsnotify_open_perm() in fsnotify_open_perm_and_set_mode(), we no longer need to regard FAN_OPEN*_PERM when calculating fmode. This leaves the case of having pre-content events and not having any other permission event in the object masks a more likely case than the other way around. Rework the fmode macros and code so that their meaning now refers only to hooks on an already open file: - FMODE_NOTIFY_NONE() skip all events - FMODE_NOTIFY_ACCESS_PERM() send all permission events including FAN_ACCESS_PERM - FMODE_NOTIFY_HSM() send pre-content permission events Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250708143641.418603-3-amir73il@gmail.com --- include/linux/fs.h | 12 ++++++------ include/linux/fsnotify.h | 27 ++------------------------- 2 files changed, 8 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index b085f161ed22..bc92fdb8bfcc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -200,12 +200,12 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* * The two FMODE_NONOTIFY* define which fsnotify events should not be generated - * for a file. These are the possible values of (f->f_mode & - * FMODE_FSNOTIFY_MASK) and their meaning: + * for an open file. These are the possible values of + * (f->f_mode & FMODE_FSNOTIFY_MASK) and their meaning: * * FMODE_NONOTIFY - suppress all (incl. non-permission) events. * FMODE_NONOTIFY_PERM - suppress permission (incl. pre-content) events. - * FMODE_NONOTIFY | FMODE_NONOTIFY_PERM - suppress only pre-content events. + * FMODE_NONOTIFY | FMODE_NONOTIFY_PERM - suppress only FAN_ACCESS_PERM. */ #define FMODE_FSNOTIFY_MASK \ (FMODE_NONOTIFY | FMODE_NONOTIFY_PERM) @@ -213,13 +213,13 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_FSNOTIFY_NONE(mode) \ ((mode & FMODE_FSNOTIFY_MASK) == FMODE_NONOTIFY) #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS -#define FMODE_FSNOTIFY_PERM(mode) \ +#define FMODE_FSNOTIFY_HSM(mode) \ ((mode & FMODE_FSNOTIFY_MASK) == 0 || \ (mode & FMODE_FSNOTIFY_MASK) == (FMODE_NONOTIFY | FMODE_NONOTIFY_PERM)) -#define FMODE_FSNOTIFY_HSM(mode) \ +#define FMODE_FSNOTIFY_ACCESS_PERM(mode) \ ((mode & FMODE_FSNOTIFY_MASK) == 0) #else -#define FMODE_FSNOTIFY_PERM(mode) 0 +#define FMODE_FSNOTIFY_ACCESS_PERM(mode) 0 #define FMODE_FSNOTIFY_HSM(mode) 0 #endif diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 8c1fa617d375..28a9cb13fbfa 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -147,9 +147,6 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, if (!(perm_mask & (MAY_READ | MAY_WRITE | MAY_ACCESS))) return 0; - if (likely(!FMODE_FSNOTIFY_PERM(file->f_mode))) - return 0; - /* * read()/write() and other types of access generate pre-content events. */ @@ -160,7 +157,8 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, return ret; } - if (!(perm_mask & MAY_READ)) + if (!(perm_mask & MAY_READ) || + likely(!FMODE_FSNOTIFY_ACCESS_PERM(file->f_mode))) return 0; /* @@ -208,22 +206,6 @@ static inline int fsnotify_file_perm(struct file *file, int perm_mask) return fsnotify_file_area_perm(file, perm_mask, NULL, 0); } -/* - * fsnotify_open_perm - permission hook before file open - */ -static inline int fsnotify_open_perm(struct file *file) -{ - int ret; - - if (file->f_flags & __FMODE_EXEC) { - ret = fsnotify_path(&file->f_path, FS_OPEN_EXEC_PERM); - if (ret) - return ret; - } - - return fsnotify_path(&file->f_path, FS_OPEN_PERM); -} - #else static inline int fsnotify_open_perm_and_set_mode(struct file *file) { @@ -251,11 +233,6 @@ static inline int fsnotify_file_perm(struct file *file, int perm_mask) { return 0; } - -static inline int fsnotify_open_perm(struct file *file) -{ - return 0; -} #endif /* -- cgit v1.2.3 From 28a78afda6c80dfdcbec51813cb8d2813523ea0c Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Mon, 28 Jul 2025 15:50:13 +0200 Subject: rv: Add da_handle_start_run_event_ to per-task monitors The RV da_monitor API allows to start monitors in two ways: da_handle_start_event_NAME and da_handle_start_run_event_NAME. The former is used when the event is followed by the initial state of the module, so we ignore the event but we know the monitor is in the initial state and can start monitoring, the latter can be used if the event can only occur in the initial state, so we do handle the event as if the monitor was in the initial state. This latter API is defined for implicit monitors but not per-task ones. Define da_handle_start_run_event_NAME macro also for per-task monitors. Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Tomas Glozar Cc: Juri Lelli Cc: Clark Williams Cc: John Kacur Link: https://lore.kernel.org/20250728135022.255578-2-gmonaco@redhat.com Reviewed-by: Nam Cao Signed-off-by: Gabriele Monaco Signed-off-by: Steven Rostedt (Google) --- include/rv/da_monitor.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h index 15f9ed4e4bb6..ed3c34fe18d6 100644 --- a/include/rv/da_monitor.h +++ b/include/rv/da_monitor.h @@ -487,6 +487,30 @@ da_handle_start_event_##name(struct task_struct *tsk, enum events_##name event) __da_handle_event_##name(da_mon, tsk, event); \ \ return 1; \ +} \ + \ +/* \ + * da_handle_start_run_event_##name - start monitoring and handle event \ + * \ + * This function is used to notify the monitor that the system is in the \ + * initial state, so the monitor can start monitoring and handling event. \ + */ \ +static inline bool \ +da_handle_start_run_event_##name(struct task_struct *tsk, enum events_##name event) \ +{ \ + struct da_monitor *da_mon; \ + \ + if (!da_monitor_enabled_##name()) \ + return 0; \ + \ + da_mon = da_get_monitor_##name(tsk); \ + \ + if (unlikely(!da_monitoring_##name(da_mon))) \ + da_monitor_start_##name(da_mon); \ + \ + __da_handle_event_##name(da_mon, tsk, event); \ + \ + return 1; \ } /* -- cgit v1.2.3 From 9d475d80c93735f0f336b34a8e2c22beea6145ab Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Mon, 28 Jul 2025 15:50:17 +0200 Subject: rv: Retry when da monitor detects race conditions DA monitor can be accessed from multiple cores simultaneously, this is likely, for instance when dealing with per-task monitors reacting on events that do not always occur on the CPU where the task is running. This can cause race conditions where two events change the next state and we see inconsistent values. E.g.: [62] event_srs: 27: sleepable x sched_wakeup -> running (final) [63] event_srs: 27: sleepable x sched_set_state_sleepable -> sleepable [63] error_srs: 27: event sched_switch_suspend not expected in the state running In this case the monitor fails because the event on CPU 62 wins against the one on CPU 63, although the correct state should have been sleepable, since the task get suspended. Detect if the current state was modified by using try_cmpxchg while storing the next value. If it was, try again reading the current state. After a maximum number of failed retries, react by calling a special tracepoint, print on the console and reset the monitor. Remove the functions da_monitor_curr_state() and da_monitor_set_state() as they only hide the underlying implementation in this case. Monitors where this type of condition can occur must be able to account for racing events in any possible order, as we cannot know the winner. Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Tomas Glozar Cc: Juri Lelli Cc: Clark Williams Cc: John Kacur Cc: Peter Zijlstra Link: https://lore.kernel.org/20250728135022.255578-6-gmonaco@redhat.com Signed-off-by: Gabriele Monaco Reviewed-by: Nam Cao Signed-off-by: Steven Rostedt (Google) --- include/linux/rv.h | 3 +- include/rv/da_monitor.h | 107 ++++++++++++++++++++++++------------------------ 2 files changed, 56 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/include/linux/rv.h b/include/linux/rv.h index 80731242fe60..14410a42faef 100644 --- a/include/linux/rv.h +++ b/include/linux/rv.h @@ -10,7 +10,8 @@ #include #include -#define MAX_DA_NAME_LEN 32 +#define MAX_DA_NAME_LEN 32 +#define MAX_DA_RETRY_RACING_EVENTS 3 #ifdef CONFIG_RV #include diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h index ed3c34fe18d6..17fa4f6e5ea6 100644 --- a/include/rv/da_monitor.h +++ b/include/rv/da_monitor.h @@ -54,23 +54,6 @@ static inline void da_monitor_reset_##name(struct da_monitor *da_mon) \ da_mon->curr_state = model_get_initial_state_##name(); \ } \ \ -/* \ - * da_monitor_curr_state_##name - return the current state \ - */ \ -static inline type da_monitor_curr_state_##name(struct da_monitor *da_mon) \ -{ \ - return da_mon->curr_state; \ -} \ - \ -/* \ - * da_monitor_set_state_##name - set the new current state \ - */ \ -static inline void \ -da_monitor_set_state_##name(struct da_monitor *da_mon, enum states_##name state) \ -{ \ - da_mon->curr_state = state; \ -} \ - \ /* \ * da_monitor_start_##name - start monitoring \ * \ @@ -127,63 +110,81 @@ static inline bool da_monitor_handling_event_##name(struct da_monitor *da_mon) * Event handler for implicit monitors. Implicit monitor is the one which the * handler does not need to specify which da_monitor to manipulate. Examples * of implicit monitor are the per_cpu or the global ones. + * + * Retry in case there is a race between getting and setting the next state, + * warn and reset the monitor if it runs out of retries. The monitor should be + * able to handle various orders. */ #define DECLARE_DA_MON_MODEL_HANDLER_IMPLICIT(name, type) \ \ static inline bool \ da_event_##name(struct da_monitor *da_mon, enum events_##name event) \ { \ - type curr_state = da_monitor_curr_state_##name(da_mon); \ - type next_state = model_get_next_state_##name(curr_state, event); \ - \ - if (next_state != INVALID_STATE) { \ - da_monitor_set_state_##name(da_mon, next_state); \ - \ - trace_event_##name(model_get_state_name_##name(curr_state), \ - model_get_event_name_##name(event), \ - model_get_state_name_##name(next_state), \ - model_is_final_state_##name(next_state)); \ - \ - return true; \ + enum states_##name curr_state, next_state; \ + \ + curr_state = READ_ONCE(da_mon->curr_state); \ + for (int i = 0; i < MAX_DA_RETRY_RACING_EVENTS; i++) { \ + next_state = model_get_next_state_##name(curr_state, event); \ + if (next_state == INVALID_STATE) { \ + cond_react_##name(curr_state, event); \ + trace_error_##name(model_get_state_name_##name(curr_state), \ + model_get_event_name_##name(event)); \ + return false; \ + } \ + if (likely(try_cmpxchg(&da_mon->curr_state, &curr_state, next_state))) { \ + trace_event_##name(model_get_state_name_##name(curr_state), \ + model_get_event_name_##name(event), \ + model_get_state_name_##name(next_state), \ + model_is_final_state_##name(next_state)); \ + return true; \ + } \ } \ \ - cond_react_##name(curr_state, event); \ - \ - trace_error_##name(model_get_state_name_##name(curr_state), \ - model_get_event_name_##name(event)); \ - \ + trace_rv_retries_error(#name, model_get_event_name_##name(event)); \ + pr_warn("rv: " __stringify(MAX_DA_RETRY_RACING_EVENTS) \ + " retries reached for event %s, resetting monitor %s", \ + model_get_event_name_##name(event), #name); \ return false; \ } \ /* * Event handler for per_task monitors. + * + * Retry in case there is a race between getting and setting the next state, + * warn and reset the monitor if it runs out of retries. The monitor should be + * able to handle various orders. */ #define DECLARE_DA_MON_MODEL_HANDLER_PER_TASK(name, type) \ \ static inline bool da_event_##name(struct da_monitor *da_mon, struct task_struct *tsk, \ enum events_##name event) \ { \ - type curr_state = da_monitor_curr_state_##name(da_mon); \ - type next_state = model_get_next_state_##name(curr_state, event); \ - \ - if (next_state != INVALID_STATE) { \ - da_monitor_set_state_##name(da_mon, next_state); \ - \ - trace_event_##name(tsk->pid, \ - model_get_state_name_##name(curr_state), \ - model_get_event_name_##name(event), \ - model_get_state_name_##name(next_state), \ - model_is_final_state_##name(next_state)); \ - \ - return true; \ + enum states_##name curr_state, next_state; \ + \ + curr_state = READ_ONCE(da_mon->curr_state); \ + for (int i = 0; i < MAX_DA_RETRY_RACING_EVENTS; i++) { \ + next_state = model_get_next_state_##name(curr_state, event); \ + if (next_state == INVALID_STATE) { \ + cond_react_##name(curr_state, event); \ + trace_error_##name(tsk->pid, \ + model_get_state_name_##name(curr_state), \ + model_get_event_name_##name(event)); \ + return false; \ + } \ + if (likely(try_cmpxchg(&da_mon->curr_state, &curr_state, next_state))) { \ + trace_event_##name(tsk->pid, \ + model_get_state_name_##name(curr_state), \ + model_get_event_name_##name(event), \ + model_get_state_name_##name(next_state), \ + model_is_final_state_##name(next_state)); \ + return true; \ + } \ } \ \ - cond_react_##name(curr_state, event); \ - \ - trace_error_##name(tsk->pid, \ - model_get_state_name_##name(curr_state), \ - model_get_event_name_##name(event)); \ - \ + trace_rv_retries_error(#name, model_get_event_name_##name(event)); \ + pr_warn("rv: " __stringify(MAX_DA_RETRY_RACING_EVENTS) \ + " retries reached for event %s, resetting monitor %s", \ + model_get_event_name_##name(event), #name); \ return false; \ } -- cgit v1.2.3 From adcc3bfa8806761ac21aa271f78454113ec6936e Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Mon, 28 Jul 2025 15:50:18 +0200 Subject: sched: Adapt sched tracepoints for RV task model Add the following tracepoint: * sched_set_need_resched(tsk, cpu, tif) Called when a task is set the need resched [lazy] flag Remove the unused ip parameter from sched_entry and sched_exit and alter sched_entry to have a value of preempt consistent with the one used in sched_switch. Also adapt all monitors using sched_{entry,exit} to avoid breaking build. These tracepoints are useful to describe the Linux task model and are adapted from the patches by Daniel Bristot de Oliveira (https://bristot.me/linux-task-model/). Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Masami Hiramatsu Cc: Nam Cao Cc: Tomas Glozar Cc: Juri Lelli Cc: Clark Williams Cc: John Kacur Link: https://lore.kernel.org/20250728135022.255578-7-gmonaco@redhat.com Signed-off-by: Gabriele Monaco Signed-off-by: Steven Rostedt (Google) --- include/linux/sched.h | 7 ++++++- include/trace/events/sched.h | 12 ++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index fabd7fe1a07a..91d1fdbc2f56 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -339,9 +339,11 @@ extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); extern void io_schedule(void); -/* wrapper function to trace from this header file */ +/* wrapper functions to trace from this header file */ DECLARE_TRACEPOINT(sched_set_state_tp); extern void __trace_set_current_state(int state_value); +DECLARE_TRACEPOINT(sched_set_need_resched_tp); +extern void __trace_set_need_resched(struct task_struct *curr, int tif); /** * struct prev_cputime - snapshot of system and user cputime @@ -2063,6 +2065,9 @@ static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) static inline void set_tsk_need_resched(struct task_struct *tsk) { + if (tracepoint_enabled(sched_set_need_resched_tp) && + !test_tsk_thread_flag(tsk, TIF_NEED_RESCHED)) + __trace_set_need_resched(tsk, TIF_NEED_RESCHED); set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); } diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 4e6b2910cec3..c08893bde255 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -882,18 +882,22 @@ DECLARE_TRACE(sched_compute_energy, TP_ARGS(p, dst_cpu, energy, max_util, busy_time)); DECLARE_TRACE(sched_entry, - TP_PROTO(bool preempt, unsigned long ip), - TP_ARGS(preempt, ip)); + TP_PROTO(bool preempt), + TP_ARGS(preempt)); DECLARE_TRACE(sched_exit, - TP_PROTO(bool is_switch, unsigned long ip), - TP_ARGS(is_switch, ip)); + TP_PROTO(bool is_switch), + TP_ARGS(is_switch)); DECLARE_TRACE_CONDITION(sched_set_state, TP_PROTO(struct task_struct *tsk, int state), TP_ARGS(tsk, state), TP_CONDITION(!!(tsk->__state) != !!state)); +DECLARE_TRACE(sched_set_need_resched, + TP_PROTO(struct task_struct *tsk, int cpu, int tif), + TP_ARGS(tsk, cpu, tif)); + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 1edaac340f4da813b258a5e3a6c79804612161a4 Mon Sep 17 00:00:00 2001 From: Klara Modin Date: Fri, 25 Jul 2025 18:43:34 +0200 Subject: block: change blk_get_meta_cap() stub return -ENOIOCTLCMD When introduced in commit 9eb22f7fedfc ("fs: add ioctl to query metadata and protection info capabilities") the stub of blk_get_meta_cap() for !BLK_DEV_INTEGRITY always returns -EOPNOTSUPP. The motivation was that while the command was unsupported in that configuration it was still recognized. A later change instead assumed -ENOIOCTLCMD as is required for unknown ioctl commands per Documentation/driver-api/ioctl.rst. The result being that on !BLK_DEV_INTEGRITY configs, any ioctl which reaches blkdev_common_ioctl() will return -EOPNOTSUPP. Change the stub to return -ENOIOCTLCMD, fixing the issue and better matching with expectations. [ The blkdev_common_ioctl() confusion has been fixed, but -ENOIOCTLCMD is the right thing to return for unrecognized ioctls, so the patch remains the right thing to do. - Linus ] Link: https://lore.kernel.org/lkml/CACzX3AsRd__fXb9=CJPTTJC494SDnYAtYrN2=+bZgMCvM6UQDg@mail.gmail.com Fixes: 42b0ef01e6b5 ("block: fix FS_IOC_GETLBMD_CAP parsing in blkdev_common_ioctl()") Signed-off-by: Klara Modin Reviewed-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- include/linux/blk-integrity.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index e04c6e5bf1c6..e67a2b6e8f11 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -97,7 +97,7 @@ static inline struct bio_vec rq_integrity_vec(struct request *rq) static inline int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd, struct logical_block_metadata_cap __user *argp) { - return -EOPNOTSUPP; + return -ENOIOCTLCMD; } static inline int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *b) -- cgit v1.2.3 From 5dc50b111b40003ed83f74324e8d4023f01bd93e Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Tue, 29 Jul 2025 22:58:49 +0800 Subject: ALSA: hda: Fix the wrong register was used for DVC of TAS2770 The wrong register was used for digital volume control of TAS2770, The definition was changed, and usage was also updated. Fixes: ab29b3460c5c ("ALSA: hda: Add TAS2770 support") Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20250729145849.55057-1-baojun.xu@ti.com Signed-off-by: Takashi Iwai --- include/sound/tas2770-tlv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/tas2770-tlv.h b/include/sound/tas2770-tlv.h index c0bd495b4a07..c7380925417a 100644 --- a/include/sound/tas2770-tlv.h +++ b/include/sound/tas2770-tlv.h @@ -14,10 +14,10 @@ #ifndef __TAS2770_TLV_H__ #define __TAS2770_TLV_H__ -#define TAS2770_DVC_LEVEL TASDEVICE_REG(0x0, 0x0, 0x17) +#define TAS2770_DVC_LEVEL TASDEVICE_REG(0x0, 0x0, 0x05) #define TAS2770_AMP_LEVEL TASDEVICE_REG(0x0, 0x0, 0x03) -static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2770_dvc_tlv, 1650, 50, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2770_dvc_tlv, -10000, 50, 0); static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2770_amp_tlv, 1100, 50, 0); #endif -- cgit v1.2.3 From 6cff20ce3b92ffbf2fc5eb9e5a030b3672aa414a Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 13 Jul 2025 16:31:01 +0200 Subject: PCI/ACPI: Fix runtime PM ref imbalance on Hot-Plug Capable ports pci_bridge_d3_possible() is called from both pcie_portdrv_probe() and pcie_portdrv_remove() to determine whether runtime power management shall be enabled (on probe) or disabled (on remove) on a PCIe port. The underlying assumption is that pci_bridge_d3_possible() always returns the same value, else a runtime PM reference imbalance would occur. That assumption is not given if the PCIe port is inaccessible on remove due to hot-unplug: pci_bridge_d3_possible() calls pciehp_is_native(), which accesses Config Space to determine whether the port is Hot-Plug Capable. An inaccessible port returns "all ones", which is converted to "all zeroes" by pcie_capability_read_dword(). Hence the port no longer seems Hot-Plug Capable on remove even though it was on probe. The resulting runtime PM ref imbalance causes warning messages such as: pcieport 0000:02:04.0: Runtime PM usage count underflow! Avoid the Config Space access (and thus the runtime PM ref imbalance) by caching the Hot-Plug Capable bit in struct pci_dev. The struct already contains an "is_hotplug_bridge" flag, which however is not only set on Hot-Plug Capable PCIe ports, but also Conventional PCI Hot-Plug bridges and ACPI slots. The flag identifies bridges which are allocated additional MMIO and bus number resources to allow for hierarchy expansion. The kernel is somewhat sloppily using "is_hotplug_bridge" in a number of places to identify Hot-Plug Capable PCIe ports, even though the flag encompasses other devices. Subsequent commits replace these occurrences with the new flag to clearly delineate Hot-Plug Capable PCIe ports from other kinds of hotplug bridges. Document the existing "is_hotplug_bridge" and the new "is_pciehp" flag and document the (non-obvious) requirement that pci_bridge_d3_possible() always returns the same value across the entire lifetime of a bridge, including its hot-removal. Fixes: 5352a44a561d ("PCI: pciehp: Make pciehp_is_native() stricter") Reported-by: Laurent Bigonville Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220216 Reported-by: Mario Limonciello Closes: https://lore.kernel.org/r/20250609020223.269407-3-superm1@kernel.org/ Link: https://lore.kernel.org/all/20250620025535.3425049-3-superm1@kernel.org/T/#u Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki Cc: stable@vger.kernel.org # v4.18+ Link: https://patch.msgid.link/fe5dcc3b2e62ee1df7905d746bde161eb1b3291c.1752390101.git.lukas@wunner.de --- include/linux/pci.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 05e68f35f392..d56d0dd80afb 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -328,6 +328,11 @@ struct rcec_ea; * determined (e.g., for Root Complex Integrated * Endpoints without the relevant Capability * Registers). + * @is_hotplug_bridge: Hotplug bridge of any kind (e.g. PCIe Hot-Plug Capable, + * Conventional PCI Hot-Plug, ACPI slot). + * Such bridges are allocated additional MMIO and bus + * number resources to allow for hierarchy expansion. + * @is_pciehp: PCIe Hot-Plug Capable bridge. */ struct pci_dev { struct list_head bus_list; /* Node in per-bus list */ @@ -451,6 +456,7 @@ struct pci_dev { unsigned int is_physfn:1; unsigned int is_virtfn:1; unsigned int is_hotplug_bridge:1; + unsigned int is_pciehp:1; unsigned int shpc_managed:1; /* SHPC owned by shpchp */ unsigned int is_thunderbolt:1; /* Thunderbolt controller */ /* -- cgit v1.2.3 From c2f9de5e2db29158a8caa86a37aa479488e4ba43 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 13 Jul 2025 16:31:04 +0200 Subject: PCI: Move is_pciehp check out of pciehp_is_native() pci_bridge_d3_possible() seeks to forbid runtime power management on: * Non Hot-Plug Capable PCIe ports which are nevertheless ACPI slots (recognizable as: bridge->is_hotplug_bridge && !bridge->is_pciehp) * Hot-Plug Capable PCIe ports for which platform firmware has not granted PCIe Native Hot-Plug control to the operating system (recognizable as: bridge->is_pciehp && !pciehp_is_native(bridge)) Somewhat confusingly, the check for is_hotplug_bridge is in pci_bridge_d3_possible(), whereas the one for is_pciehp is in pciehp_is_native(). For clarity, check is_pciehp directly in pci_bridge_d3_possible() (and in the other caller of pciehp_is_native(), hotplug_is_native()). Rephrase the code comment preceding these checks to no longer mention "System Management Mode", which is an x86 term inappropriate in generic PCI code. Likewise no longer mention "Thunderbolt on non-Macs", because there is nothing Thunderbolt-specific about these checks. It used to be the case that non-Macs relied on the platform for Thunderbolt tunnel management and hotplug, but they've since moved to OS-native tunnel management (as Macs always have), hence the code comment is no longer accurate. There is a subsequent check for is_hotplug_bridge further down in pci_bridge_d3_possible(). Change the check to is_pciehp because any ports matching "bridge->is_hotplug_bridge && !bridge->is_pciehp" are already filtered out at the top of the function. Do the same for another check in acpi_pci_bridge_d3(), which is called from pci_bridge_d3_possible() via platform_pci_bridge_d3(). No functional change intended. Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/18b2c2110ad0f27a34b189d793310b9c4f2f24a0.1752390102.git.lukas@wunner.de --- include/linux/pci_hotplug.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index ec77ccf1fc4d..ddf79641917f 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -104,6 +104,7 @@ static inline bool shpchp_is_native(struct pci_dev *bridge) { return true; } static inline bool hotplug_is_native(struct pci_dev *bridge) { - return pciehp_is_native(bridge) || shpchp_is_native(bridge); + return (bridge->is_pciehp && pciehp_is_native(bridge)) || + shpchp_is_native(bridge); } #endif -- cgit v1.2.3 From 71753c6ed2bf2aee5be26c1bc06a94c9e3713ade Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 29 Jul 2025 14:23:05 -0400 Subject: unwind_user: Add user space unwinding API with frame pointer support Introduce a generic API for unwinding user stacks. In order to expand user space unwinding to be able to handle more complex scenarios, such as deferred unwinding and reading user space information, create a generic interface that all architectures can use that support the various unwinding methods. This is an alternative method for handling user space stack traces from the simple stack_trace_save_user() API. This does not replace that interface, but this interface will be used to expand the functionality of user space stack walking. None of the structures introduced will be exposed to user space tooling. Support for frame pointer unwinding is added. For an architecture to support frame pointer unwinding it needs to enable CONFIG_HAVE_UNWIND_USER_FP and define ARCH_INIT_USER_FP_FRAME. By encoding the frame offsets in struct unwind_user_frame, much of this code can also be reused for future unwinder implementations like sframe. Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. Marchesi" Cc: Beau Belgrave Cc: Linus Torvalds Cc: Andrew Morton Cc: Jens Axboe Cc: Florian Weimer Cc: Sam James Link: https://lore.kernel.org/20250729182404.975790139@kernel.org Reviewed-by: Jens Remus Signed-off-by: Josh Poimboeuf Co-developed-by: Mathieu Desnoyers Link: https://lore.kernel.org/all/20250710164301.3094-2-mathieu.desnoyers@efficios.com/ Signed-off-by: Mathieu Desnoyers Co-developed-by: Steven Rostedt (Google) Signed-off-by: Steven Rostedt (Google) --- include/asm-generic/Kbuild | 1 + include/asm-generic/unwind_user.h | 5 +++++ include/linux/unwind_user.h | 14 +++++++++++++ include/linux/unwind_user_types.h | 44 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 64 insertions(+) create mode 100644 include/asm-generic/unwind_user.h create mode 100644 include/linux/unwind_user.h create mode 100644 include/linux/unwind_user_types.h (limited to 'include') diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 8675b7b4ad23..295c94a3ccc1 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -59,6 +59,7 @@ mandatory-y += tlbflush.h mandatory-y += topology.h mandatory-y += trace_clock.h mandatory-y += uaccess.h +mandatory-y += unwind_user.h mandatory-y += vermagic.h mandatory-y += vga.h mandatory-y += video.h diff --git a/include/asm-generic/unwind_user.h b/include/asm-generic/unwind_user.h new file mode 100644 index 000000000000..b8882b909944 --- /dev/null +++ b/include/asm-generic/unwind_user.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_UNWIND_USER_H +#define _ASM_GENERIC_UNWIND_USER_H + +#endif /* _ASM_GENERIC_UNWIND_USER_H */ diff --git a/include/linux/unwind_user.h b/include/linux/unwind_user.h new file mode 100644 index 000000000000..7f7282516bf5 --- /dev/null +++ b/include/linux/unwind_user.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UNWIND_USER_H +#define _LINUX_UNWIND_USER_H + +#include +#include + +#ifndef ARCH_INIT_USER_FP_FRAME + #define ARCH_INIT_USER_FP_FRAME +#endif + +int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries); + +#endif /* _LINUX_UNWIND_USER_H */ diff --git a/include/linux/unwind_user_types.h b/include/linux/unwind_user_types.h new file mode 100644 index 000000000000..a449f15be890 --- /dev/null +++ b/include/linux/unwind_user_types.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UNWIND_USER_TYPES_H +#define _LINUX_UNWIND_USER_TYPES_H + +#include + +/* + * Unwind types, listed in priority order: lower numbers are attempted first if + * available. + */ +enum unwind_user_type_bits { + UNWIND_USER_TYPE_FP_BIT = 0, + + NR_UNWIND_USER_TYPE_BITS, +}; + +enum unwind_user_type { + /* Type "none" for the start of stack walk iteration. */ + UNWIND_USER_TYPE_NONE = 0, + UNWIND_USER_TYPE_FP = BIT(UNWIND_USER_TYPE_FP_BIT), +}; + +struct unwind_stacktrace { + unsigned int nr; + unsigned long *entries; +}; + +struct unwind_user_frame { + s32 cfa_off; + s32 ra_off; + s32 fp_off; + bool use_fp; +}; + +struct unwind_user_state { + unsigned long ip; + unsigned long sp; + unsigned long fp; + enum unwind_user_type current_type; + unsigned int available_types; + bool done; +}; + +#endif /* _LINUX_UNWIND_USER_TYPES_H */ -- cgit v1.2.3 From 5e32d0f15cc5c843a4115c4644d984d42524c794 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 29 Jul 2025 14:23:06 -0400 Subject: unwind_user/deferred: Add unwind_user_faultable() Add a new API to retrieve a user space callstack called unwind_user_faultable(). The difference between this user space stack tracer from the current user space stack tracer is that this must be called from faultable context as it may use routines to access user space data that needs to be faulted in. It can be safely called from entering or exiting a system call as the code can still be faulted in there. This code is based on work by Josh Poimboeuf's deferred unwinding code: Link: https://lore.kernel.org/all/6052e8487746603bdb29b65f4033e739092d9925.1737511963.git.jpoimboe@kernel.org/ Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. Marchesi" Cc: Beau Belgrave Cc: Linus Torvalds Cc: Andrew Morton Cc: Jens Axboe Cc: Florian Weimer Cc: Sam James Link: https://lore.kernel.org/20250729182405.147896868@kernel.org Reviewed-by: Jens Remus Signed-off-by: Steven Rostedt (Google) --- include/linux/sched.h | 5 +++++ include/linux/unwind_deferred.h | 24 ++++++++++++++++++++++++ include/linux/unwind_deferred_types.h | 9 +++++++++ 3 files changed, 38 insertions(+) create mode 100644 include/linux/unwind_deferred.h create mode 100644 include/linux/unwind_deferred_types.h (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f78a64beb52..59fdf7d9bb1e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -46,6 +46,7 @@ #include #include #include +#include #include /* task_struct member predeclarations (sorted alphabetically): */ @@ -1654,6 +1655,10 @@ struct task_struct { struct user_event_mm *user_event_mm; #endif +#ifdef CONFIG_UNWIND_USER + struct unwind_task_info unwind_info; +#endif + /* CPU-specific state of this task: */ struct thread_struct thread; diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h new file mode 100644 index 000000000000..a5f6e8f8a1a2 --- /dev/null +++ b/include/linux/unwind_deferred.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UNWIND_USER_DEFERRED_H +#define _LINUX_UNWIND_USER_DEFERRED_H + +#include +#include + +#ifdef CONFIG_UNWIND_USER + +void unwind_task_init(struct task_struct *task); +void unwind_task_free(struct task_struct *task); + +int unwind_user_faultable(struct unwind_stacktrace *trace); + +#else /* !CONFIG_UNWIND_USER */ + +static inline void unwind_task_init(struct task_struct *task) {} +static inline void unwind_task_free(struct task_struct *task) {} + +static inline int unwind_user_faultable(struct unwind_stacktrace *trace) { return -ENOSYS; } + +#endif /* !CONFIG_UNWIND_USER */ + +#endif /* _LINUX_UNWIND_USER_DEFERRED_H */ diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h new file mode 100644 index 000000000000..aa32db574e43 --- /dev/null +++ b/include/linux/unwind_deferred_types.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UNWIND_USER_DEFERRED_TYPES_H +#define _LINUX_UNWIND_USER_DEFERRED_TYPES_H + +struct unwind_task_info { + unsigned long *entries; +}; + +#endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */ -- cgit v1.2.3 From 94fd44648dae2a5b6149a41faa0b07928c3e1963 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 29 Jul 2025 16:18:25 -0700 Subject: fortify: Fix incorrect reporting of read buffer size When FORTIFY_SOURCE reports about a run-time buffer overread, the wrong buffer size was being shown in the error message. (The bounds checking was correct.) Fixes: 3d965b33e40d ("fortify: Improve buffer overflow reporting") Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20250729231817.work.023-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/fortify-string.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index e4ce1cae03bf..b3b53f8c1b28 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -596,7 +596,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, if (p_size != SIZE_MAX && p_size < size) fortify_panic(func, FORTIFY_WRITE, p_size, size, true); else if (q_size != SIZE_MAX && q_size < size) - fortify_panic(func, FORTIFY_READ, p_size, size, true); + fortify_panic(func, FORTIFY_READ, q_size, size, true); /* * Warn when writing beyond destination field size. -- cgit v1.2.3 From f627b51aaa041cba715b59026cf2d9cb1476c7ed Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 29 Jul 2025 16:41:00 -0700 Subject: compiler_types: Provide __no_kstack_erase to disable coverage only on Clang In order to support Clang's stack depth tracking (for Linux's kstack_erase feature), the coverage sanitizer needed to be disabled for __init (and __head) section code. Doing this universally (i.e. for GCC too) created a number of unexpected problems, ranging from changes to inlining logic to failures to DCE code on earlier GCC versions. Since this change is only needed for Clang, specialize it so that GCC doesn't see the change as it isn't needed there (the GCC implementation of kstack_erase uses a GCC plugin that removes stack depth tracking instrumentation from __init sections during a late pass in the IR). Successfully build and boot tested with GCC 12 and Clang 22. Fixes: 381a38ea53d2 ("init.h: Disable sanitizer coverage for __init and __head") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507270258.neWuiXLd-lkp@intel.com/ Reported-by: syzbot+5245cb609175fb6e8122@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/6888d004.a00a0220.26d0e1.0004.GAE@google.com/ Reviewed-by: Nathan Chancellor Reviewed-by: Marco Elver Link: https://lore.kernel.org/r/20250729234055.it.233-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/compiler-clang.h | 3 +++ include/linux/compiler_types.h | 4 ++++ include/linux/init.h | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 4fc8e26914ad..fa4ffe037bc7 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -89,6 +89,9 @@ #define __no_sanitize_coverage #endif +/* Only Clang needs to disable the coverage sanitizer for kstack_erase. */ +#define __no_kstack_erase __no_sanitize_coverage + #if __has_feature(shadow_call_stack) # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 2b77d12e07b2..16755431fc11 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -424,6 +424,10 @@ struct ftrace_likely_data { # define randomized_struct_fields_end #endif +#ifndef __no_kstack_erase +# define __no_kstack_erase +#endif + #ifndef __noscs # define __noscs #endif diff --git a/include/linux/init.h b/include/linux/init.h index c65a050d52a7..a60d32d227ee 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -51,7 +51,7 @@ discard it in modules) */ #define __init __section(".init.text") __cold __latent_entropy \ __noinitretpoline \ - __no_sanitize_coverage + __no_kstack_erase #define __initdata __section(".init.data") #define __initconst __section(".init.rodata") #define __exitdata __section(".exit.data") -- cgit v1.2.3 From a11a51896572273d04a9f6011ad22738c52ba554 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 4 Jun 2025 15:52:19 +0200 Subject: spi: spi-mem: Take into account the actual maximum frequency In order to pick the best variant, the duration of each typical operation is derived and then compared. These durations are based on the maximum capabilities of the chips, which are commonly the limiting factors. However there are other possible limiting pieces, such as the hardware layout, EMC considerations and in some cases, the SPI controller itself. We need to take this into account to further refine our variant choice, so let's use the actual frequency that will be used for the operation instead of the theoretical maximum. Signed-off-by: Miquel Raynal Reviewed-by: Mark Brown --- include/linux/spi/spi-mem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index c4830dfaff3d..82390712794c 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -424,7 +424,7 @@ bool spi_mem_default_supports_op(struct spi_mem *mem, int spi_mem_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op); void spi_mem_adjust_op_freq(struct spi_mem *mem, struct spi_mem_op *op); -u64 spi_mem_calc_op_duration(struct spi_mem_op *op); +u64 spi_mem_calc_op_duration(struct spi_mem *mem, struct spi_mem_op *op); bool spi_mem_supports_op(struct spi_mem *mem, const struct spi_mem_op *op); -- cgit v1.2.3 From 5de7ea49653f6b988525b559802da615a61ffbea Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 18 Jun 2025 14:14:20 +0200 Subject: mtd: spinand: Fix macro alignment No functional change, just a style fix to align with the other macros all around. Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 15eaa09da998..28a013f4f4f3 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -71,9 +71,9 @@ #define SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1), \ - SPI_MEM_OP_ADDR(2, addr, 1), \ - SPI_MEM_OP_DUMMY(ndummy, 1), \ - SPI_MEM_OP_DATA_IN(len, buf, 1)) + SPI_MEM_OP_ADDR(2, addr, 1), \ + SPI_MEM_OP_DUMMY(ndummy, 1), \ + SPI_MEM_OP_DATA_IN(len, buf, 1)) #define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_1S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x03, 1), \ -- cgit v1.2.3 From d81ad9d78e2cd5bdefd390a83553203668a96092 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 18 Jun 2025 14:14:21 +0200 Subject: mtd: spinand: Add a frequency field to all READ_FROM_CACHE variants These macros had initially no frequency field. When I added the "maximum operation frequency" field, I did it initially on very common macros and I decided to add an optional field for that (with VA_ARGS) in order to prevent massively unreadable changes. I then added new variants in the spinand.h header, and requested a frequency field for them by default. Some times later, I also added maximum frequencies to other existing variants, but I did it incorrectly, without noticing I was wrong because the field was optional. This mix is error prone, so let's do what I should have done since the very beginning: add a frequency field to all READ_FROM_CACHE variants. There is no functional change. Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 57 ++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 28a013f4f4f3..61a4571cec7e 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -62,30 +62,33 @@ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_NO_DATA) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_1S_OP(addr, ndummy, buf, len, ...) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_1S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x03, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 1), \ - SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) + SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ - SPI_MEM_OP_DATA_IN(len, buf, 1)) + SPI_MEM_OP_DATA_IN(len, buf, 1), \ + SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_1S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_1S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x03, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ - SPI_MEM_OP_DATA_IN(len, buf, 1)) + SPI_MEM_OP_DATA_IN(len, buf, 1), \ + SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_FAST_3A_1S_1S_1S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_FAST_3A_1S_1S_1S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ - SPI_MEM_OP_DATA_IN(len, buf, 1)) + SPI_MEM_OP_DATA_IN(len, buf, 1), \ + SPI_MEM_OP_MAX_FREQ(freq)) #define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_1D_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x0d, 1), \ @@ -94,17 +97,19 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 1), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_2S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_2S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x3b, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ - SPI_MEM_OP_DATA_IN(len, buf, 2)) + SPI_MEM_OP_DATA_IN(len, buf, 2), \ + SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_2S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_2S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x3b, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ - SPI_MEM_OP_DATA_IN(len, buf, 2)) + SPI_MEM_OP_DATA_IN(len, buf, 2), \ + SPI_MEM_OP_MAX_FREQ(freq)) #define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_2D_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x3d, 1), \ @@ -113,18 +118,19 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 2), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len, ...) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ SPI_MEM_OP_ADDR(2, addr, 2), \ SPI_MEM_OP_DUMMY(ndummy, 2), \ SPI_MEM_OP_DATA_IN(len, buf, 2), \ - SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) + SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_2S_2S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_2S_2S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ SPI_MEM_OP_ADDR(3, addr, 2), \ SPI_MEM_OP_DUMMY(ndummy, 2), \ - SPI_MEM_OP_DATA_IN(len, buf, 2)) + SPI_MEM_OP_DATA_IN(len, buf, 2), \ + SPI_MEM_OP_MAX_FREQ(freq)) #define SPINAND_PAGE_READ_FROM_CACHE_1S_2D_2D_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xbd, 1), \ @@ -133,17 +139,19 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 2), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_4S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_4S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ - SPI_MEM_OP_DATA_IN(len, buf, 4)) + SPI_MEM_OP_DATA_IN(len, buf, 4), \ + SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_4S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_4S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ - SPI_MEM_OP_DATA_IN(len, buf, 4)) + SPI_MEM_OP_DATA_IN(len, buf, 4), \ + SPI_MEM_OP_MAX_FREQ(freq)) #define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_4D_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x6d, 1), \ @@ -152,18 +160,19 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 4), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len, ...) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ SPI_MEM_OP_ADDR(2, addr, 4), \ SPI_MEM_OP_DUMMY(ndummy, 4), \ SPI_MEM_OP_DATA_IN(len, buf, 4), \ - SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) + SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_4S_4S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_4S_4S_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ SPI_MEM_OP_ADDR(3, addr, 4), \ SPI_MEM_OP_DUMMY(ndummy, 4), \ - SPI_MEM_OP_DATA_IN(len, buf, 4)) + SPI_MEM_OP_DATA_IN(len, buf, 4), \ + SPI_MEM_OP_MAX_FREQ(freq)) #define SPINAND_PAGE_READ_FROM_CACHE_1S_4D_4D_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xed, 1), \ -- cgit v1.2.3 From da55809ebb45d1d80b7a388ffef841ed683e1a6f Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 18 Jun 2025 14:14:22 +0200 Subject: mtd: spinand: Add a ->configure_chip() hook There is already a manufacturer hook, which is manufacturer specific but not chip specific. We no longer have access to the actual NAND identity at this stage so let's add a per-chip configuration hook to align the chip configuration (if any) with the core's setting. Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 61a4571cec7e..69674fd191d9 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -493,6 +493,7 @@ struct spinand_user_otp { * @op_variants.update_cache: variants of the update-cache operation * @select_target: function used to select a target/die. Required only for * multi-die chips + * @configure_chip: Align the chip configuration with the core settings * @set_cont_read: enable/disable continuous cached reads * @fact_otp: SPI NAND factory OTP info. * @user_otp: SPI NAND user OTP info. @@ -516,6 +517,7 @@ struct spinand_info { } op_variants; int (*select_target)(struct spinand_device *spinand, unsigned int target); + int (*configure_chip)(struct spinand_device *spinand); int (*set_cont_read)(struct spinand_device *spinand, bool enable); struct spinand_fact_otp fact_otp; @@ -548,6 +550,9 @@ struct spinand_info { #define SPINAND_SELECT_TARGET(__func) \ .select_target = __func +#define SPINAND_CONFIGURE_CHIP(__configure_chip) \ + .configure_chip = __configure_chip + #define SPINAND_CONT_READ(__set_cont_read) \ .set_cont_read = __set_cont_read @@ -616,6 +621,7 @@ struct spinand_dirmap { * passed in spi_mem_op be DMA-able, so we can't based the bufs on * the stack * @manufacturer: SPI NAND manufacturer information + * @configure_chip: Align the chip configuration with the core settings * @cont_read_possible: Field filled by the core once the whole system * configuration is known to tell whether continuous reads are * suitable to use or not in general with this chip/configuration. @@ -656,6 +662,7 @@ struct spinand_device { const struct spinand_manufacturer *manufacturer; void *priv; + int (*configure_chip)(struct spinand_device *spinand); bool cont_read_possible; int (*set_cont_read)(struct spinand_device *spinand, bool enable); -- cgit v1.2.3 From f1a91175faaab02a45d1ceb313a315a5bfeb5416 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 18 Jun 2025 14:14:23 +0200 Subject: mtd: spinand: winbond: Enable high-speed modes on w25n0xjw w25n0xjw chips have a high-speed capability hidden in a configuration register. Once enabled, dual/quad SDR reads may be performed at a much higher frequency. Implement the new ->configure_chip() hook for this purpose and configure the SR4 register accordingly. Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 69674fd191d9..53c881e41fc7 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -739,6 +739,7 @@ int spinand_match_and_init(struct spinand_device *spinand, enum spinand_readid_method rdid_method); int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val); +int spinand_read_reg_op(struct spinand_device *spinand, u8 reg, u8 *val); int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 val); int spinand_select_target(struct spinand_device *spinand, unsigned int target); -- cgit v1.2.3 From 535f30d997baa5e5c6a3a4024d49e1871232c72b Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 18 Jun 2025 14:14:24 +0200 Subject: mtd: spinand: winbond: Enable high-speed modes on w35n0xjw w35n0xjw chips can run at up to 166MHz in octal mode, but this is only possible after programming various VCR registers. Implement the new ->configure_chip() hook for this purpose. Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 53c881e41fc7..27a45bdab7ec 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -741,6 +741,7 @@ int spinand_match_and_init(struct spinand_device *spinand, int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val); int spinand_read_reg_op(struct spinand_device *spinand, u8 reg, u8 *val); int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 val); +int spinand_write_enable_op(struct spinand_device *spinand); int spinand_select_target(struct spinand_device *spinand, unsigned int target); int spinand_wait(struct spinand_device *spinand, unsigned long initial_delay_us, -- cgit v1.2.3 From 6f02527729bd31ca4e473bff19fda4ccd5889148 Mon Sep 17 00:00:00 2001 From: Norman Maurer Date: Mon, 28 Jul 2025 20:59:53 -1000 Subject: io_uring/net: Allow to do vectorized send At the moment you have to use sendmsg for vectorized send. While this works it's suboptimal as it also means you need to allocate a struct msghdr that needs to be kept alive until a submission happens. We can remove this limitation by just allowing to use send directly. Signed-off-by: Norman Maurer Link: https://lore.kernel.org/r/20250729065952.26646-1-norman_maurer@apple.com [axboe: remove -EINVAL return for SENDMSG and SEND_VECTORIZED] [axboe: allow send_zc to set SEND_VECTORIZED too] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index b8a0e70ee2fd..6957dc539d83 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -392,12 +392,16 @@ enum io_uring_op { * the starting buffer ID in cqe->flags as per * usual for provided buffer usage. The buffers * will be contiguous from the starting buffer ID. + * + * IORING_SEND_VECTORIZED If set, SEND[_ZC] will take a pointer to a io_vec + * to allow vectorized send operations. */ #define IORING_RECVSEND_POLL_FIRST (1U << 0) #define IORING_RECV_MULTISHOT (1U << 1) #define IORING_RECVSEND_FIXED_BUF (1U << 2) #define IORING_SEND_ZC_REPORT_USAGE (1U << 3) #define IORING_RECVSEND_BUNDLE (1U << 4) +#define IORING_SEND_VECTORIZED (1U << 5) /* * cqe.res for IORING_CQE_F_NOTIF if -- cgit v1.2.3 From 907a99c314a5a695e35acff78ac61f4ec950a6d3 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Tue, 22 Jul 2025 11:33:40 +0800 Subject: md: rename recovery_cp to resync_offset 'recovery_cp' was used to represent the progress of sync, but its name contains recovery, which can cause confusion. Replaces 'recovery_cp' with 'resync_offset' for clarity. Signed-off-by: Li Nan Link: https://lore.kernel.org/linux-raid/20250722033340.1933388-1-linan666@huaweicloud.com Signed-off-by: Yu Kuai --- include/uapi/linux/raid/md_p.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index ff47b6f0ba0f..b13946287277 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -173,7 +173,7 @@ typedef struct mdp_superblock_s { #else #error unspecified endianness #endif - __u32 recovery_cp; /* 11 recovery checkpoint sector count */ + __u32 resync_offset; /* 11 resync checkpoint sector count */ /* There are only valid for minor_version > 90 */ __u64 reshape_position; /* 12,13 next address in array-space for reshape */ __u32 new_level; /* 14 new level we are reshaping to */ -- cgit v1.2.3 From 5523a466e905b6287b94654ddb364536f2f948cf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 Jul 2025 11:06:03 +0200 Subject: i3c: fix module_i3c_i2c_driver() with I3C=n When CONFIG_I3C is disabled and the i3c_i2c_driver_register() happens to not be inlined, any driver calling it still references the i3c_driver instance, which then causes a link failure: x86_64-linux-ld: drivers/hwmon/lm75.o: in function `lm75_i3c_reg_read': lm75.c:(.text+0xc61): undefined reference to `i3cdev_to_dev' x86_64-linux-ld: lm75.c:(.text+0xd25): undefined reference to `i3c_device_do_priv_xfers' x86_64-linux-ld: lm75.c:(.text+0xdd8): undefined reference to `i3c_device_do_priv_xfers' This issue was part of the original i3c code, but only now caused problems when i3c support got added to lm75. Change the 'inline' annotations in the header to '__always_inline' to ensure that the dead-code-elimination pass in the compiler can optimize it out as intended. Fixes: 6071d10413ff ("hwmon: (lm75) add I3C support for P3T1755") Fixes: 3a379bbcea0a ("i3c: Add core I3C infrastructure") Signed-off-by: Arnd Bergmann Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Guenter Roeck Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250725090609.2456262-1-arnd@kernel.org Signed-off-by: Alexandre Belloni --- include/linux/i3c/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index b674f64d0822..7f136de4b73e 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -245,7 +245,7 @@ void i3c_driver_unregister(struct i3c_driver *drv); * * Return: 0 if both registrations succeeds, a negative error code otherwise. */ -static inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, +static __always_inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, struct i2c_driver *i2cdrv) { int ret; @@ -270,7 +270,7 @@ static inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, * Note that when CONFIG_I3C is not enabled, this function only unregisters the * @i2cdrv. */ -static inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv, +static __always_inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv, struct i2c_driver *i2cdrv) { if (IS_ENABLED(CONFIG_I3C)) -- cgit v1.2.3 From 9c0609d685b27a0bb392390680207baa820ed118 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 24 Jul 2025 11:41:40 +0200 Subject: i3c: Standardize defines for specification parameters Align existing defines to follow the consistent pattern: I3C_BUS___. Prepare the codebase for adding new parameters and help avoid duplication. Signed-off-by: Wolfram Sang Tested-by: Tommaso Merciai Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250724094146.6443-2-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- include/linux/i3c/master.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index c67922ece617..7dfcbe530515 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -249,10 +249,11 @@ struct i3c_device { */ #define I3C_BUS_MAX_DEVS 11 -#define I3C_BUS_MAX_I3C_SCL_RATE 12900000 -#define I3C_BUS_TYP_I3C_SCL_RATE 12500000 -#define I3C_BUS_I2C_FM_PLUS_SCL_RATE 1000000 -#define I3C_BUS_I2C_FM_SCL_RATE 400000 +/* Taken from the I3C Spec V1.1.1, chapter 6.2. "Timing specification" */ +#define I3C_BUS_I2C_FM_PLUS_SCL_MAX_RATE 1000000 +#define I3C_BUS_I2C_FM_SCL_MAX_RATE 400000 +#define I3C_BUS_I3C_SCL_MAX_RATE 12900000 +#define I3C_BUS_I3C_SCL_TYP_RATE 12500000 #define I3C_BUS_TLOW_OD_MIN_NS 200 /** -- cgit v1.2.3 From 8acf1f3bae1ea48949458b67d68a72a95c3244a4 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 24 Jul 2025 11:41:41 +0200 Subject: i3c: Add more parameters for controllers to the header Add standard timing value definition from specification. Signed-off-by: Wolfram Sang Tested-by: Tommaso Merciai Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250724094146.6443-3-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- include/linux/i3c/master.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 7dfcbe530515..043f5c7ff398 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -254,6 +254,10 @@ struct i3c_device { #define I3C_BUS_I2C_FM_SCL_MAX_RATE 400000 #define I3C_BUS_I3C_SCL_MAX_RATE 12900000 #define I3C_BUS_I3C_SCL_TYP_RATE 12500000 +#define I3C_BUS_TAVAL_MIN_NS 1000 +#define I3C_BUS_TBUF_MIXED_FM_MIN_NS 1300 +#define I3C_BUS_THIGH_MIXED_MAX_NS 41 +#define I3C_BUS_TIDLE_MIN_NS 200000 #define I3C_BUS_TLOW_OD_MIN_NS 200 /** -- cgit v1.2.3 From 0060beec0bfa647c4b510df188b1c4673a197839 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 28 Jul 2025 13:04:29 +0900 Subject: ata: libata-sata: Add link_power_management_supported sysfs attribute A port link power management (LPM) policy can be controlled using the link_power_management_policy sysfs host attribute. However, this attribute exists also for hosts that do not support LPM and in such case, attempting to change the LPM policy for the host (port) will fail with -EOPNOTSUPP. Introduce the new sysfs link_power_management_supported host attribute to indicate to the user if a the port and the devices connected to the port for the host support LPM, which implies that the link_power_management_policy attribute can be used. Since checking that a port and its devices support LPM is common between the new ata_scsi_lpm_supported_show() function and the existing ata_scsi_lpm_store() function, the new helper ata_scsi_lpm_supported() is introduced. Fixes: 413e800cadbf ("ata: libata-sata: Disallow changing LPM state if not supported") Reported-by: Borah, Chaitanya Kumar Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202507251014.a5becc3b-lkp@intel.com Signed-off-by: Damien Le Moal Reviewed-by: Martin K. Petersen --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 912ace523880..0620dd67369f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -545,6 +545,7 @@ typedef void (*ata_postreset_fn_t)(struct ata_link *link, unsigned int *classes) extern struct device_attribute dev_attr_unload_heads; #ifdef CONFIG_SATA_HOST +extern struct device_attribute dev_attr_link_power_management_supported; extern struct device_attribute dev_attr_link_power_management_policy; extern struct device_attribute dev_attr_ncq_prio_supported; extern struct device_attribute dev_attr_ncq_prio_enable; -- cgit v1.2.3 From 199d9ffb31650f948dd342ade1c1b920e157630f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 11 Jul 2025 15:31:36 +0200 Subject: module: move 'struct module_use' to internal.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The struct was moved to the public header file in commit c8e21ced08b3 ("module: fix kdb's illicit use of struct module_use."). Back then the structure was used outside of the module core. Nowadays this is not true anymore, so the structure can be made internal. Signed-off-by: Thomas Weißschuh Reviewed-by: Daniel Gomez Reviewed-by: Petr Pavlu Link: https://lore.kernel.org/r/20250711-kunit-ifdef-modules-v2-1-39443decb1f8@linutronix.de Signed-off-by: Daniel Gomez --- include/linux/module.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index a7cac01d95e7..97c38e1cd377 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -313,13 +313,6 @@ void *__symbol_get_gpl(const char *symbol); __used __section(".no_trim_symbol") = __stringify(x); \ (typeof(&x))(__symbol_get(__stringify(x))); }) -/* modules using other modules: kdb wants to see this. */ -struct module_use { - struct list_head source_list; - struct list_head target_list; - struct module *source, *target; -}; - enum module_state { MODULE_STATE_LIVE, /* Normal state. */ MODULE_STATE_COMING, /* Full formed, running module_init. */ -- cgit v1.2.3 From 818783c804bc051f7faf0ac226b5597f8259c6f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 11 Jul 2025 15:31:37 +0200 Subject: module: make structure definitions always visible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To write code that works with both CONFIG_MODULES=y and CONFIG_MODULES=n it is convenient to use "if (IS_ENABLED(CONFIG_MODULES))" over raw #ifdef. The code will still fully typechecked but the unreachable parts are discarded by the compiler. This prevents accidental breakage when a certain kconfig combination was not specifically tested by the developer. This pattern is already supported to some extend by module.h defining empty stub functions if CONFIG_MODULES=n. However some users of module.h work on the structured defined by module.h. Therefore these structure definitions need to be visible, too. Many structure members are still gated by specific configuration settings. The assumption for those is that the code using them will be gated behind the same configuration setting anyways. Signed-off-by: Thomas Weißschuh Reviewed-by: Daniel Gomez Link: https://lore.kernel.org/r/20250711-kunit-ifdef-modules-v2-2-39443decb1f8@linutronix.de Signed-off-by: Daniel Gomez --- include/linux/module.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 97c38e1cd377..5fe812de2d84 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -303,16 +303,6 @@ static typeof(name) __mod_device_table__##type##__##name \ struct notifier_block; -#ifdef CONFIG_MODULES - -/* Get/put a kernel symbol (calls must be symmetric) */ -void *__symbol_get(const char *symbol); -void *__symbol_get_gpl(const char *symbol); -#define symbol_get(x) ({ \ - static const char __notrim[] \ - __used __section(".no_trim_symbol") = __stringify(x); \ - (typeof(&x))(__symbol_get(__stringify(x))); }) - enum module_state { MODULE_STATE_LIVE, /* Normal state. */ MODULE_STATE_COMING, /* Full formed, running module_init. */ @@ -597,6 +587,16 @@ struct module { #define MODULE_ARCH_INIT {} #endif +#ifdef CONFIG_MODULES + +/* Get/put a kernel symbol (calls must be symmetric) */ +void *__symbol_get(const char *symbol); +void *__symbol_get_gpl(const char *symbol); +#define symbol_get(x) ({ \ + static const char __notrim[] \ + __used __section(".no_trim_symbol") = __stringify(x); \ + (typeof(&x))(__symbol_get(__stringify(x))); }) + #ifndef HAVE_ARCH_KALLSYMS_SYMBOL_VALUE static inline unsigned long kallsyms_symbol_value(const Elf_Sym *sym) { -- cgit v1.2.3 From bdc877ba6b7ff1b6d2ebeff11e63da4a50a54854 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 30 Jun 2025 16:32:34 +0200 Subject: module: Restore the moduleparam prefix length check The moduleparam code allows modules to provide their own definition of MODULE_PARAM_PREFIX, instead of using the default KBUILD_MODNAME ".". Commit 730b69d22525 ("module: check kernel param length at compile time, not runtime") added a check to ensure the prefix doesn't exceed MODULE_NAME_LEN, as this is what param_sysfs_builtin() expects. Later, commit 58f86cc89c33 ("VERIFY_OCTAL_PERMISSIONS: stricter checking for sysfs perms.") removed this check, but there is no indication this was intentional. Since the check is still useful for param_sysfs_builtin() to function properly, reintroduce it in __module_param_call(), but in a modernized form using static_assert(). While here, clean up the __module_param_call() comments. In particular, remove the comment "Default value instead of permissions?", which comes from commit 9774a1f54f17 ("[PATCH] Compile-time check re world-writeable module params"). This comment was related to the test variable __param_perm_check_##name, which was removed in the previously mentioned commit 58f86cc89c33. Fixes: 58f86cc89c33 ("VERIFY_OCTAL_PERMISSIONS: stricter checking for sysfs perms.") Signed-off-by: Petr Pavlu Reviewed-by: Daniel Gomez Link: https://lore.kernel.org/r/20250630143535.267745-4-petr.pavlu@suse.com Signed-off-by: Daniel Gomez --- include/linux/moduleparam.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index bfb85fd13e1f..110e9d09de24 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -282,10 +282,9 @@ struct kparam_array #define __moduleparam_const const #endif -/* This is the fundamental function for registering boot/module - parameters. */ +/* This is the fundamental function for registering boot/module parameters. */ #define __module_param_call(prefix, name, ops, arg, perm, level, flags) \ - /* Default value instead of permissions? */ \ + static_assert(sizeof(""prefix) - 1 <= MAX_PARAM_PREFIX_LEN); \ static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used __section("__param") \ -- cgit v1.2.3 From 40a826bd6c82ae45cfd3a19cd2a60a10f56b74c0 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 30 Jun 2025 16:32:36 +0200 Subject: module: Rename MAX_PARAM_PREFIX_LEN to __MODULE_NAME_LEN The maximum module name length (MODULE_NAME_LEN) is somewhat confusingly defined in terms of the maximum parameter prefix length (MAX_PARAM_PREFIX_LEN), when in fact the dependency is in the opposite direction. This split originates from commit 730b69d22525 ("module: check kernel param length at compile time, not runtime"). The code needed to use MODULE_NAME_LEN in moduleparam.h, but because module.h requires moduleparam.h, this created a circular dependency. It was resolved by introducing MAX_PARAM_PREFIX_LEN in moduleparam.h and defining MODULE_NAME_LEN in module.h in terms of MAX_PARAM_PREFIX_LEN. Rename MAX_PARAM_PREFIX_LEN to __MODULE_NAME_LEN for clarity. This matches the similar approach of defining MODULE_INFO in module.h and __MODULE_INFO in moduleparam.h. Signed-off-by: Petr Pavlu Reviewed-by: Daniel Gomez Link: https://lore.kernel.org/r/20250630143535.267745-6-petr.pavlu@suse.com Signed-off-by: Daniel Gomez --- include/linux/module.h | 2 +- include/linux/moduleparam.h | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 5fe812de2d84..313ecb8e5181 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -33,7 +33,7 @@ #include #include -#define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN +#define MODULE_NAME_LEN __MODULE_NAME_LEN struct modversion_info { unsigned long crc; diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 110e9d09de24..a04a2bc4f51e 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -6,6 +6,13 @@ #include #include +/* + * The maximum module name length, including the NUL byte. + * Chosen so that structs with an unsigned long line up, specifically + * modversion_info. + */ +#define __MODULE_NAME_LEN (64 - sizeof(unsigned long)) + /* You can override this manually, but generally this should match the module name. */ #ifdef MODULE @@ -17,9 +24,6 @@ #define __MODULE_INFO_PREFIX KBUILD_MODNAME "." #endif -/* Chosen so that structs with an unsigned long line up. */ -#define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long)) - #define __MODULE_INFO(tag, name, info) \ static const char __UNIQUE_ID(name)[] \ __used __section(".modinfo") __aligned(1) \ @@ -284,7 +288,7 @@ struct kparam_array /* This is the fundamental function for registering boot/module parameters. */ #define __module_param_call(prefix, name, ops, arg, perm, level, flags) \ - static_assert(sizeof(""prefix) - 1 <= MAX_PARAM_PREFIX_LEN); \ + static_assert(sizeof(""prefix) - 1 <= __MODULE_NAME_LEN); \ static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used __section("__param") \ -- cgit v1.2.3 From b9c73524106e1c0c857006fb9ff2e5a510dc4021 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 29 Jul 2025 14:23:07 -0400 Subject: unwind_user/deferred: Add unwind cache Cache the results of the unwind to ensure the unwind is only performed once, even when called by multiple tracers. The cache nr_entries gets cleared every time the task exits the kernel. When a stacktrace is requested, nr_entries gets set to the number of entries in the stacktrace. If another stacktrace is requested, if nr_entries is not zero, then it contains the same stacktrace that would be retrieved so it is not processed again and the entries is given to the caller. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. Marchesi" Cc: Beau Belgrave Cc: Linus Torvalds Cc: Andrew Morton Cc: Jens Axboe Cc: Florian Weimer Cc: Sam James Link: https://lore.kernel.org/20250729182405.319691167@kernel.org Reviewed-by: Jens Remus Reviewed-By: Indu Bhagat Co-developed-by: Steven Rostedt (Google) Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) --- include/linux/entry-common.h | 2 ++ include/linux/unwind_deferred.h | 8 ++++++++ include/linux/unwind_deferred_types.h | 7 ++++++- 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index f94f3fdf15fc..8908b8eeb99b 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -362,6 +363,7 @@ static __always_inline void exit_to_user_mode(void) lockdep_hardirqs_on_prepare(); instrumentation_end(); + unwind_reset_info(); user_enter_irqoff(); arch_exit_to_user_mode(); lockdep_hardirqs_on(CALLER_ADDR0); diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h index a5f6e8f8a1a2..baacf4a1eb4c 100644 --- a/include/linux/unwind_deferred.h +++ b/include/linux/unwind_deferred.h @@ -12,6 +12,12 @@ void unwind_task_free(struct task_struct *task); int unwind_user_faultable(struct unwind_stacktrace *trace); +static __always_inline void unwind_reset_info(void) +{ + if (unlikely(current->unwind_info.cache)) + current->unwind_info.cache->nr_entries = 0; +} + #else /* !CONFIG_UNWIND_USER */ static inline void unwind_task_init(struct task_struct *task) {} @@ -19,6 +25,8 @@ static inline void unwind_task_free(struct task_struct *task) {} static inline int unwind_user_faultable(struct unwind_stacktrace *trace) { return -ENOSYS; } +static inline void unwind_reset_info(void) {} + #endif /* !CONFIG_UNWIND_USER */ #endif /* _LINUX_UNWIND_USER_DEFERRED_H */ diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h index aa32db574e43..db5b54b18828 100644 --- a/include/linux/unwind_deferred_types.h +++ b/include/linux/unwind_deferred_types.h @@ -2,8 +2,13 @@ #ifndef _LINUX_UNWIND_USER_DEFERRED_TYPES_H #define _LINUX_UNWIND_USER_DEFERRED_TYPES_H +struct unwind_cache { + unsigned int nr_entries; + unsigned long entries[]; +}; + struct unwind_task_info { - unsigned long *entries; + struct unwind_cache *cache; }; #endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */ -- cgit v1.2.3 From 2dffa355f6c279e7d2e574abf9446c41a631c9e5 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 29 Jul 2025 14:23:08 -0400 Subject: unwind_user/deferred: Add deferred unwinding interface Add an interface for scheduling task work to unwind the user space stack before returning to user space. This solves several problems for its callers: - Ensure the unwind happens in task context even if the caller may be running in interrupt context. - Avoid duplicate unwinds, whether called multiple times by the same caller or by different callers. - Create a "context cookie" which allows trace post-processing to correlate kernel unwinds/traces with the user unwind. A concept of a "cookie" is created to detect when the stacktrace is the same. A cookie is generated the first time a user space stacktrace is requested after the task enters the kernel. As the stacktrace is saved on the task_struct while the task is in the kernel, if another request comes in, if the cookie is still the same, it will use the saved stacktrace, and not have to regenerate one. The cookie is passed to the caller on request, and when the stacktrace is generated upon returning to user space, it calls the requester's callback with the cookie as well as the stacktrace. The cookie is cleared when it goes back to user space. Note, this currently adds another conditional to the unwind_reset_info() path that is always called returning to user space, but future changes will put this back to a single conditional. A global list is created and protected by a global mutex that holds tracers that register with the unwind infrastructure. The number of registered tracers will be limited in future changes. Each perf program or ftrace instance will register its own descriptor to use for deferred unwind stack traces. Note, in the function unwind_deferred_task_work() that gets called when returning to user space, it uses a global mutex for synchronization which will cause a big bottleneck. This will be replaced by SRCU, but that change adds some complex synchronization that deservers its own commit. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Jens Axboe Cc: Florian Weimer Cc: Sam James Link: https://lore.kernel.org/20250729182405.488066537@kernel.org Co-developed-by: Steven Rostedt (Google) Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) --- include/linux/unwind_deferred.h | 24 ++++++++++++++++++++++++ include/linux/unwind_deferred_types.h | 24 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+) (limited to 'include') diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h index baacf4a1eb4c..14efd8c027aa 100644 --- a/include/linux/unwind_deferred.h +++ b/include/linux/unwind_deferred.h @@ -2,9 +2,19 @@ #ifndef _LINUX_UNWIND_USER_DEFERRED_H #define _LINUX_UNWIND_USER_DEFERRED_H +#include #include #include +struct unwind_work; + +typedef void (*unwind_callback_t)(struct unwind_work *work, struct unwind_stacktrace *trace, u64 cookie); + +struct unwind_work { + struct list_head list; + unwind_callback_t func; +}; + #ifdef CONFIG_UNWIND_USER void unwind_task_init(struct task_struct *task); @@ -12,8 +22,19 @@ void unwind_task_free(struct task_struct *task); int unwind_user_faultable(struct unwind_stacktrace *trace); +int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func); +int unwind_deferred_request(struct unwind_work *work, u64 *cookie); +void unwind_deferred_cancel(struct unwind_work *work); + static __always_inline void unwind_reset_info(void) { + if (unlikely(current->unwind_info.id.id)) + current->unwind_info.id.id = 0; + /* + * As unwind_user_faultable() can be called directly and + * depends on nr_entries being cleared on exit to user, + * this needs to be a separate conditional. + */ if (unlikely(current->unwind_info.cache)) current->unwind_info.cache->nr_entries = 0; } @@ -24,6 +45,9 @@ static inline void unwind_task_init(struct task_struct *task) {} static inline void unwind_task_free(struct task_struct *task) {} static inline int unwind_user_faultable(struct unwind_stacktrace *trace) { return -ENOSYS; } +static inline int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func) { return -ENOSYS; } +static inline int unwind_deferred_request(struct unwind_work *work, u64 *timestamp) { return -ENOSYS; } +static inline void unwind_deferred_cancel(struct unwind_work *work) {} static inline void unwind_reset_info(void) {} diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h index db5b54b18828..104c477d5609 100644 --- a/include/linux/unwind_deferred_types.h +++ b/include/linux/unwind_deferred_types.h @@ -7,8 +7,32 @@ struct unwind_cache { unsigned long entries[]; }; +/* + * The unwind_task_id is a unique identifier that maps to a user space + * stacktrace. It is generated the first time a deferred user space + * stacktrace is requested after a task has entered the kerenl and + * is cleared to zero when it exits. The mapped id will be a non-zero + * number. + * + * To simplify the generation of the 64 bit number, 32 bits will be + * the CPU it was generated on, and the other 32 bits will be a per + * cpu counter that gets incremented by two every time a new identifier + * is generated. The LSB will always be set to keep the value + * from being zero. + */ +union unwind_task_id { + struct { + u32 cpu; + u32 cnt; + }; + u64 id; +}; + struct unwind_task_info { struct unwind_cache *cache; + struct callback_head work; + union unwind_task_id id; + int pending; }; #endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */ -- cgit v1.2.3 From be3d526a5b34109cecf3bc23b96f0081ad600a5b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 29 Jul 2025 14:23:10 -0400 Subject: unwind deferred: Use bitmask to determine which callbacks to call In order to know which registered callback requested a stacktrace for when the task goes back to user space, add a bitmask to keep track of all registered tracers. The bitmask is the size of long, which means that on a 32 bit machine, it can have at most 32 registered tracers, and on 64 bit, it can have at most 64 registered tracers. This should not be an issue as there should not be more than 10 (unless BPF can abuse this?). When a tracer registers with unwind_deferred_init() it will get a bit number assigned to it. When a tracer requests a stacktrace, it will have its bit set within the task_struct. When the task returns back to user space, it will call the callbacks for all the registered tracers where their bits are set in the task's mask. When a tracer is removed by the unwind_deferred_cancel() all current tasks will clear the associated bit, just in case another tracer gets registered immediately afterward and then gets their callback called unexpectedly. To prevent live locks from happening if an event that happens between the task_work and when the task goes back to user space, triggers the deferred unwind, have the unwind_mask get cleared on exit to user space and not after the callback is made. Move the pending bit from a value on the task_struct to bit zero of the unwind_mask (saves space on the task_struct). This will allow modifying the pending bit along with the work bits atomically. Instead of clearing a work's bit after its callback is called, it is delayed until exit. If the work is requested again, the task_work is not queued again and the request will be notified that the task has already been called by returning a positive number (the same as if it was already pending). The pending bit is cleared before calling the callback functions but the current work bits remain. If one of the called works registers again, it will not trigger a task_work if its bit is still present in the task's unwind_mask. If a new work requests a deferred unwind, then it will set both the pending bit and its own bit. Note this will also cause any work that was previously queued and had their callback already executed to be executed again. Future work will remove these spurious callbacks. The use of atomic_long bit operations were suggested by Peter Zijlstra: Link: https://lore.kernel.org/all/20250715102912.GQ1613200@noisy.programming.kicks-ass.net/ The unwind_mask could not be converted to atomic_long_t do to atomic_long not having all the bit operations needed by unwind_mask. Instead it follows other use cases in the kernel and just typecasts the unwind_mask to atomic_long_t when using the two atomic_long functions. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Jens Axboe Cc: Florian Weimer Cc: Sam James Link: https://lore.kernel.org/20250729182405.822789300@kernel.org Signed-off-by: Steven Rostedt (Google) --- include/linux/unwind_deferred.h | 26 +++++++++++++++++++++++--- include/linux/unwind_deferred_types.h | 2 +- 2 files changed, 24 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h index 14efd8c027aa..337ead927d4d 100644 --- a/include/linux/unwind_deferred.h +++ b/include/linux/unwind_deferred.h @@ -13,10 +13,19 @@ typedef void (*unwind_callback_t)(struct unwind_work *work, struct unwind_stackt struct unwind_work { struct list_head list; unwind_callback_t func; + int bit; }; #ifdef CONFIG_UNWIND_USER +enum { + UNWIND_PENDING_BIT = 0, +}; + +enum { + UNWIND_PENDING = BIT(UNWIND_PENDING_BIT), +}; + void unwind_task_init(struct task_struct *task); void unwind_task_free(struct task_struct *task); @@ -28,15 +37,26 @@ void unwind_deferred_cancel(struct unwind_work *work); static __always_inline void unwind_reset_info(void) { - if (unlikely(current->unwind_info.id.id)) + struct unwind_task_info *info = ¤t->unwind_info; + unsigned long bits; + + /* Was there any unwinding? */ + if (unlikely(info->unwind_mask)) { + bits = info->unwind_mask; + do { + /* Is a task_work going to run again before going back */ + if (bits & UNWIND_PENDING) + return; + } while (!try_cmpxchg(&info->unwind_mask, &bits, 0UL)); current->unwind_info.id.id = 0; + } /* * As unwind_user_faultable() can be called directly and * depends on nr_entries being cleared on exit to user, * this needs to be a separate conditional. */ - if (unlikely(current->unwind_info.cache)) - current->unwind_info.cache->nr_entries = 0; + if (unlikely(info->cache)) + info->cache->nr_entries = 0; } #else /* !CONFIG_UNWIND_USER */ diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h index 104c477d5609..5dc9cda141ff 100644 --- a/include/linux/unwind_deferred_types.h +++ b/include/linux/unwind_deferred_types.h @@ -29,10 +29,10 @@ union unwind_task_id { }; struct unwind_task_info { + unsigned long unwind_mask; struct unwind_cache *cache; struct callback_head work; union unwind_task_id id; - int pending; }; #endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */ -- cgit v1.2.3 From 4c75133e745aa95636c9ccbab1603ed363dabcd4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 29 Jul 2025 14:23:11 -0400 Subject: unwind deferred: Add unwind_completed mask to stop spurious callbacks If there's more than one registered tracer to the unwind deferred infrastructure, it is currently possible that one tracer could cause extra callbacks to happen for another tracer if the former requests a deferred stacktrace after the latter's callback was executed and before the task went back to user space. Here's an example of how this could occur: [Task enters kernel] tracer 1 request -> add cookie to its buffer tracer 1 request -> add cookie to its buffer <..> [ task work executes ] tracer 1 callback -> add trace + cookie to its buffer [tracer 2 requests and triggers the task work again] [ task work executes again ] tracer 1 callback -> add trace + cookie to its buffer tracer 2 callback -> add trace + cookie to its buffer [Task exits back to user space] This is because the bit for tracer 1 gets set in the task's unwind_mask when it did its request and does not get cleared until the task returns back to user space. But if another tracer were to request another deferred stacktrace, then the next task work will executed all tracer's callbacks that have their bits set in the task's unwind_mask. To fix this issue, add another mask called unwind_completed and place it into the task's info->cache structure. The cache structure is allocated on the first occurrence of a deferred stacktrace and this unwind_completed mask is not needed until then. It's better to have it in the cache than to permanently waste space in the task_struct. After a tracer's callback is executed, it's bit gets set in this unwind_completed mask. When the task_work enters, it will AND the task's unwind_mask with the inverse of the unwind_completed which will eliminate any work that already had its callback executed since the task entered the kernel. When the task leaves the kernel, it will reset this unwind_completed mask just like it resets the other values as it enters user space. Link: https://lore.kernel.org/all/20250716142609.47f0e4a5@batman.local.home/ Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Jens Axboe Cc: Florian Weimer Cc: Sam James Link: https://lore.kernel.org/20250729182405.989222722@kernel.org Signed-off-by: Steven Rostedt (Google) --- include/linux/unwind_deferred.h | 4 +++- include/linux/unwind_deferred_types.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h index 337ead927d4d..b9ec4c8515c7 100644 --- a/include/linux/unwind_deferred.h +++ b/include/linux/unwind_deferred.h @@ -55,8 +55,10 @@ static __always_inline void unwind_reset_info(void) * depends on nr_entries being cleared on exit to user, * this needs to be a separate conditional. */ - if (unlikely(info->cache)) + if (unlikely(info->cache)) { info->cache->nr_entries = 0; + info->cache->unwind_completed = 0; + } } #else /* !CONFIG_UNWIND_USER */ diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h index 5dc9cda141ff..33b62ac25c86 100644 --- a/include/linux/unwind_deferred_types.h +++ b/include/linux/unwind_deferred_types.h @@ -3,6 +3,7 @@ #define _LINUX_UNWIND_USER_DEFERRED_TYPES_H struct unwind_cache { + unsigned long unwind_completed; unsigned int nr_entries; unsigned long entries[]; }; -- cgit v1.2.3 From 858fa8a3b083e862114bb6483b9fb50b3e2bc4c3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 29 Jul 2025 14:23:12 -0400 Subject: unwind: Add USED bit to only have one conditional on way back to user space On the way back to user space, the function unwind_reset_info() is called unconditionally (but always inlined). It currently has two conditionals. One that checks the unwind_mask which is set whenever a deferred trace is called and is used to know that the mask needs to be cleared. The other checks if the cache has been allocated, and if so, it resets the nr_entries so that the unwinder knows it needs to do the work to get a new user space stack trace again (it only does it once per entering the kernel). Use one of the bits in the unwind mask as a "USED" bit that gets set whenever a trace is created. This will make it possible to only check the unwind_mask in the unwind_reset_info() to know if it needs to do work or not and eliminates a conditional that happens every time the task goes back to user space. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Jens Axboe Cc: Florian Weimer Cc: Sam James Link: https://lore.kernel.org/20250729182406.155422551@kernel.org Signed-off-by: Steven Rostedt (Google) --- include/linux/unwind_deferred.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h index b9ec4c8515c7..2efbda01e959 100644 --- a/include/linux/unwind_deferred.h +++ b/include/linux/unwind_deferred.h @@ -20,10 +20,14 @@ struct unwind_work { enum { UNWIND_PENDING_BIT = 0, + UNWIND_USED_BIT, }; enum { UNWIND_PENDING = BIT(UNWIND_PENDING_BIT), + + /* Set if the unwinding was used (directly or deferred) */ + UNWIND_USED = BIT(UNWIND_USED_BIT) }; void unwind_task_init(struct task_struct *task); @@ -49,15 +53,11 @@ static __always_inline void unwind_reset_info(void) return; } while (!try_cmpxchg(&info->unwind_mask, &bits, 0UL)); current->unwind_info.id.id = 0; - } - /* - * As unwind_user_faultable() can be called directly and - * depends on nr_entries being cleared on exit to user, - * this needs to be a separate conditional. - */ - if (unlikely(info->cache)) { - info->cache->nr_entries = 0; - info->cache->unwind_completed = 0; + + if (unlikely(info->cache)) { + info->cache->nr_entries = 0; + info->cache->unwind_completed = 0; + } } } -- cgit v1.2.3 From b3b9cb11aa034cfa9eb880bb9bb3d5aaf732e479 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 29 Jul 2025 14:23:14 -0400 Subject: unwind: Finish up unwind when a task exits On do_exit() when a task is exiting, if a unwind is requested and the deferred user stacktrace is deferred via the task_work, the task_work callback is called after exit_mm() is called in do_exit(). This means that the user stack trace will not be retrieved and an empty stack is created. Instead, add a function unwind_deferred_task_exit() and call it just before exit_mm() so that the unwinder can call the requested callbacks with the user space stack. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Jens Axboe Cc: Florian Weimer Cc: Sam James Link: https://lore.kernel.org/20250729182406.504259474@kernel.org Signed-off-by: Steven Rostedt (Google) --- include/linux/unwind_deferred.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h index 2efbda01e959..26122d00708a 100644 --- a/include/linux/unwind_deferred.h +++ b/include/linux/unwind_deferred.h @@ -39,6 +39,8 @@ int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func); int unwind_deferred_request(struct unwind_work *work, u64 *cookie); void unwind_deferred_cancel(struct unwind_work *work); +void unwind_deferred_task_exit(struct task_struct *task); + static __always_inline void unwind_reset_info(void) { struct unwind_task_info *info = ¤t->unwind_info; @@ -71,6 +73,7 @@ static inline int unwind_deferred_init(struct unwind_work *work, unwind_callback static inline int unwind_deferred_request(struct unwind_work *work, u64 *timestamp) { return -ENOSYS; } static inline void unwind_deferred_cancel(struct unwind_work *work) {} +static inline void unwind_deferred_task_exit(struct task_struct *task) {} static inline void unwind_reset_info(void) {} #endif /* !CONFIG_UNWIND_USER */ -- cgit v1.2.3 From b0c85e99458af829c32c225b43f638443bff14e5 Mon Sep 17 00:00:00 2001 From: Shaopeng Tan Date: Mon, 23 Jun 2025 16:46:45 +0900 Subject: cpumask: Remove unnecessary cpumask_nth_andnot() Commit 94f753143028("x86/resctrl: Optimize cpumask_any_housekeeping()") switched the only user of cpumask_nth_andnot() to other cpumask functions, but left the function cpumask_nth_andnot() unused. This makes function find_nth_andnot_bit() unused as well. Delete them. Signed-off-by: Shaopeng Tan Signed-off-by: Yury Norov [NVIDIA] --- include/linux/cpumask.h | 16 ---------------- include/linux/find.h | 27 --------------------------- 2 files changed, 43 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 39b71b662da3..4bda089fbe5d 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -558,22 +558,6 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, small_cpumask_bits, cpumask_check(cpu)); } -/** - * cpumask_nth_andnot - get the Nth cpu set in 1st cpumask, and clear in 2nd. - * @srcp1: the cpumask pointer - * @srcp2: the cpumask pointer - * @cpu: the Nth cpu to find, starting from 0 - * - * Return: >= nr_cpu_ids if such cpu doesn't exist. - */ -static __always_inline -unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, - const struct cpumask *srcp2) -{ - return find_nth_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), - small_cpumask_bits, cpumask_check(cpu)); -} - /** * cpumask_nth_and_andnot - get the Nth cpu set in 1st and 2nd cpumask, and clear in 3rd. * @srcp1: the cpumask pointer diff --git a/include/linux/find.h b/include/linux/find.h index 98c61838002c..9d720ad92bc1 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -269,33 +269,6 @@ unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long * return __find_nth_and_bit(addr1, addr2, size, n); } -/** - * find_nth_andnot_bit - find N'th set bit in 2 memory regions, - * flipping bits in 2nd region - * @addr1: The 1st address to start the search at - * @addr2: The 2nd address to start the search at - * @size: The maximum number of bits to search - * @n: The number of set bit, which position is needed, counting from 0 - * - * Returns the bit number of the N'th set bit. - * If no such, returns @size. - */ -static __always_inline -unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, - unsigned long size, unsigned long n) -{ - if (n >= size) - return size; - - if (small_const_nbits(size)) { - unsigned long val = *addr1 & (~*addr2) & GENMASK(size - 1, 0); - - return val ? fns(val, n) : size; - } - - return __find_nth_andnot_bit(addr1, addr2, size, n); -} - /** * find_nth_and_andnot_bit - find N'th set bit in 2 memory regions, * excluding those set in 3rd region -- cgit v1.2.3 From 6d4471252ccc1722d25200fa9b6021ab4e1d6fde Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Mon, 9 Jun 2025 11:45:45 +0900 Subject: bits: split the definition of the asm and non-asm GENMASK*() In an upcoming change, the non-asm GENMASK*() will all be unified to depend on GENMASK_TYPE() which indirectly depend on sizeof(), something not available in asm. Instead of adding further complexity to GENMASK_TYPE() to make it work for both asm and non asm, just split the definition of the two variants. Signed-off-by: Vincent Mailhol Reviewed-by: Lucas De Marchi Signed-off-by: Yury Norov (NVIDIA) --- include/linux/bits.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/bits.h b/include/linux/bits.h index 7ad056219115..13dbc8adc70e 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -35,6 +35,11 @@ #define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h))) +#define GENMASK(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) +#define GENMASK_ULL(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) + /* * Generate a mask for the specified type @t. Additional checks are made to * guarantee the value returned fits in that type, relying on @@ -79,15 +84,11 @@ * BUILD_BUG_ON_ZERO is not available in h files included from asm files, * disable the input check if that is the case. */ -#define GENMASK_INPUT_CHECK(h, l) 0 +#define GENMASK(h, l) __GENMASK(h, l) +#define GENMASK_ULL(h, l) __GENMASK_ULL(h, l) #endif /* !defined(__ASSEMBLY__) */ -#define GENMASK(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) -#define GENMASK_ULL(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) - #if !defined(__ASSEMBLY__) /* * Missing asm support -- cgit v1.2.3 From 104ea1c84b91c9f452e497ba51602b903711cdd5 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Mon, 9 Jun 2025 11:45:46 +0900 Subject: bits: unify the non-asm GENMASK*() The newly introduced GENMASK_TYPE() macro can also be used to generate the pre-existing non-asm GENMASK*() variants. Apply GENMASK_TYPE() to GENMASK(), GENMASK_ULL() and GENMASK_U128(). Signed-off-by: Vincent Mailhol Signed-off-by: Yury Norov (NVIDIA) --- include/linux/bits.h | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/bits.h b/include/linux/bits.h index 13dbc8adc70e..a40cc861b3a7 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -2,10 +2,8 @@ #ifndef __LINUX_BITS_H #define __LINUX_BITS_H -#include #include #include -#include #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) @@ -35,11 +33,6 @@ #define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h))) -#define GENMASK(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) -#define GENMASK_ULL(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) - /* * Generate a mask for the specified type @t. Additional checks are made to * guarantee the value returned fits in that type, relying on @@ -55,10 +48,14 @@ (type_max(t) << (l) & \ type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h))))) +#define GENMASK(h, l) GENMASK_TYPE(unsigned long, h, l) +#define GENMASK_ULL(h, l) GENMASK_TYPE(unsigned long long, h, l) + #define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l) #define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l) #define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l) #define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l) +#define GENMASK_U128(h, l) GENMASK_TYPE(u128, h, l) /* * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The @@ -89,19 +86,4 @@ #endif /* !defined(__ASSEMBLY__) */ -#if !defined(__ASSEMBLY__) -/* - * Missing asm support - * - * __GENMASK_U128() depends on _BIT128() which would not work - * in the asm code, as it shifts an 'unsigned __int128' data - * type instead of direct representation of 128 bit constants - * such as long and unsigned long. The fundamental problem is - * that a 128 bit constant will get silently truncated by the - * gcc compiler. - */ -#define GENMASK_U128(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l)) -#endif - #endif /* __LINUX_BITS_H */ -- cgit v1.2.3 From e2b02d382ae0cb90697e8529dfd3f93bf8c6905c Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Wed, 9 Jul 2025 10:38:08 +0100 Subject: bitfield: Ensure the return values of helper functions are checked As type##_replace_bits() has no side effects it is only useful if its return value is checked. Add __must_check to enforce this usage. To have the bits replaced in-place typep##_replace_bits() can be used instead. Although, type_##_get_bits() and type_##_encode_bits() are harder to misuse they are still only useful if the return value is checked. For consistency, also add __must_check to these. Signed-off-by: Ben Horgan Signed-off-by: Yury Norov (NVIDIA) --- include/linux/bitfield.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 6d9a53db54b6..5355f8f806a9 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -189,14 +189,14 @@ static __always_inline u64 field_mask(u64 field) } #define field_max(field) ((typeof(field))field_mask(field)) #define ____MAKE_OP(type,base,to,from) \ -static __always_inline __##type type##_encode_bits(base v, base field) \ +static __always_inline __##type __must_check type##_encode_bits(base v, base field) \ { \ if (__builtin_constant_p(v) && (v & ~field_mask(field))) \ __field_overflow(); \ return to((v & field_mask(field)) * field_multiplier(field)); \ } \ -static __always_inline __##type type##_replace_bits(__##type old, \ - base val, base field) \ +static __always_inline __##type __must_check type##_replace_bits(__##type old, \ + base val, base field) \ { \ return (old & ~to(field)) | type##_encode_bits(val, field); \ } \ @@ -205,7 +205,7 @@ static __always_inline void type##p_replace_bits(__##type *p, \ { \ *p = (*p & ~to(field)) | type##_encode_bits(val, field); \ } \ -static __always_inline base type##_get_bits(__##type v, base field) \ +static __always_inline base __must_check type##_get_bits(__##type v, base field) \ { \ return (from(v) & field)/field_multiplier(field); \ } -- cgit v1.2.3 From 12df58ad294253ac1d8df0c9bb9cf726397a671d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 31 Jul 2025 01:47:30 +0200 Subject: bpf: Add cookie object to bpf maps Add a cookie to BPF maps to uniquely identify BPF maps for the timespan when the node is up. This is different to comparing a pointer or BPF map id which could get rolled over and reused. Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20250730234733.530041-1-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f9cd2164ed23..308530c8326b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -310,6 +310,7 @@ struct bpf_map { bool free_after_rcu_gp; atomic64_t sleepable_refcnt; s64 __percpu *elem_count; + u64 cookie; /* write-once */ }; static inline const char *btf_field_type_name(enum btf_field_type type) -- cgit v1.2.3 From fd1c98f0ef5cbcec842209776505d9e70d8fcd53 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 31 Jul 2025 01:47:31 +0200 Subject: bpf: Move bpf map owner out of common struct Given this is only relevant for BPF tail call maps, it is adding up space and penalizing other map types. We also need to extend this with further objects to track / compare to. Therefore, lets move this out into a separate structure and dynamically allocate it only for BPF tail call maps. Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20250730234733.530041-2-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 308530c8326b..a87646cc5398 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -260,6 +260,18 @@ struct bpf_list_node_kern { void *owner; } __attribute__((aligned(8))); +/* 'Ownership' of program-containing map is claimed by the first program + * that is going to use this map or by the first program which FD is + * stored in the map to make sure that all callers and callees have the + * same prog type, JITed flag and xdp_has_frags flag. + */ +struct bpf_map_owner { + enum bpf_prog_type type; + bool jited; + bool xdp_has_frags; + const struct btf_type *attach_func_proto; +}; + struct bpf_map { const struct bpf_map_ops *ops; struct bpf_map *inner_map_meta; @@ -292,18 +304,8 @@ struct bpf_map { struct rcu_head rcu; }; atomic64_t writecnt; - /* 'Ownership' of program-containing map is claimed by the first program - * that is going to use this map or by the first program which FD is - * stored in the map to make sure that all callers and callees have the - * same prog type, JITed flag and xdp_has_frags flag. - */ - struct { - const struct btf_type *attach_func_proto; - spinlock_t lock; - enum bpf_prog_type type; - bool jited; - bool xdp_has_frags; - } owner; + spinlock_t owner_lock; + struct bpf_map_owner *owner; bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ bool free_after_mult_rcu_gp; @@ -2109,6 +2111,16 @@ static inline bool bpf_map_flags_access_ok(u32 access_flags) (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); } +static inline struct bpf_map_owner *bpf_map_owner_alloc(struct bpf_map *map) +{ + return kzalloc(sizeof(*map->owner), GFP_ATOMIC); +} + +static inline void bpf_map_owner_free(struct bpf_map *map) +{ + kfree(map->owner); +} + struct bpf_event_entry { struct perf_event *event; struct file *perf_file; -- cgit v1.2.3 From 9621e60f59eae87eb9ffe88d90f24f391a1ef0f0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 31 Jul 2025 01:47:32 +0200 Subject: bpf: Move cgroup iterator helpers to bpf.h Move them into bpf.h given we also need them in core code. Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20250730234733.530041-3-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- include/linux/bpf-cgroup.h | 5 ----- include/linux/bpf.h | 22 ++++++++++++++-------- 2 files changed, 14 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 082ccd8ad96b..aedf573bdb42 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -77,9 +77,6 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) extern struct static_key_false cgroup_bpf_enabled_key[MAX_CGROUP_BPF_ATTACH_TYPE]; #define cgroup_bpf_enabled(atype) static_branch_unlikely(&cgroup_bpf_enabled_key[atype]) -#define for_each_cgroup_storage_type(stype) \ - for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) - struct bpf_cgroup_storage_map; struct bpf_storage_buffer { @@ -510,8 +507,6 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ kernel_optval) ({ 0; }) -#define for_each_cgroup_storage_type(stype) for (; false; ) - #endif /* CONFIG_CGROUP_BPF */ #endif /* _BPF_CGROUP_H */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a87646cc5398..02aa41e301a5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -208,6 +208,20 @@ enum btf_field_type { BPF_RES_SPIN_LOCK = (1 << 12), }; +enum bpf_cgroup_storage_type { + BPF_CGROUP_STORAGE_SHARED, + BPF_CGROUP_STORAGE_PERCPU, + __BPF_CGROUP_STORAGE_MAX +#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX +}; + +#ifdef CONFIG_CGROUP_BPF +# define for_each_cgroup_storage_type(stype) \ + for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) +#else +# define for_each_cgroup_storage_type(stype) for (; false; ) +#endif /* CONFIG_CGROUP_BPF */ + typedef void (*btf_dtor_kfunc_t)(void *); struct btf_field_kptr { @@ -1085,14 +1099,6 @@ struct bpf_prog_offload { u32 jited_len; }; -enum bpf_cgroup_storage_type { - BPF_CGROUP_STORAGE_SHARED, - BPF_CGROUP_STORAGE_PERCPU, - __BPF_CGROUP_STORAGE_MAX -}; - -#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX - /* The longest tracepoint has 12 args. * See include/trace/bpf_probe.h */ -- cgit v1.2.3 From abad3d0bad72a52137e0c350c59542d75ae4f513 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 31 Jul 2025 01:47:33 +0200 Subject: bpf: Fix oob access in cgroup local storage Lonial reported that an out-of-bounds access in cgroup local storage can be crafted via tail calls. Given two programs each utilizing a cgroup local storage with a different value size, and one program doing a tail call into the other. The verifier will validate each of the indivial programs just fine. However, in the runtime context the bpf_cg_run_ctx holds an bpf_prog_array_item which contains the BPF program as well as any cgroup local storage flavor the program uses. Helpers such as bpf_get_local_storage() pick this up from the runtime context: ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx); storage = ctx->prog_item->cgroup_storage[stype]; if (stype == BPF_CGROUP_STORAGE_SHARED) ptr = &READ_ONCE(storage->buf)->data[0]; else ptr = this_cpu_ptr(storage->percpu_buf); For the second program which was called from the originally attached one, this means bpf_get_local_storage() will pick up the former program's map, not its own. With mismatching sizes, this can result in an unintended out-of-bounds access. To fix this issue, we need to extend bpf_map_owner with an array of storage_cookie[] to match on i) the exact maps from the original program if the second program was using bpf_get_local_storage(), or ii) allow the tail call combination if the second program was not using any of the cgroup local storage maps. Fixes: 7d9c3427894f ("bpf: Make cgroup storages shared between programs on the same cgroup") Reported-by: Lonial Con Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20250730234733.530041-4-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 02aa41e301a5..cc700925b802 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -283,6 +283,7 @@ struct bpf_map_owner { enum bpf_prog_type type; bool jited; bool xdp_has_frags; + u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE]; const struct btf_type *attach_func_proto; }; -- cgit v1.2.3 From e2ba58ccc9099514380c3300cbc0750b5055fc1c Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 30 Jul 2025 21:49:53 -0700 Subject: block: Fix default IO priority if there is no IO context Upstream commit 53889bcaf536 ("block: make __get_task_ioprio() easier to read") changes the IO priority returned to the caller if no IO context is defined for the task. Prior to this commit, the returned IO priority was determined by task_nice_ioclass() and task_nice_ioprio(). Now it is always IOPRIO_DEFAULT, which translates to IOPRIO_CLASS_NONE with priority 0. However, task_nice_ioclass() returns IOPRIO_CLASS_IDLE, IOPRIO_CLASS_RT, or IOPRIO_CLASS_BE depending on the task scheduling policy, and task_nice_ioprio() returns a value determined by task_nice(). This causes regressions in test code checking the IO priority and class of IO operations on tasks with no IO context. Fix the problem by returning the IO priority calculated from task_nice_ioclass() and task_nice_ioprio() if no IO context is defined to match earlier behavior. Fixes: 53889bcaf536 ("block: make __get_task_ioprio() easier to read") Cc: Jens Axboe Cc: Bart Van Assche Signed-off-by: Guenter Roeck Reviewed-by: Yu Kuai Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20250731044953.1852690-1-linux@roeck-us.net Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index b25377b6ea98..5210e8371238 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -60,7 +60,8 @@ static inline int __get_task_ioprio(struct task_struct *p) int prio; if (!ioc) - return IOPRIO_DEFAULT; + return IOPRIO_PRIO_VALUE(task_nice_ioclass(p), + task_nice_ioprio(p)); if (p != current) lockdep_assert_held(&p->alloc_lock); -- cgit v1.2.3 From 5ccaeedb489b41ce6cb857d0de488992746be282 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 1 Aug 2025 00:10:06 +0000 Subject: cfi: add C CFI type macro Currently x86 and riscv open-code 4 instances of the same logic to define a u32 variable with the KCFI typeid of a given function. Replace the duplicate logic with a common macro. Signed-off-by: Mark Rutland Co-developed-by: Maxwell Bland Signed-off-by: Maxwell Bland Co-developed-by: Sami Tolvanen Signed-off-by: Sami Tolvanen Tested-by: Dao Huang Acked-by: Will Deacon Link: https://lore.kernel.org/r/20250801001004.1859976-6-samitolvanen@google.com Signed-off-by: Alexei Starovoitov --- include/linux/cfi_types.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/cfi_types.h b/include/linux/cfi_types.h index 6b8713675765..685f7181780f 100644 --- a/include/linux/cfi_types.h +++ b/include/linux/cfi_types.h @@ -41,5 +41,28 @@ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_CFI_CLANG +#define DEFINE_CFI_TYPE(name, func) \ + /* \ + * Force a reference to the function so the compiler generates \ + * __kcfi_typeid_. \ + */ \ + __ADDRESSABLE(func); \ + /* u32 name __ro_after_init = __kcfi_typeid_ */ \ + extern u32 name; \ + asm ( \ + " .pushsection .data..ro_after_init,\"aw\",\%progbits \n" \ + " .type " #name ",\%object \n" \ + " .globl " #name " \n" \ + " .p2align 2, 0x0 \n" \ + #name ": \n" \ + " .4byte __kcfi_typeid_" #func " \n" \ + " .size " #name ", 4 \n" \ + " .popsection \n" \ + ); +#endif + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_CFI_TYPES_H */ -- cgit v1.2.3 From f1befc82addda926c8301436123d041bf3249505 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 1 Aug 2025 00:10:07 +0000 Subject: cfi: Move BPF CFI types and helpers to generic code Instead of duplicating the same code for each architecture, move the CFI type hash variables for BPF function types and related helper functions to generic CFI code, and allow architectures to override the function definitions if needed. Signed-off-by: Sami Tolvanen Link: https://lore.kernel.org/r/20250801001004.1859976-7-samitolvanen@google.com Signed-off-by: Alexei Starovoitov --- include/linux/cfi.h | 47 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/cfi.h b/include/linux/cfi.h index 1db17ecbb86c..52a98886a455 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -11,16 +11,9 @@ #include #include +#ifdef CONFIG_CFI_CLANG extern bool cfi_warn; -#ifndef cfi_get_offset -static inline int cfi_get_offset(void) -{ - return 0; -} -#endif - -#ifdef CONFIG_CFI_CLANG enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, unsigned long *target, u32 type); @@ -29,6 +22,44 @@ static inline enum bug_trap_type report_cfi_failure_noaddr(struct pt_regs *regs, { return report_cfi_failure(regs, addr, NULL, 0); } + +#ifndef cfi_get_offset +/* + * Returns the CFI prefix offset. By default, the compiler emits only + * a 4-byte CFI type hash before the function. If an architecture + * uses -fpatchable-function-entry=N,M where M>0 to change the prefix + * offset, they must override this function. + */ +static inline int cfi_get_offset(void) +{ + return 4; +} +#endif + +#ifndef cfi_get_func_hash +static inline u32 cfi_get_func_hash(void *func) +{ + u32 hash; + + if (get_kernel_nofault(hash, func - cfi_get_offset())) + return 0; + + return hash; +} +#endif + +/* CFI type hashes for BPF function types */ +extern u32 cfi_bpf_hash; +extern u32 cfi_bpf_subprog_hash; + +#else /* CONFIG_CFI_CLANG */ + +static inline int cfi_get_offset(void) { return 0; } +static inline u32 cfi_get_func_hash(void *func) { return 0; } + +#define cfi_bpf_hash 0U +#define cfi_bpf_subprog_hash 0U + #endif /* CONFIG_CFI_CLANG */ #ifdef CONFIG_ARCH_USES_CFI_TRAPS -- cgit v1.2.3 From 55a984928bfa30c7877e28f16910e6de1c170f1f Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Fri, 1 Aug 2025 10:26:13 +0200 Subject: Revert "tty: vt: use _IO() to define ioctl numbers" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit f1180ca37abe3d117e4a19be12142fe722612a7c. Since the commit, the vt ioctl numbers are defined differently on platforms where _IOC_NONE is non-zero: alpha, mips, powerpc, sparc. Signed-off-by: "Jiri Slaby (SUSE)" Reported-by: Christophe Leroy Link: https://lore.kernel.org/all/436489B9-E67B-4630-909F-386C30A2AAC9@xenosoft.de/ Link: https://lore.kernel.org/all/97ec2636-915a-498c-903b-d66957420d21@csgroup.eu/ Cc: Nicolas Pitre Cc: Ilpo Järvinen Link: https://lore.kernel.org/r/20250801082613.2564584-1-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/vt.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/vt.h b/include/uapi/linux/vt.h index b60fcdfb2746..714483d68c69 100644 --- a/include/uapi/linux/vt.h +++ b/include/uapi/linux/vt.h @@ -14,9 +14,9 @@ /* Note: the ioctl VT_GETSTATE does not work for consoles 16 and higher (since it returns a short) */ -/* 'V' to avoid collision with termios and kd */ +/* 0x56 is 'V', to avoid collision with termios and kd */ -#define VT_OPENQRY _IO('V', 0x00) /* find available vt */ +#define VT_OPENQRY 0x5600 /* find available vt */ struct vt_mode { __u8 mode; /* vt mode */ @@ -25,8 +25,8 @@ struct vt_mode { __s16 acqsig; /* signal to raise on acquisition */ __s16 frsig; /* unused (set to 0) */ }; -#define VT_GETMODE _IO('V', 0x01) /* get mode of active vt */ -#define VT_SETMODE _IO('V', 0x02) /* set mode of active vt */ +#define VT_GETMODE 0x5601 /* get mode of active vt */ +#define VT_SETMODE 0x5602 /* set mode of active vt */ #define VT_AUTO 0x00 /* auto vt switching */ #define VT_PROCESS 0x01 /* process controls switching */ #define VT_ACKACQ 0x02 /* acknowledge switch */ @@ -36,21 +36,21 @@ struct vt_stat { __u16 v_signal; /* signal to send */ __u16 v_state; /* vt bitmask */ }; -#define VT_GETSTATE _IO('V', 0x03) /* get global vt state info */ -#define VT_SENDSIG _IO('V', 0x04) /* signal to send to bitmask of vts */ +#define VT_GETSTATE 0x5603 /* get global vt state info */ +#define VT_SENDSIG 0x5604 /* signal to send to bitmask of vts */ -#define VT_RELDISP _IO('V', 0x05) /* release display */ +#define VT_RELDISP 0x5605 /* release display */ -#define VT_ACTIVATE _IO('V', 0x06) /* make vt active */ -#define VT_WAITACTIVE _IO('V', 0x07) /* wait for vt active */ -#define VT_DISALLOCATE _IO('V', 0x08) /* free memory associated to vt */ +#define VT_ACTIVATE 0x5606 /* make vt active */ +#define VT_WAITACTIVE 0x5607 /* wait for vt active */ +#define VT_DISALLOCATE 0x5608 /* free memory associated to vt */ struct vt_sizes { __u16 v_rows; /* number of rows */ __u16 v_cols; /* number of columns */ __u16 v_scrollsize; /* number of lines of scrollback */ }; -#define VT_RESIZE _IO('V', 0x09) /* set kernel's idea of screensize */ +#define VT_RESIZE 0x5609 /* set kernel's idea of screensize */ struct vt_consize { __u16 v_rows; /* number of rows */ @@ -60,10 +60,10 @@ struct vt_consize { __u16 v_vcol; /* number of pixel columns on screen */ __u16 v_ccol; /* number of pixel columns per character */ }; -#define VT_RESIZEX _IO('V', 0x0A) /* set kernel's idea of screensize + more */ -#define VT_LOCKSWITCH _IO('V', 0x0B) /* disallow vt switching */ -#define VT_UNLOCKSWITCH _IO('V', 0x0C) /* allow vt switching */ -#define VT_GETHIFONTMASK _IO('V', 0x0D) /* return hi font mask */ +#define VT_RESIZEX 0x560A /* set kernel's idea of screensize + more */ +#define VT_LOCKSWITCH 0x560B /* disallow vt switching */ +#define VT_UNLOCKSWITCH 0x560C /* allow vt switching */ +#define VT_GETHIFONTMASK 0x560D /* return hi font mask */ struct vt_event { __u32 event; @@ -77,14 +77,14 @@ struct vt_event { __u32 pad[4]; /* Padding for expansion */ }; -#define VT_WAITEVENT _IO('V', 0x0E) /* Wait for an event */ +#define VT_WAITEVENT 0x560E /* Wait for an event */ struct vt_setactivate { __u32 console; struct vt_mode mode; }; -#define VT_SETACTIVATE _IO('V', 0x0F) /* Activate and set the mode of a console */ +#define VT_SETACTIVATE 0x560F /* Activate and set the mode of a console */ /* get console size and cursor position */ struct vt_consizecsrpos { -- cgit v1.2.3 From 9d9b193ed73a65ec47cf1fd39925b09da8216461 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 31 Jul 2025 09:41:47 +0800 Subject: crypto: hash - Increase HASH_MAX_DESCSIZE for hmac(sha3-224-s390) The value of HASH_MAX_DESCSIZE is off by one for hmac(sha3-224-s390). Fix this so that hmac(sha3-224-s390) can be registered. Reported-by: Ingo Franzki Reported-by: Eric Biggers Fixes: 6f90ba706551 ("crypto: s390/sha3 - Use API partial block handling") Cc: Signed-off-by: Herbert Xu --- include/crypto/hash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 6f6b9de12cd3..ed63b904837d 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -184,7 +184,7 @@ struct shash_desc { * Worst case is hmac(sha3-224-s390). Its context is a nested 'shash_desc' * containing a 'struct s390_sha_ctx'. */ -#define HASH_MAX_DESCSIZE (sizeof(struct shash_desc) + 360) +#define HASH_MAX_DESCSIZE (sizeof(struct shash_desc) + 361) #define MAX_SYNC_HASH_REQSIZE (sizeof(struct ahash_request) + \ HASH_MAX_DESCSIZE) -- cgit v1.2.3 From 564a69ad90d15c782176e1a8c9e1c95661e1aed0 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 21 May 2025 17:03:46 +0530 Subject: virtio-mmio: Remove virtqueue list from mmio device The MMIO transport implementation creates a list of virtqueues for a virtio device, while the same is already available in the struct virtio_device. Don't create a duplicate list, and use the other one instead. While at it, fix the virtio_device_for_each_vq() macro to accept an argument like "&vm_dev->vdev" (which currently fails to build). Signed-off-by: Viresh Kumar Message-Id: <3e56c6f74002987e22f364d883cbad177cd9ad9c.1747827066.git.viresh.kumar@linaro.org> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- include/linux/virtio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 64cb4b04be7a..8b745ce0cf5f 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -196,7 +196,7 @@ int virtio_device_reset_done(struct virtio_device *dev); size_t virtio_max_dma_size(const struct virtio_device *vdev); #define virtio_device_for_each_vq(vdev, vq) \ - list_for_each_entry(vq, &vdev->vqs, list) + list_for_each_entry(vq, &(vdev)->vqs, list) /** * struct virtio_driver - operations for a virtio I/O driver -- cgit v1.2.3 From 569c392e191361cd05fba1fd87ed02ef0d130ef7 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 17 Jun 2025 01:18:36 +0100 Subject: vhost: vringh: Remove unused iotlb functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The functions: vringh_abandon_iotlb() vringh_notify_disable_iotlb() and vringh_notify_enable_iotlb() were added in 2020 by commit 9ad9c49cfe97 ("vringh: IOTLB support") but have remained unused. Remove them. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Simon Horman Message-Id: <20250617001838.114457-2-linux@treblig.org> Signed-off-by: Michael S. Tsirkin Acked-by: Eugenio Pérez Tested-by: Lei Yang --- include/linux/vringh.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/vringh.h b/include/linux/vringh.h index c3a8117dabe8..af8bd2695a7b 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -319,13 +319,8 @@ ssize_t vringh_iov_push_iotlb(struct vringh *vrh, struct vringh_kiov *wiov, const void *src, size_t len); -void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num); - int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len); -bool vringh_notify_enable_iotlb(struct vringh *vrh); -void vringh_notify_disable_iotlb(struct vringh *vrh); - int vringh_need_notify_iotlb(struct vringh *vrh); #endif /* CONFIG_VHOST_IOTLB */ -- cgit v1.2.3 From 6e9ef6937c726b97d4a6d49332d06e999acc15f5 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 17 Jun 2025 01:18:37 +0100 Subject: vhost: vringh: Remove unused functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The functions: vringh_abandon_kern() vringh_abandon_user() vringh_iov_pull_kern() and vringh_iov_push_kern() were all added in 2013 by commit f87d0fbb5798 ("vringh: host-side implementation of virtio rings.") but have remained unused. Remove them and the two helper functions they used. Signed-off-by: Dr. David Alan Gilbert Message-Id: <20250617001838.114457-3-linux@treblig.org> Signed-off-by: Michael S. Tsirkin Acked-by: Eugenio Pérez Tested-by: Lei Yang Reviewed-by: Simon Horman --- include/linux/vringh.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/vringh.h b/include/linux/vringh.h index af8bd2695a7b..49e7cbc9697a 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -175,9 +175,6 @@ int vringh_complete_multi_user(struct vringh *vrh, const struct vring_used_elem used[], unsigned num_used); -/* Pretend we've never seen descriptor (for easy error handling). */ -void vringh_abandon_user(struct vringh *vrh, unsigned int num); - /* Do we need to fire the eventfd to notify the other side? */ int vringh_need_notify_user(struct vringh *vrh); @@ -235,10 +232,6 @@ int vringh_getdesc_kern(struct vringh *vrh, u16 *head, gfp_t gfp); -ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len); -ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov, - const void *src, size_t len); -void vringh_abandon_kern(struct vringh *vrh, unsigned int num); int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len); bool vringh_notify_enable_kern(struct vringh *vrh); -- cgit v1.2.3 From 7d9896e9f6d02d8aa85e63f736871f96c59a5263 Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Mon, 14 Jul 2025 15:12:32 +0800 Subject: vhost: Reintroduce kthread API and add mode selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 6e890c5d5021 ("vhost: use vhost_tasks for worker threads"), the vhost uses vhost_task and operates as a child of the owner thread. This is required for correct CPU usage accounting, especially when using containers. However, this change has caused confusion for some legacy userspace applications, and we didn't notice until it's too late. Unfortunately, it's too late to revert - we now have userspace depending both on old and new behaviour :( To address the issue, reintroduce kthread mode for vhost workers and provide a configuration to select between kthread and task worker. - Add 'fork_owner' parameter to vhost_dev to let users select kthread or task mode. Default mode is task mode(VHOST_FORK_OWNER_TASK). - Reintroduce kthread mode support: * Bring back the original vhost_worker() implementation, and renamed to vhost_run_work_kthread_list(). * Add cgroup support for the kthread * Introduce struct vhost_worker_ops: - Encapsulates create / stop / wake‑up callbacks. - vhost_worker_create() selects the proper ops according to inherit_owner. - Userspace configuration interface: * New IOCTLs: - VHOST_SET_FORK_FROM_OWNER lets userspace select task mode (VHOST_FORK_OWNER_TASK) or kthread mode (VHOST_FORK_OWNER_KTHREAD) - VHOST_GET_FORK_FROM_OWNER reads the current worker mode * Expose module parameter 'fork_from_owner_default' to allow system administrators to configure the default mode for vhost workers * Kconfig option CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL controls whether these IOCTLs and the parameter are available - The VHOST_NEW_WORKER functionality requires fork_owner to be set to true, with validation added to ensure proper configuration This partially reverts or improves upon: commit 6e890c5d5021 ("vhost: use vhost_tasks for worker threads") commit 1cdaafa1b8b4 ("vhost: replace single worker pointer with xarray") Fixes: 6e890c5d5021 ("vhost: use vhost_tasks for worker threads"), Signed-off-by: Cindy Lu Message-Id: <20250714071333.59794-2-lulu@redhat.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Tested-by: Lei Yang --- include/uapi/linux/vhost.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index d4b3e2ae1314..e72f2655459e 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -235,4 +235,33 @@ */ #define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \ struct vhost_vring_state) + +/* fork_owner values for vhost */ +#define VHOST_FORK_OWNER_KTHREAD 0 +#define VHOST_FORK_OWNER_TASK 1 + +/** + * VHOST_SET_FORK_FROM_OWNER - Set the fork_owner flag for the vhost device, + * This ioctl must called before VHOST_SET_OWNER. + * Only available when CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y + * + * @param fork_owner: An 8-bit value that determines the vhost thread mode + * + * When fork_owner is set to VHOST_FORK_OWNER_TASK(default value): + * - Vhost will create vhost worker as tasks forked from the owner, + * inheriting all of the owner's attributes. + * + * When fork_owner is set to VHOST_FORK_OWNER_KTHREAD: + * - Vhost will create vhost workers as kernel threads. + */ +#define VHOST_SET_FORK_FROM_OWNER _IOW(VHOST_VIRTIO, 0x83, __u8) + +/** + * VHOST_GET_FORK_OWNER - Get the current fork_owner flag for the vhost device. + * Only available when CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y + * + * @return: An 8-bit value indicating the current thread mode. + */ +#define VHOST_GET_FORK_FROM_OWNER _IOR(VHOST_VIRTIO, 0x84, __u8) + #endif -- cgit v1.2.3 From 87dbae5e36613a6020f3d64a2eaeac0a1e0e6dc6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 17 Jul 2025 10:01:10 +0100 Subject: vsock/virtio: Move length check to callers of virtio_vsock_skb_rx_put() virtio_vsock_skb_rx_put() only calls skb_put() if the length in the packet header is not zero even though skb_put() handles this case gracefully. Remove the functionally redundant check from virtio_vsock_skb_rx_put() and, on the assumption that this is a worthwhile optimisation for handling credit messages, augment the existing length checks in virtio_transport_rx_work() to elide the call for zero-length payloads. Since the callers all have the length, extend virtio_vsock_skb_rx_put() to take it as an additional parameter rather than fish it back out of the packet header. Note that the vhost code already has similar logic in vhost_vsock_alloc_skb(). Reviewed-by: Stefano Garzarella Signed-off-by: Will Deacon Message-Id: <20250717090116.11987-4-will@kernel.org> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_vsock.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 36fb3edfa403..97465f378ade 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -47,14 +47,9 @@ static inline void virtio_vsock_skb_clear_tap_delivered(struct sk_buff *skb) VIRTIO_VSOCK_SKB_CB(skb)->tap_delivered = false; } -static inline void virtio_vsock_skb_rx_put(struct sk_buff *skb) +static inline void virtio_vsock_skb_rx_put(struct sk_buff *skb, u32 len) { - u32 len; - - len = le32_to_cpu(virtio_vsock_hdr(skb)->len); - - if (len > 0) - skb_put(skb, len); + skb_put(skb, len); } static inline struct sk_buff *virtio_vsock_alloc_skb(unsigned int size, gfp_t mask) -- cgit v1.2.3 From 03a92f036a04fed2b00d69f5f46f1a486e70dc5c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 17 Jul 2025 10:01:11 +0100 Subject: vsock/virtio: Resize receive buffers so that each SKB fits in a 4K page When allocating receive buffers for the vsock virtio RX virtqueue, an SKB is allocated with a 4140 data payload (the 44-byte packet header + VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE). Even when factoring in the SKB overhead, the resulting 8KiB allocation thanks to the rounding in kmalloc_reserve() is wasteful (~3700 unusable bytes) and results in a higher-order page allocation on systems with 4KiB pages just for the sake of a few hundred bytes of packet data. Limit the vsock virtio RX buffers to 4KiB per SKB, resulting in much better memory utilisation and removing the need to allocate higher-order pages entirely. Reviewed-by: Stefano Garzarella Signed-off-by: Will Deacon Message-Id: <20250717090116.11987-5-will@kernel.org> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_vsock.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 97465f378ade..879f1dfa7d3a 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -106,7 +106,12 @@ static inline size_t virtio_vsock_skb_len(struct sk_buff *skb) return (size_t)(skb_end_pointer(skb) - skb->head); } -#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) +/* Dimension the RX SKB so that the entire thing fits exactly into + * a single 4KiB page. This avoids wasting memory due to alloc_skb() + * rounding up to the next page order and also means that we + * don't leave higher-order pages sitting around in the RX queue. + */ +#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE SKB_WITH_OVERHEAD(1024 * 4) #define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL #define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) -- cgit v1.2.3 From 2304c64a2866c58534560c63dc6e79d09b8f8d8d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 17 Jul 2025 10:01:12 +0100 Subject: vsock/virtio: Rename virtio_vsock_alloc_skb() In preparation for nonlinear allocations for large SKBs, rename virtio_vsock_alloc_skb() to virtio_vsock_alloc_linear_skb() to indicate that it returns linear SKBs unconditionally and switch all callers over to this new interface for now. No functional change. Reviewed-by: Stefano Garzarella Signed-off-by: Will Deacon Message-Id: <20250717090116.11987-6-will@kernel.org> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_vsock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 879f1dfa7d3a..4504ea29ff82 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -52,7 +52,8 @@ static inline void virtio_vsock_skb_rx_put(struct sk_buff *skb, u32 len) skb_put(skb, len); } -static inline struct sk_buff *virtio_vsock_alloc_skb(unsigned int size, gfp_t mask) +static inline struct sk_buff * +virtio_vsock_alloc_linear_skb(unsigned int size, gfp_t mask) { struct sk_buff *skb; -- cgit v1.2.3 From fac6b82e0f3eaca33c8c67ec401681b21143ae17 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 17 Jul 2025 10:01:13 +0100 Subject: vsock/virtio: Move SKB allocation lower-bound check to callers virtio_vsock_alloc_linear_skb() checks that the requested size is at least big enough for the packet header (VIRTIO_VSOCK_SKB_HEADROOM). Of the three callers of virtio_vsock_alloc_linear_skb(), only vhost_vsock_alloc_skb() can potentially pass a packet smaller than the header size and, as it already has a check against the maximum packet size, extend its bounds checking to consider the minimum packet size and remove the check from virtio_vsock_alloc_linear_skb(). Reviewed-by: Stefano Garzarella Signed-off-by: Will Deacon Message-Id: <20250717090116.11987-7-will@kernel.org> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_vsock.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 4504ea29ff82..36dd0cd55368 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -57,9 +57,6 @@ virtio_vsock_alloc_linear_skb(unsigned int size, gfp_t mask) { struct sk_buff *skb; - if (size < VIRTIO_VSOCK_SKB_HEADROOM) - return NULL; - skb = alloc_skb(size, mask); if (!skb) return NULL; -- cgit v1.2.3 From ab9aa2f3afc2713c14f6c4c6b90c9a0933b837f1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 17 Jul 2025 10:01:14 +0100 Subject: vhost/vsock: Allocate nonlinear SKBs for handling large receive buffers When receiving a packet from a guest, vhost_vsock_handle_tx_kick() calls vhost_vsock_alloc_linear_skb() to allocate and fill an SKB with the receive data. Unfortunately, these are always linear allocations and can therefore result in significant pressure on kmalloc() considering that the maximum packet size (VIRTIO_VSOCK_MAX_PKT_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM) is a little over 64KiB, resulting in a 128KiB allocation for each packet. Rework the vsock SKB allocation so that, for sizes with page order greater than PAGE_ALLOC_COSTLY_ORDER, a nonlinear SKB is allocated instead with the packet header in the SKB and the receive data in the fragments. Finally, add a debug warning if virtio_vsock_skb_rx_put() is ever called on an SKB with a non-zero length, as this would be destructive for the nonlinear case. Reviewed-by: Stefano Garzarella Signed-off-by: Will Deacon Message-Id: <20250717090116.11987-8-will@kernel.org> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_vsock.h | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 36dd0cd55368..fa5934ea9c81 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -49,22 +49,48 @@ static inline void virtio_vsock_skb_clear_tap_delivered(struct sk_buff *skb) static inline void virtio_vsock_skb_rx_put(struct sk_buff *skb, u32 len) { - skb_put(skb, len); + DEBUG_NET_WARN_ON_ONCE(skb->len); + + if (skb_is_nonlinear(skb)) + skb->len = len; + else + skb_put(skb, len); } static inline struct sk_buff * -virtio_vsock_alloc_linear_skb(unsigned int size, gfp_t mask) +__virtio_vsock_alloc_skb_with_frags(unsigned int header_len, + unsigned int data_len, + gfp_t mask) { struct sk_buff *skb; + int err; - skb = alloc_skb(size, mask); + skb = alloc_skb_with_frags(header_len, data_len, + PAGE_ALLOC_COSTLY_ORDER, &err, mask); if (!skb) return NULL; skb_reserve(skb, VIRTIO_VSOCK_SKB_HEADROOM); + skb->data_len = data_len; return skb; } +static inline struct sk_buff * +virtio_vsock_alloc_linear_skb(unsigned int size, gfp_t mask) +{ + return __virtio_vsock_alloc_skb_with_frags(size, 0, mask); +} + +static inline struct sk_buff *virtio_vsock_alloc_skb(unsigned int size, gfp_t mask) +{ + if (size <= SKB_WITH_OVERHEAD(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) + return virtio_vsock_alloc_linear_skb(size, mask); + + size -= VIRTIO_VSOCK_SKB_HEADROOM; + return __virtio_vsock_alloc_skb_with_frags(VIRTIO_VSOCK_SKB_HEADROOM, + size, mask); +} + static inline void virtio_vsock_skb_queue_head(struct sk_buff_head *list, struct sk_buff *skb) { -- cgit v1.2.3 From 8ca76151d2c8219edea82f1925a2a25907ff6a9d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 17 Jul 2025 10:01:15 +0100 Subject: vsock/virtio: Rename virtio_vsock_skb_rx_put() In preparation for using virtio_vsock_skb_rx_put() when populating SKBs on the vsock TX path, rename virtio_vsock_skb_rx_put() to virtio_vsock_skb_put(). No functional change. Reviewed-by: Stefano Garzarella Signed-off-by: Will Deacon Message-Id: <20250717090116.11987-9-will@kernel.org> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_vsock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index fa5934ea9c81..0c67543a45c8 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -47,7 +47,7 @@ static inline void virtio_vsock_skb_clear_tap_delivered(struct sk_buff *skb) VIRTIO_VSOCK_SKB_CB(skb)->tap_delivered = false; } -static inline void virtio_vsock_skb_rx_put(struct sk_buff *skb, u32 len) +static inline void virtio_vsock_skb_put(struct sk_buff *skb, u32 len) { DEBUG_NET_WARN_ON_ONCE(skb->len); -- cgit v1.2.3 From 9843cf7b6fd6f938c16fde51e86dd0e3ddbefb12 Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Fri, 1 Aug 2025 10:16:18 +0800 Subject: ASoC: tas2781: Fix the wrong step for TLV on tas2781 The step for TLV on tas2781, should be 50 (-0.5dB). Fixes: 678f38eba1f2 ("ASoC: tas2781: Add Header file for tas2781 driver") Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20250801021618.64627-1-baojun.xu@ti.com Signed-off-by: Mark Brown --- include/sound/tas2781-tlv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/tas2781-tlv.h b/include/sound/tas2781-tlv.h index d87263e43fdb..ef9b9f19d212 100644 --- a/include/sound/tas2781-tlv.h +++ b/include/sound/tas2781-tlv.h @@ -15,7 +15,7 @@ #ifndef __TAS2781_TLV_H__ #define __TAS2781_TLV_H__ -static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 50, 0); static const __maybe_unused DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0); #endif -- cgit v1.2.3 From 788fa4b47cdcd9b3d8c2d02ac0b3cd2540305f18 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2025 16:37:24 -0400 Subject: tracing: Add guard(ring_buffer_nest) Some calls to the tracing ring buffer can happen when the ring buffer is already being written to by the same context (for example, a trace_printk() in between a ring_buffer_lock_reserve() and a ring_buffer_unlock_commit()). In order to not trigger the recursion detection, these functions use ring_buffer_nest_start() and ring_buffer_nest_end(). Create a guard() for these functions so that their use cases can be simplified and not need to use goto for the release. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://lore.kernel.org/20250801203857.710501021@kernel.org Signed-off-by: Steven Rostedt (Google) --- include/linux/ring_buffer.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index cd7f0ae26615..8253cb69540c 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -144,6 +144,9 @@ int ring_buffer_write(struct trace_buffer *buffer, void ring_buffer_nest_start(struct trace_buffer *buffer); void ring_buffer_nest_end(struct trace_buffer *buffer); +DEFINE_GUARD(ring_buffer_nest, struct trace_buffer *, + ring_buffer_nest_start(_T), ring_buffer_nest_end(_T)) + struct ring_buffer_event * ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); -- cgit v1.2.3 From d45cf1e7d7180256e17c9ce88e32e8061a7887fe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Jul 2025 13:17:38 +0000 Subject: ipv6: reject malicious packets in ipv6_gso_segment() syzbot was able to craft a packet with very long IPv6 extension headers leading to an overflow of skb->transport_header. This 16bit field has a limited range. Add skb_reset_transport_header_careful() helper and use it from ipv6_gso_segment() WARNING: CPU: 0 PID: 5871 at ./include/linux/skbuff.h:3032 skb_reset_transport_header include/linux/skbuff.h:3032 [inline] WARNING: CPU: 0 PID: 5871 at ./include/linux/skbuff.h:3032 ipv6_gso_segment+0x15e2/0x21e0 net/ipv6/ip6_offload.c:151 Modules linked in: CPU: 0 UID: 0 PID: 5871 Comm: syz-executor211 Not tainted 6.16.0-rc6-syzkaller-g7abc678e3084 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 RIP: 0010:skb_reset_transport_header include/linux/skbuff.h:3032 [inline] RIP: 0010:ipv6_gso_segment+0x15e2/0x21e0 net/ipv6/ip6_offload.c:151 Call Trace: skb_mac_gso_segment+0x31c/0x640 net/core/gso.c:53 nsh_gso_segment+0x54a/0xe10 net/nsh/nsh.c:110 skb_mac_gso_segment+0x31c/0x640 net/core/gso.c:53 __skb_gso_segment+0x342/0x510 net/core/gso.c:124 skb_gso_segment include/net/gso.h:83 [inline] validate_xmit_skb+0x857/0x11b0 net/core/dev.c:3950 validate_xmit_skb_list+0x84/0x120 net/core/dev.c:4000 sch_direct_xmit+0xd3/0x4b0 net/sched/sch_generic.c:329 __dev_xmit_skb net/core/dev.c:4102 [inline] __dev_queue_xmit+0x17b6/0x3a70 net/core/dev.c:4679 Fixes: d1da932ed4ec ("ipv6: Separate ipv6 offload support") Reported-by: syzbot+af43e647fd835acc02df@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/688a1a05.050a0220.5d226.0008.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Reviewed-by: Dawid Osuchowski Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250730131738.3385939-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b8b06e71b73e..14b923ddb6df 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3033,6 +3033,29 @@ static inline void skb_reset_transport_header(struct sk_buff *skb) skb->transport_header = offset; } +/** + * skb_reset_transport_header_careful - conditionally reset transport header + * @skb: buffer to alter + * + * Hardened version of skb_reset_transport_header(). + * + * Returns: true if the operation was a success. + */ +static inline bool __must_check +skb_reset_transport_header_careful(struct sk_buff *skb) +{ + long offset = skb->data - skb->head; + + if (unlikely(offset != (typeof(skb->transport_header))offset)) + return false; + + if (unlikely(offset == (typeof(skb->transport_header))~0U)) + return false; + + skb->transport_header = offset; + return true; +} + static inline void skb_set_transport_header(struct sk_buff *skb, const int offset) { -- cgit v1.2.3 From d46e51f1c78b9ab9323610feb14238d06d46d519 Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Wed, 30 Jul 2025 18:14:58 +0800 Subject: net: drop UFO packets in udp_rcv_segment() When sending a packet with virtio_net_hdr to tun device, if the gso_type in virtio_net_hdr is SKB_GSO_UDP and the gso_size is less than udphdr size, below crash may happen. ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:4572! Oops: invalid opcode: 0000 [#1] SMP NOPTI CPU: 0 UID: 0 PID: 62 Comm: mytest Not tainted 6.16.0-rc7 #203 PREEMPT(voluntary) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:skb_pull_rcsum+0x8e/0xa0 Code: 00 00 5b c3 cc cc cc cc 8b 93 88 00 00 00 f7 da e8 37 44 38 00 f7 d8 89 83 88 00 00 00 48 8b 83 c8 00 00 00 5b c3 cc cc cc cc <0f> 0b 0f 0b 66 66 2e 0f 1f 84 00 000 RSP: 0018:ffffc900001fba38 EFLAGS: 00000297 RAX: 0000000000000004 RBX: ffff8880040c1000 RCX: ffffc900001fb948 RDX: ffff888003e6d700 RSI: 0000000000000008 RDI: ffff88800411a062 RBP: ffff8880040c1000 R08: 0000000000000000 R09: 0000000000000001 R10: ffff888003606c00 R11: 0000000000000001 R12: 0000000000000000 R13: ffff888004060900 R14: ffff888004050000 R15: ffff888004060900 FS: 000000002406d3c0(0000) GS:ffff888084a19000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000040 CR3: 0000000004007000 CR4: 00000000000006f0 Call Trace: udp_queue_rcv_one_skb+0x176/0x4b0 net/ipv4/udp.c:2445 udp_queue_rcv_skb+0x155/0x1f0 net/ipv4/udp.c:2475 udp_unicast_rcv_skb+0x71/0x90 net/ipv4/udp.c:2626 __udp4_lib_rcv+0x433/0xb00 net/ipv4/udp.c:2690 ip_protocol_deliver_rcu+0xa6/0x160 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x72/0x90 net/ipv4/ip_input.c:233 ip_sublist_rcv_finish+0x5f/0x70 net/ipv4/ip_input.c:579 ip_sublist_rcv+0x122/0x1b0 net/ipv4/ip_input.c:636 ip_list_rcv+0xf7/0x130 net/ipv4/ip_input.c:670 __netif_receive_skb_list_core+0x21d/0x240 net/core/dev.c:6067 netif_receive_skb_list_internal+0x186/0x2b0 net/core/dev.c:6210 napi_complete_done+0x78/0x180 net/core/dev.c:6580 tun_get_user+0xa63/0x1120 drivers/net/tun.c:1909 tun_chr_write_iter+0x65/0xb0 drivers/net/tun.c:1984 vfs_write+0x300/0x420 fs/read_write.c:593 ksys_write+0x60/0xd0 fs/read_write.c:686 do_syscall_64+0x50/0x1c0 arch/x86/entry/syscall_64.c:63 To trigger gso segment in udp_queue_rcv_skb(), we should also set option UDP_ENCAP_ESPINUDP to enable udp_sk(sk)->encap_rcv. When the encap_rcv hook return 1 in udp_queue_rcv_one_skb(), udp_csum_pull_header() will try to pull udphdr, but the skb size has been segmented to gso size, which leads to this crash. Previous commit cf329aa42b66 ("udp: cope with UDP GRO packet misdirection") introduces segmentation in UDP receive path only for GRO, which was never intended to be used for UFO, so drop UFO packets in udp_rcv_segment(). Link: https://lore.kernel.org/netdev/20250724083005.3918375-1-wangliang74@huawei.com/ Link: https://lore.kernel.org/netdev/20250729123907.3318425-1-wangliang74@huawei.com/ Fixes: cf329aa42b66 ("udp: cope with UDP GRO packet misdirection") Suggested-by: Willem de Bruijn Signed-off-by: Wang Liang Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250730101458.3470788-1-wangliang74@huawei.com Signed-off-by: Jakub Kicinski --- include/net/udp.h | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index f8ae2c4ade14..e2af3bda90c9 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -586,6 +586,16 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, { netdev_features_t features = NETIF_F_SG; struct sk_buff *segs; + int drop_count; + + /* + * Segmentation in UDP receive path is only for UDP GRO, drop udp + * fragmentation offload (UFO) packets. + */ + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) { + drop_count = 1; + goto drop; + } /* Avoid csum recalculation by skb_segment unless userspace explicitly * asks for the final checksum values @@ -609,16 +619,18 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, */ segs = __skb_gso_segment(skb, features, false); if (IS_ERR_OR_NULL(segs)) { - int segs_nr = skb_shinfo(skb)->gso_segs; - - atomic_add(segs_nr, &sk->sk_drops); - SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, segs_nr); - kfree_skb(skb); - return NULL; + drop_count = skb_shinfo(skb)->gso_segs; + goto drop; } consume_skb(skb); return segs; + +drop: + atomic_add(drop_count, &sk->sk_drops); + SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, drop_count); + kfree_skb(skb); + return NULL; } static inline void udp_post_segment_fix_csum(struct sk_buff *skb) -- cgit v1.2.3 From 1dbf1d590d10a6d1978e8184f8dfe20af22d680a Mon Sep 17 00:00:00 2001 From: Sharath Chandra Vurukala Date: Wed, 30 Jul 2025 16:21:18 +0530 Subject: net: Add locking to protect skb->dev access in ip_output In ip_output() skb->dev is updated from the skb_dst(skb)->dev this can become invalid when the interface is unregistered and freed, Introduced new skb_dst_dev_rcu() function to be used instead of skb_dst_dev() within rcu_locks in ip_output.This will ensure that all the skb's associated with the dev being deregistered will be transnmitted out first, before freeing the dev. Given that ip_output() is called within an rcu_read_lock() critical section or from a bottom-half context, it is safe to introduce an RCU read-side critical section within it. Multiple panic call stacks were observed when UL traffic was run in concurrency with device deregistration from different functions, pasting one sample for reference. [496733.627565][T13385] Call trace: [496733.627570][T13385] bpf_prog_ce7c9180c3b128ea_cgroupskb_egres+0x24c/0x7f0 [496733.627581][T13385] __cgroup_bpf_run_filter_skb+0x128/0x498 [496733.627595][T13385] ip_finish_output+0xa4/0xf4 [496733.627605][T13385] ip_output+0x100/0x1a0 [496733.627613][T13385] ip_send_skb+0x68/0x100 [496733.627618][T13385] udp_send_skb+0x1c4/0x384 [496733.627625][T13385] udp_sendmsg+0x7b0/0x898 [496733.627631][T13385] inet_sendmsg+0x5c/0x7c [496733.627639][T13385] __sys_sendto+0x174/0x1e4 [496733.627647][T13385] __arm64_sys_sendto+0x28/0x3c [496733.627653][T13385] invoke_syscall+0x58/0x11c [496733.627662][T13385] el0_svc_common+0x88/0xf4 [496733.627669][T13385] do_el0_svc+0x2c/0xb0 [496733.627676][T13385] el0_svc+0x2c/0xa4 [496733.627683][T13385] el0t_64_sync_handler+0x68/0xb4 [496733.627689][T13385] el0t_64_sync+0x1a4/0x1a8 Changes in v3: - Replaced WARN_ON() with WARN_ON_ONCE(), as suggested by Willem de Bruijn. - Dropped legacy lines mistakenly pulled in from an outdated branch. Changes in v2: - Addressed review comments from Eric Dumazet - Used READ_ONCE() to prevent potential load/store tearing - Added skb_dst_dev_rcu() and used along with rcu_read_lock() in ip_output Signed-off-by: Sharath Chandra Vurukala Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250730105118.GA26100@hu-sharathv-hyd.qualcomm.com Signed-off-by: Jakub Kicinski --- include/net/dst.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 00467c1b5093..bab01363bb97 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -568,11 +568,23 @@ static inline struct net_device *dst_dev(const struct dst_entry *dst) return READ_ONCE(dst->dev); } +static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst) +{ + /* In the future, use rcu_dereference(dst->dev) */ + WARN_ON_ONCE(!rcu_read_lock_held()); + return READ_ONCE(dst->dev); +} + static inline struct net_device *skb_dst_dev(const struct sk_buff *skb) { return dst_dev(skb_dst(skb)); } +static inline struct net_device *skb_dst_dev_rcu(const struct sk_buff *skb) +{ + return dst_dev_rcu(skb_dst(skb)); +} + static inline struct net *skb_dst_dev_net(const struct sk_buff *skb) { return dev_net(skb_dst_dev(skb)); -- cgit v1.2.3 From 6c6d8f8ba7789c221a2e4c43a0ed982c7a41f428 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 16 Jul 2025 14:32:45 +0100 Subject: lib/xxhash: remove unused functions xxh32_digest() and xxh32_update() were added in 2017 in the original xxhash commit, but have remained unused. Remove them. Link: https://lkml.kernel.org/r/20250716133245.243363-1-linux@treblig.org Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Christoph Hellwig Cc: Dave Gilbert Cc: Nick Terrell Signed-off-by: Andrew Morton --- include/linux/xxhash.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include') diff --git a/include/linux/xxhash.h b/include/linux/xxhash.h index df42511438d0..27f57eca8cb1 100644 --- a/include/linux/xxhash.h +++ b/include/linux/xxhash.h @@ -177,32 +177,6 @@ struct xxh64_state { */ void xxh32_reset(struct xxh32_state *state, uint32_t seed); -/** - * xxh32_update() - hash the data given and update the xxh32 state - * - * @state: The xxh32 state to update. - * @input: The data to hash. - * @length: The length of the data to hash. - * - * After calling xxh32_reset() call xxh32_update() as many times as necessary. - * - * Return: Zero on success, otherwise an error code. - */ -int xxh32_update(struct xxh32_state *state, const void *input, size_t length); - -/** - * xxh32_digest() - produce the current xxh32 hash - * - * @state: Produce the current xxh32 hash of this state. - * - * A hash value can be produced at any time. It is still possible to continue - * inserting input into the hash state after a call to xxh32_digest(), and - * generate new hashes later on, by calling xxh32_digest() again. - * - * Return: The xxh32 hash stored in the state. - */ -uint32_t xxh32_digest(const struct xxh32_state *state); - /** * xxh64_reset() - reset the xxh64 state to start a new hashing operation * -- cgit v1.2.3 From 07d24902977e4704fab8472981e73a0ad6dfa1fd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 10 Jun 2025 08:53:27 +0000 Subject: kexec: enable CMA based contiguous allocation When booting a new kernel with kexec_file, the kernel picks a target location that the kernel should live at, then allocates random pages, checks whether any of those patches magically happens to coincide with a target address range and if so, uses them for that range. For every page allocated this way, it then creates a page list that the relocation code - code that executes while all CPUs are off and we are just about to jump into the new kernel - copies to their final memory location. We can not put them there before, because chances are pretty good that at least some page in the target range is already in use by the currently running Linux environment. Copying is happening from a single CPU at RAM rate, which takes around 4-50 ms per 100 MiB. All of this is inefficient and error prone. To successfully kexec, we need to quiesce all devices of the outgoing kernel so they don't scribble over the new kernel's memory. We have seen cases where that does not happen properly (*cough* GIC *cough*) and hence the new kernel was corrupted. This started a month long journey to root cause failing kexecs to eventually see memory corruption, because the new kernel was corrupted severely enough that it could not emit output to tell us about the fact that it was corrupted. By allocating memory for the next kernel from a memory range that is guaranteed scribbling free, we can boot the next kernel up to a point where it is at least able to detect corruption and maybe even stop it before it becomes severe. This increases the chance for successful kexecs. Since kexec got introduced, Linux has gained the CMA framework which can perform physically contiguous memory mappings, while keeping that memory available for movable memory when it is not needed for contiguous allocations. The default CMA allocator is for DMA allocations. This patch adds logic to the kexec file loader to attempt to place the target payload at a location allocated from CMA. If successful, it uses that memory range directly instead of creating copy instructions during the hot phase. To ensure that there is a safety net in case anything goes wrong with the CMA allocation, it also adds a flag for user space to force disable CMA allocations. Using CMA allocations has two advantages: 1) Faster by 4-50 ms per 100 MiB. There is no more need to copy in the hot phase. 2) More robust. Even if by accident some page is still in use for DMA, the new kernel image will be safe from that access because it resides in a memory region that is considered allocated in the old kernel and has a chance to reinitialize that component. Link: https://lkml.kernel.org/r/20250610085327.51817-1-graf@amazon.com Signed-off-by: Alexander Graf Acked-by: Baoquan He Reviewed-by: Pasha Tatashin Cc: Zhongkun He Signed-off-by: Andrew Morton --- include/linux/kexec.h | 10 ++++++++++ include/uapi/linux/kexec.h | 1 + 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 03f85ad03025..1b10a5d84b68 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -79,6 +79,12 @@ extern note_buf_t __percpu *crash_notes; typedef unsigned long kimage_entry_t; +/* + * This is a copy of the UAPI struct kexec_segment and must be identical + * to it because it gets copied straight from user space into kernel + * memory. Do not modify this structure unless you change the way segments + * get ingested from user space. + */ struct kexec_segment { /* * This pointer can point to user memory if kexec_load() system @@ -172,6 +178,7 @@ int kexec_image_post_load_cleanup_default(struct kimage *image); * @buf_align: Minimum alignment needed. * @buf_min: The buffer can't be placed below this address. * @buf_max: The buffer can't be placed above this address. + * @cma: CMA page if the buffer is backed by CMA. * @top_down: Allocate from top of memory. * @random: Place the buffer at a random position. */ @@ -184,6 +191,7 @@ struct kexec_buf { unsigned long buf_align; unsigned long buf_min; unsigned long buf_max; + struct page *cma; bool top_down; #ifdef CONFIG_CRASH_DUMP bool random; @@ -340,6 +348,7 @@ struct kimage { unsigned long nr_segments; struct kexec_segment segment[KEXEC_SEGMENT_MAX]; + struct page *segment_cma[KEXEC_SEGMENT_MAX]; struct list_head control_pages; struct list_head dest_pages; @@ -361,6 +370,7 @@ struct kimage { */ unsigned int hotplug_support:1; #endif + unsigned int no_cma:1; #ifdef ARCH_HAS_KIMAGE_ARCH struct kimage_arch arch; diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index 5ae1741ea8ea..8958ebfcff94 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -27,6 +27,7 @@ #define KEXEC_FILE_ON_CRASH 0x00000002 #define KEXEC_FILE_NO_INITRAMFS 0x00000004 #define KEXEC_FILE_DEBUG 0x00000008 +#define KEXEC_FILE_NO_CMA 0x00000010 /* These values match the ELF architecture values. * Unless there is a good reason that should continue to be the case. -- cgit v1.2.3 From 004f42dd90b7ef542a51983bdaa5b2ef621ed41d Mon Sep 17 00:00:00 2001 From: WangYuli Date: Tue, 22 Jul 2025 15:34:30 +0800 Subject: xen/xenbus: fix typo "notifer" There is a spelling mistake of 'notifer' in the comment which should be 'notifier'. Link: https://lkml.kernel.org/r/C6633C66376C709A+20250722073431.21983-7-wangyuli@uniontech.com Signed-off-by: WangYuli Reviewed-by: Juergen Gross Signed-off-by: Andrew Morton --- include/xen/xenbus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 3f90bdd387b6..00b84f2e402b 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -180,7 +180,7 @@ int xenbus_printf(struct xenbus_transaction t, * sprintf-style type string, and pointer. Returns 0 or errno.*/ int xenbus_gather(struct xenbus_transaction t, const char *dir, ...); -/* notifer routines for when the xenstore comes up */ +/* notifier routines for when the xenstore comes up */ extern int xenstored_ready; int register_xenstore_notifier(struct notifier_block *nb); void unregister_xenstore_notifier(struct notifier_block *nb); -- cgit v1.2.3 From d171b10b2d7b067c16d79e1d069a23a34f088d23 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 22 Jul 2025 11:22:30 -0700 Subject: mm/page-flags: remove folio_start_writeback_keepwrite() Commit cd57b77197a4 ("ext4: Convert ext4_bio_write_page() to use a folio) removed set_page_writeback_keepwrite() which was the last/only caller of folio_start_writeback_keepwrite(). Link: https://lkml.kernel.org/r/20250722182230.2114587-1-joannelkoong@gmail.com Signed-off-by: Joanne Koong Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 8e4d6eda8a8d..8d3fa3a91ce4 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -837,8 +837,6 @@ void set_page_writeback(struct page *page); #define folio_start_writeback(folio) \ __folio_start_writeback(folio, false) -#define folio_start_writeback_keepwrite(folio) \ - __folio_start_writeback(folio, true) static __always_inline bool folio_test_head(const struct folio *folio) { -- cgit v1.2.3 From f225b34f1e6c81c50e48f6207ddb6d290be1b932 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 25 Jul 2025 09:29:41 +0100 Subject: mm/mseal: always define VM_SEALED Patch series "mseal cleanups", v4. Perform a number of cleanups to the mseal logic. Firstly, VM_SEALED is treated differently from every other VMA flag, it really doesn't make sense to do this, so we start by making this consistent with everything else. Next we place the madvise logic where it belongs - in mm/madvise.c. It really makes no sense to abstract this elsewhere. In doing so, we go to great lengths to explain very clearly the previously very confusing logic as to what sealed mappings are impacted here. In doing so, we retain existing logic regarding treatment of madvise() discard operations for a sealed, read-only MAP_PRIVATE file-backed mapping. This is something we likely need to revisit. We then abstract out and explain the 'are there are any gaps in this range in the mm?' check being performed as a prerequisite to mseal being performed. Finally, we simplify the actual mseal logic which is really quite straightforward. No functional change is intended. This patch (of 4): There is no reason to treat VM_SEALED in a special way, in each other case in which a VMA flag is unavailable due to configuration, we simply assign that flag to VM_NONE, so make VM_SEALED consistent with all other VMA flags in this respect. Additionally, use the next available bit for VM_SEALED, 42, rather than arbitrarily putting it at 63 and update the declaration to match all other VMA flags. No functional change intended. Link: https://lkml.kernel.org/r/cover.1753431105.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/aeb398a77029b6e7377cd944328bc9bbc3c90537.1753431105.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Pedro Falcato Acked-by: David Hildenbrand Cc: Jann Horn Cc: Jeff Xu Cc: Kees Cook Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8e3a4c5b78ff..ceaa780a703a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -414,8 +414,10 @@ extern unsigned int kobjsize(const void *objp); #endif #ifdef CONFIG_64BIT -/* VM is sealed, in vm_flags */ -#define VM_SEALED _BITUL(63) +#define VM_SEALED_BIT 42 +#define VM_SEALED BIT(VM_SEALED_BIT) +#else +#define VM_SEALED VM_NONE #endif /* Bits set in the VMA until the stack is in its final location */ -- cgit v1.2.3 From 3dfde97800e06882960cc926d2c428f2128b7c70 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 24 Jul 2025 10:52:59 +0530 Subject: mm: add get_and_clear_ptes() and clear_ptes() Patch series "Optimizations for khugepaged", v4. If the underlying folio mapped by the ptes is large, we can process those ptes in a batch using folio_pte_batch(). For arm64 specifically, this results in a 16x reduction in the number of ptep_get() calls, since on a contig block, ptep_get() on arm64 will iterate through all 16 entries to collect a/d bits. Next, ptep_clear() will cause a TLBI for every contig block in the range via contpte_try_unfold(). Instead, use clear_ptes() to only do the TLBI at the first and last contig block of the range. For split folios, there will be no pte batching; the batch size returned by folio_pte_batch() will be 1. For pagetable split folios, the ptes will still point to the same large folio; for arm64, this results in the optimization described above, and for other arches, a minor improvement is expected due to a reduction in the number of function calls and batching atomic operations. This patch (of 3): Let's add variants to be used where "full" does not apply -- which will be the majority of cases in the future. "full" really only applies if we are about to tear down a full MM. Use get_and_clear_ptes() in existing code, clear_ptes() users will be added next. Link: https://lkml.kernel.org/r/20250724052301.23844-2-dev.jain@arm.com Signed-off-by: David Hildenbrand Signed-off-by: Dev Jain Reviewed-by: Baolin Wang Reviewed-by: Barry Song Reviewed-by: Lorenzo Stoakes Reviewed-by: Zi Yan Cc: Liam Howlett Cc: Mariano Pache Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e3b99920be05..4c035637eeb7 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -736,6 +736,29 @@ static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm, } #endif +/** + * get_and_clear_ptes - Clear present PTEs that map consecutive pages of + * the same folio, collecting dirty/accessed bits. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * + * Use this instead of get_and_clear_full_ptes() if it is known that we don't + * need to clear the full mm, which is mostly the case. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline pte_t get_and_clear_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ + return get_and_clear_full_ptes(mm, addr, ptep, nr, 0); +} + #ifndef clear_full_ptes /** * clear_full_ptes - Clear present PTEs that map consecutive pages of the same @@ -768,6 +791,28 @@ static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr, } #endif +/** + * clear_ptes - Clear present PTEs that map consecutive pages of the same folio. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * + * Use this instead of clear_full_ptes() if it is known that we don't need to + * clear the full mm, which is mostly the case. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline void clear_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ + clear_full_ptes(mm, addr, ptep, nr, 0); +} + /* * If two threads concurrently fault at the same page, the thread that * won the race updates the PTE and its local TLB/Cache. The other thread -- cgit v1.2.3 From 9a4f90e246615d1f42a9b907deb9b4c0a418d996 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 25 Jul 2025 15:29:01 +0100 Subject: mm: remove mm/io-mapping.c This is dead code, which was used from commit b739f125e4eb ("i915: use io_mapping_map_user") but reverted a month later by commit 0e4fe0c9f2f9 ("Revert "i915: use io_mapping_map_user"") back in 2021. Since then nobody has used it, so remove it. [akpm@linux-foundation.org: update Documentation/core-api/mm-api.rst, per Vlastimil] Link: https://lkml.kernel.org/r/20250725142901.81502-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/io-mapping.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 7376c1df9c90..c16353cc6e3c 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -225,7 +225,4 @@ io_mapping_free(struct io_mapping *iomap) kfree(iomap); } -int io_mapping_map_user(struct io_mapping *iomap, struct vm_area_struct *vma, - unsigned long addr, unsigned long pfn, unsigned long size); - #endif /* _LINUX_IO_MAPPING_H */ -- cgit v1.2.3 From a222439e1e273fa0f4e37ce17aeb109f3e91824f Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 25 Jul 2025 14:16:24 +0200 Subject: mm/rmap: add anon_vma lifetime debug check If an anon folio is mapped into userspace, its anon_vma must be alive, otherwise rmap walks can hit UAF. There have been syzkaller reports a few months ago[1][2] of UAF in rmap walks that seems to indicate that there can be pages with elevated mapcount whose anon_vma has already been freed, but I think we never figured out what the cause is; and syzkaller only hit these UAFs when memory pressure randomly caused reclaim to rmap-walk the affected pages, so it of course didn't manage to create a reproducer. Add a VM_WARN_ON_FOLIO() when we add/remove mappings of anonymous folios to hopefully catch such issues more reliably. [1] https://lore.kernel.org/r/67abaeaf.050a0220.110943.0041.GAE@google.com [2] https://lore.kernel.org/r/67a76f33.050a0220.3d72c.0028.GAE@google.com Link: https://lkml.kernel.org/r/20250725-anonvma-uaf-debug-v2-1-bc3c7e5ba5b1@google.com Signed-off-by: Jann Horn Acked-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Acked-by: Vlastimil Babka Acked-by: Harry Yoo Cc: David Hildenbrand Cc: Jann Horn Cc: Liam Howlett Cc: Rik van Riel Signed-off-by: Andrew Morton --- include/linux/rmap.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 20803fcb49a7..6cd020eea37a 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -449,6 +449,28 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio, default: VM_WARN_ON_ONCE(true); } + + /* + * Anon folios must have an associated live anon_vma as long as they're + * mapped into userspace. + * Note that the atomic_read() mainly does two things: + * + * 1. In KASAN builds with CONFIG_SLUB_RCU_DEBUG, it causes KASAN to + * check that the associated anon_vma has not yet been freed (subject + * to KASAN's usual limitations). This check will pass if the + * anon_vma's refcount has already dropped to 0 but an RCU grace + * period hasn't passed since then. + * 2. If the anon_vma has not yet been freed, it checks that the + * anon_vma still has a nonzero refcount (as opposed to being in the + * middle of an RCU delay for getting freed). + */ + if (folio_test_anon(folio) && !folio_test_ksm(folio)) { + unsigned long mapping = (unsigned long)folio->mapping; + struct anon_vma *anon_vma; + + anon_vma = (void *)(mapping - FOLIO_MAPPING_ANON); + VM_WARN_ON_FOLIO(atomic_read(&anon_vma->refcount) == 0, folio); + } } /* -- cgit v1.2.3 From 9bbffee67ffd16360179327b57f3b1245579ef08 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 28 Jul 2025 10:53:55 -0700 Subject: mm: fix a UAF when vma->mm is freed after vma->vm_refcnt got dropped By inducing delays in the right places, Jann Horn created a reproducer for a hard to hit UAF issue that became possible after VMAs were allowed to be recycled by adding SLAB_TYPESAFE_BY_RCU to their cache. Race description is borrowed from Jann's discovery report: lock_vma_under_rcu() looks up a VMA locklessly with mas_walk() under rcu_read_lock(). At that point, the VMA may be concurrently freed, and it can be recycled by another process. vma_start_read() then increments the vma->vm_refcnt (if it is in an acceptable range), and if this succeeds, vma_start_read() can return a recycled VMA. In this scenario where the VMA has been recycled, lock_vma_under_rcu() will then detect the mismatching ->vm_mm pointer and drop the VMA through vma_end_read(), which calls vma_refcount_put(). vma_refcount_put() drops the refcount and then calls rcuwait_wake_up() using a copy of vma->vm_mm. This is wrong: It implicitly assumes that the caller is keeping the VMA's mm alive, but in this scenario the caller has no relation to the VMA's mm, so the rcuwait_wake_up() can cause UAF. The diagram depicting the race: T1 T2 T3 == == == lock_vma_under_rcu mas_walk mmap vma_start_read __refcount_inc_not_zero_limited_acquire munmap __vma_enter_locked refcount_add_not_zero vma_end_read vma_refcount_put __refcount_dec_and_test rcuwait_wait_event rcuwait_wake_up [UAF] Note that rcuwait_wait_event() in T3 does not block because refcount was already dropped by T1. At this point T3 can exit and free the mm causing UAF in T1. To avoid this we move vma->vm_mm verification into vma_start_read() and grab vma->vm_mm to stabilize it before vma_refcount_put() operation. [surenb@google.com: v3] Link: https://lkml.kernel.org/r/20250729145709.2731370-1-surenb@google.com Link: https://lkml.kernel.org/r/20250728175355.2282375-1-surenb@google.com Fixes: 3104138517fc ("mm: make vma cache SLAB_TYPESAFE_BY_RCU") Signed-off-by: Suren Baghdasaryan Reported-by: Jann Horn Closes: https://lore.kernel.org/all/CAG48ez0-deFbVH=E3jbkWx=X3uVbd8nWeo6kbJPQ0KoUD+m2tA@mail.gmail.com/ Reviewed-by: Vlastimil Babka Acked-by: Lorenzo Stoakes Cc: Jann Horn Cc: Liam Howlett Cc: Signed-off-by: Andrew Morton --- include/linux/mmap_lock.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 1f4f44951abe..11a078de9150 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -12,6 +12,7 @@ extern int rcuwait_wake_up(struct rcuwait *w); #include #include #include +#include #define MMAP_LOCK_INITIALIZER(name) \ .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock), @@ -154,6 +155,10 @@ static inline void vma_refcount_put(struct vm_area_struct *vma) * reused and attached to a different mm before we lock it. * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got * detached. + * + * WARNING! The vma passed to this function cannot be used if the function + * fails to lock it because in certain cases RCU lock is dropped and then + * reacquired. Once RCU lock is dropped the vma can be concurently freed. */ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, struct vm_area_struct *vma) @@ -183,6 +188,31 @@ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, } rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); + + /* + * If vma got attached to another mm from under us, that mm is not + * stable and can be freed in the narrow window after vma->vm_refcnt + * is dropped and before rcuwait_wake_up(mm) is called. Grab it before + * releasing vma->vm_refcnt. + */ + if (unlikely(vma->vm_mm != mm)) { + /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ + struct mm_struct *other_mm = vma->vm_mm; + + /* + * __mmdrop() is a heavy operation and we don't need RCU + * protection here. Release RCU lock during these operations. + * We reinstate the RCU read lock as the caller expects it to + * be held when this function returns even on error. + */ + rcu_read_unlock(); + mmgrab(other_mm); + vma_refcount_put(vma); + mmdrop(other_mm); + rcu_read_lock(); + return NULL; + } + /* * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result. * False unlocked result is impossible because we modify and check -- cgit v1.2.3 From fcd90ad31e29d0b403f3a074a64cd7f0876175dd Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:23 +0300 Subject: execmem: drop unused execmem_update_copy() Patch series "x86: enable EXECMEM_ROX_CACHE for ftrace and kprobes", v3. These patches enable use of EXECMEM_ROX_CACHE for ftrace and kprobes allocations on x86. They also include some ground work in execmem. Since the execmem model for caching large ROX pages changed from the initial assumption that the memory that is allocated from ROX cache is always ROX to the current state where memory can be temporarily made RW and then restored to ROX, we can stop using text poking to update it. This also saves the hassle of trying lock text_mutex in execmem_cache_free() when kprobes already hold that mutex. This patch (of 8): The execmem_update_copy() that used text poking was required when memory allocated from ROX cache was always read-only. Since now its permissions can be switched to read-write there is no need in a function that updates memory with text poking. Remove it. Link: https://lkml.kernel.org/r/20250713071730.4117334-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20250713071730.4117334-2-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Peter Zijlstra (Intel) Cc: Daniel Gomez Cc: Masami Hiramatsu (Google) Cc: Petr Pavlu Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/execmem.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 3be35680a54f..734fbe83d98e 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -185,19 +185,6 @@ DEFINE_FREE(execmem, void *, if (_T) execmem_free(_T)); struct vm_struct *execmem_vmap(size_t size); #endif -/** - * execmem_update_copy - copy an update to executable memory - * @dst: destination address to update - * @src: source address containing the data - * @size: how many bytes of memory shold be copied - * - * Copy @size bytes from @src to @dst using text poking if the memory at - * @dst is read-only. - * - * Return: a pointer to @dst or NULL on error - */ -void *execmem_update_copy(void *dst, const void *src, size_t size); - /** * execmem_is_rox - check if execmem is read-only * @type - the execmem type to check -- cgit v1.2.3 From 838955f64ae7582f009a3538889bb9244f37ab26 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:24 +0300 Subject: execmem: introduce execmem_alloc_rw() Some callers of execmem_alloc() require the memory to be temporarily writable even when it is allocated from ROX cache. These callers use execemem_make_temp_rw() right after the call to execmem_alloc(). Wrap this sequence in execmem_alloc_rw() API. Link: https://lkml.kernel.org/r/20250713071730.4117334-3-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Daniel Gomez Reviewed-by: Petr Pavlu Acked-by: Peter Zijlstra (Intel) Cc: Masami Hiramatsu (Google) Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/execmem.h | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 734fbe83d98e..8b61b05da7d5 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -67,21 +67,6 @@ enum execmem_range_flags { */ void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); -/** - * execmem_make_temp_rw - temporarily remap region with read-write - * permissions - * @ptr: address of the region to remap - * @size: size of the region to remap - * - * Remaps a part of the cached large page in the ROX cache in the range - * [@ptr, @ptr + @size) as writable and not executable. The caller must - * have exclusive ownership of this range and ensure nothing will try to - * execute code in this range. - * - * Return: 0 on success or negative error code on failure. - */ -int execmem_make_temp_rw(void *ptr, size_t size); - /** * execmem_restore_rox - restore read-only-execute permissions * @ptr: address of the region to remap @@ -95,7 +80,6 @@ int execmem_make_temp_rw(void *ptr, size_t size); */ int execmem_restore_rox(void *ptr, size_t size); #else -static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } #endif @@ -165,6 +149,28 @@ struct execmem_info *execmem_arch_setup(void); */ void *execmem_alloc(enum execmem_type type, size_t size); +/** + * execmem_alloc_rw - allocate writable executable memory + * @type: type of the allocation + * @size: how many bytes of memory are required + * + * Allocates memory that will contain executable code, either generated or + * loaded from kernel modules. + * + * Allocates memory that will contain data coupled with executable code, + * like data sections in kernel modules. + * + * Forces writable permissions on the allocated memory and the caller is + * responsible to manage the permissions afterwards. + * + * For architectures that use ROX cache the permissions will be set to R+W. + * For architectures that don't use ROX cache the default permissions for @type + * will be used as they must be writable. + * + * Return: a pointer to the allocated memory or %NULL + */ +void *execmem_alloc_rw(enum execmem_type type, size_t size); + /** * execmem_free - free executable memory * @ptr: pointer to the memory that should be freed -- cgit v1.2.3 From ab674b6871b049aab2e86d1d7375526368ed175a Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:28 +0300 Subject: execmem: drop writable parameter from execmem_fill_trapping_insns() After update of execmem_cache_free() that made memory writable before updating it, there is no need to update read only memory, so the writable parameter to execmem_fill_trapping_insns() is not needed. Drop it. Link: https://lkml.kernel.org/r/20250713071730.4117334-7-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Peter Zijlstra (Intel) Cc: Daniel Gomez Cc: Masami Hiramatsu (Google) Cc: Petr Pavlu Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/execmem.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 8b61b05da7d5..7de229134e30 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -60,12 +60,11 @@ enum execmem_range_flags { * will trap * @ptr: pointer to memory to fill * @size: size of the range to fill - * @writable: is the memory poited by @ptr is writable or ROX * * A hook for architecures to fill execmem ranges with invalid instructions. * Architectures that use EXECMEM_ROX_CACHE must implement this. */ -void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); +void execmem_fill_trapping_insns(void *ptr, size_t size); /** * execmem_restore_rox - restore read-only-execute permissions -- cgit v1.2.3 From bb5b0b4317c9516bdc5e9a4235e3b5f1a73b7e48 Mon Sep 17 00:00:00 2001 From: Joshua Kinard Date: Mon, 21 Jul 2025 13:00:51 -0400 Subject: rtc: ds1685: Update Joshua Kinard's email address. I am switching my address to a personal domain, so need to update the driver's files and the entry in MAINTAINERS. Signed-off-by: Joshua Kinard Link: https://lore.kernel.org/r/20250721170051.32407-1-kumba@gentoo.org Signed-off-by: Alexandre Belloni --- include/linux/rtc/ds1685.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rtc/ds1685.h b/include/linux/rtc/ds1685.h index 5a41c3bbcbe3..01da4582db6d 100644 --- a/include/linux/rtc/ds1685.h +++ b/include/linux/rtc/ds1685.h @@ -8,7 +8,7 @@ * include larger, battery-backed NV-SRAM, burst-mode access, and an RTC * write counter. * - * Copyright (C) 2011-2014 Joshua Kinard . + * Copyright (C) 2011-2014 Joshua Kinard . * Copyright (C) 2009 Matthias Fuchs . * * References: -- cgit v1.2.3 From 89c52146392948f4cdda3853da9d82ec6d1dd1f4 Mon Sep 17 00:00:00 2001 From: Marcos Alano Date: Tue, 5 Aug 2025 13:44:29 -0700 Subject: Input: add keycode for performance mode key Alienware calls this key "Performance Boost". Dell calls it "G-Mode". The goal is to have a specific keycode to detect when this key is pressed, so userspace can act upon it and do what have to do, usually starting the power profile for performance. Signed-off-by: Marcos Alano Link: https://lore.kernel.org/r/20250509193708.2190586-1-marcoshalano@gmail.com Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 08cb157ab593..ca5851e97fac 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -770,6 +770,9 @@ #define KEY_KBD_LCD_MENU4 0x2bb #define KEY_KBD_LCD_MENU5 0x2bc +/* Performance Boost key (Alienware)/G-Mode key (Dell) */ +#define KEY_PERFORMANCE 0x2bd + #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 #define BTN_TRIGGER_HAPPY2 0x2c1 -- cgit v1.2.3 From 86624ba3b522b6512def25534341da93356c8da4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 14 Jul 2025 13:08:25 -0300 Subject: vfio/pci: Do vf_token checks for VFIO_DEVICE_BIND_IOMMUFD This was missed during the initial implementation. The VFIO PCI encodes the vf_token inside the device name when opening the device from the group FD, something like: "0000:04:10.0 vf_token=bd8d9d2b-5a5f-4f5a-a211-f591514ba1f3" This is used to control access to a VF unless there is co-ordination with the owner of the PF. Since we no longer have a device name in the cdev path, pass the token directly through VFIO_DEVICE_BIND_IOMMUFD using an optional field indicated by VFIO_DEVICE_BIND_FLAG_TOKEN. Fixes: 5fcc26969a16 ("vfio: Add VFIO_DEVICE_BIND_IOMMUFD") Tested-by: Shameer Kolothum Reviewed-by: Yi Liu Signed-off-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/0-v3-bdd8716e85fe+3978a-vfio_token_jgg@nvidia.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 4 ++++ include/linux/vfio_pci_core.h | 2 ++ include/uapi/linux/vfio.h | 12 +++++++++++- 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 707b00772ce1..eb563f538dee 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -105,6 +105,9 @@ struct vfio_device { * @match: Optional device name match callback (return: 0 for no-match, >0 for * match, -errno for abort (ex. match with insufficient or incorrect * additional args) + * @match_token_uuid: Optional device token match/validation. Return 0 + * if the uuid is valid for the device, -errno otherwise. uuid is NULL + * if none was provided. * @dma_unmap: Called when userspace unmaps IOVA from the container * this device is attached to. * @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl @@ -132,6 +135,7 @@ struct vfio_device_ops { int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma); void (*request)(struct vfio_device *vdev, unsigned int count); int (*match)(struct vfio_device *vdev, char *buf); + int (*match_token_uuid)(struct vfio_device *vdev, const uuid_t *uuid); void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length); int (*device_feature)(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz); diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index fbb472dd99b3..f541044e42a2 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -122,6 +122,8 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *bu int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma); void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count); int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf); +int vfio_pci_core_match_token_uuid(struct vfio_device *core_vdev, + const uuid_t *uuid); int vfio_pci_core_enable(struct vfio_pci_core_device *vdev); void vfio_pci_core_disable(struct vfio_pci_core_device *vdev); void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 5764f315137f..75100bf009ba 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -905,10 +905,12 @@ struct vfio_device_feature { * VFIO_DEVICE_BIND_IOMMUFD - _IOR(VFIO_TYPE, VFIO_BASE + 18, * struct vfio_device_bind_iommufd) * @argsz: User filled size of this data. - * @flags: Must be 0. + * @flags: Must be 0 or a bit flags of VFIO_DEVICE_BIND_* * @iommufd: iommufd to bind. * @out_devid: The device id generated by this bind. devid is a handle for * this device/iommufd bond and can be used in IOMMUFD commands. + * @token_uuid_ptr: Valid if VFIO_DEVICE_BIND_FLAG_TOKEN. Points to a 16 byte + * UUID in the same format as VFIO_DEVICE_FEATURE_PCI_VF_TOKEN. * * Bind a vfio_device to the specified iommufd. * @@ -917,13 +919,21 @@ struct vfio_device_feature { * * Unbind is automatically conducted when device fd is closed. * + * A token is sometimes required to open the device, unless this is known to be + * needed VFIO_DEVICE_BIND_FLAG_TOKEN should not be set and token_uuid_ptr is + * ignored. The only case today is a PF/VF relationship where the VF bind must + * be provided the same token as VFIO_DEVICE_FEATURE_PCI_VF_TOKEN provided to + * the PF. + * * Return: 0 on success, -errno on failure. */ struct vfio_device_bind_iommufd { __u32 argsz; __u32 flags; +#define VFIO_DEVICE_BIND_FLAG_TOKEN (1 << 0) __s32 iommufd; __u32 out_devid; + __aligned_u64 token_uuid_ptr; }; #define VFIO_DEVICE_BIND_IOMMUFD _IO(VFIO_TYPE, VFIO_BASE + 18) -- cgit v1.2.3 From 397a46c9aa3343e8efe6847bdaa124945bab1de4 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 25 Jul 2025 09:46:50 +0200 Subject: gpio: remove legacy GPIO line value setter callbacks With no more users of the legacy GPIO line value setters - .set() and .set_multiple() - we can now remove them from the kernel. Link: https://lore.kernel.org/r/20250725074651.14002-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 4b984e8f8fcd..90567dde7d8e 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -347,8 +347,6 @@ struct gpio_irq_chip { * @get: returns value for signal "offset", 0=low, 1=high, or negative error * @get_multiple: reads values for multiple signals defined by "mask" and * stores them in "bits", returns 0 on success or negative error - * @set: **DEPRECATED** - please use set_rv() instead - * @set_multiple: **DEPRECATED** - please use set_multiple_rv() instead * @set_rv: assigns output value for signal "offset", returns 0 on success or * negative error value * @set_multiple_rv: assigns output values for multiple signals defined by @@ -445,11 +443,6 @@ struct gpio_chip { int (*get_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); - void (*set)(struct gpio_chip *gc, - unsigned int offset, int value); - void (*set_multiple)(struct gpio_chip *gc, - unsigned long *mask, - unsigned long *bits); int (*set_rv)(struct gpio_chip *gc, unsigned int offset, int value); -- cgit v1.2.3 From d9d87d90cc0b10cd56ae353f50b11417e7d21712 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 17 Jul 2025 15:21:26 +0200 Subject: treewide: rename GPIO set callbacks back to their original names The conversion of all GPIO drivers to using the .set_rv() and .set_multiple_rv() callbacks from struct gpio_chip (which - unlike their predecessors - return an integer and allow the controller drivers to indicate failures to users) is now complete and the legacy ones have been removed. Rename the new callbacks back to their original names in one sweeping change. Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 19 +++++++++---------- include/linux/gpio/generic.h | 4 ++-- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 90567dde7d8e..667f8fd58a79 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -347,10 +347,10 @@ struct gpio_irq_chip { * @get: returns value for signal "offset", 0=low, 1=high, or negative error * @get_multiple: reads values for multiple signals defined by "mask" and * stores them in "bits", returns 0 on success or negative error - * @set_rv: assigns output value for signal "offset", returns 0 on success or - * negative error value - * @set_multiple_rv: assigns output values for multiple signals defined by - * "mask", returns 0 on success or negative error value + * @set: assigns output value for signal "offset", returns 0 on success or + * negative error value + * @set_multiple: assigns output values for multiple signals defined by + * "mask", returns 0 on success or negative error value * @set_config: optional hook for all kinds of settings. Uses the same * packed config format as generic pinconf. Must return 0 on success and * a negative error number on failure. @@ -443,12 +443,11 @@ struct gpio_chip { int (*get_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); - int (*set_rv)(struct gpio_chip *gc, - unsigned int offset, - int value); - int (*set_multiple_rv)(struct gpio_chip *gc, - unsigned long *mask, - unsigned long *bits); + int (*set)(struct gpio_chip *gc, + unsigned int offset, int value); + int (*set_multiple)(struct gpio_chip *gc, + unsigned long *mask, + unsigned long *bits); int (*set_config)(struct gpio_chip *gc, unsigned int offset, unsigned long config); diff --git a/include/linux/gpio/generic.h b/include/linux/gpio/generic.h index b511acd58ab0..f3a8db4598bb 100644 --- a/include/linux/gpio/generic.h +++ b/include/linux/gpio/generic.h @@ -88,10 +88,10 @@ static inline int gpio_generic_chip_set(struct gpio_generic_chip *chip, unsigned int offset, int value) { - if (WARN_ON(!chip->gc.set_rv)) + if (WARN_ON(!chip->gc.set)) return -EOPNOTSUPP; - return chip->gc.set_rv(&chip->gc, offset, value); + return chip->gc.set(&chip->gc, offset, value); } #define gpio_generic_chip_lock(gen_gc) \ -- cgit v1.2.3 From 42e6c6ce03fd3e41e39a0f93f9b1a1d9fa664338 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 7 Aug 2025 11:24:12 +0800 Subject: lib/sbitmap: convert shallow_depth from one word to the whole sbitmap Currently elevators will record internal 'async_depth' to throttle asynchronous requests, and they both calculate shallow_dpeth based on sb->shift, with the respect that sb->shift is the available tags in one word. However, sb->shift is not the availbale tags in the last word, see __map_depth: if (index == sb->map_nr - 1) return sb->depth - (index << sb->shift); For consequence, if the last word is used, more tags can be get than expected, for example, assume nr_requests=256 and there are four words, in the worst case if user set nr_requests=32, then the first word is the last word, and still use bits per word, which is 64, to calculate async_depth is wrong. One the ohter hand, due to cgroup qos, bfq can allow only one request to be allocated, and set shallow_dpeth=1 will still allow the number of words request to be allocated. Fix this problems by using shallow_depth to the whole sbitmap instead of per word, also change kyber, mq-deadline and bfq to follow this, a new helper __map_depth_with_shallow() is introduced to calculate available bits in each word. Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20250807032413.1469456-2-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 189140bf11fc..4adf4b364fcd 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -213,12 +213,12 @@ int sbitmap_get(struct sbitmap *sb); * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, * limiting the depth used from each word. * @sb: Bitmap to allocate from. - * @shallow_depth: The maximum number of bits to allocate from a single word. + * @shallow_depth: The maximum number of bits to allocate from the bitmap. * * This rather specific operation allows for having multiple users with * different allocation limits. E.g., there can be a high-priority class that * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() - * with a @shallow_depth of (1 << (@sb->shift - 1)). Then, the low-priority + * with a @shallow_depth of (sb->depth >> 1). Then, the low-priority * class can only allocate half of the total bits in the bitmap, preventing it * from starving out the high-priority class. * @@ -478,7 +478,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, * sbitmap_queue, limiting the depth used from each word, with preemption * already disabled. * @sbq: Bitmap queue to allocate from. - * @shallow_depth: The maximum number of bits to allocate from a single word. + * @shallow_depth: The maximum number of bits to allocate from the queue. * See sbitmap_get_shallow(). * * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after -- cgit v1.2.3 From 45fa9f97e65231a9fd4f9429489cb74c10ccd0fd Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 7 Aug 2025 11:24:13 +0800 Subject: lib/sbitmap: make sbitmap_get_shallow() internal Because it's only used in sbitmap.c Signed-off-by: Yu Kuai Reviewed-by: Damien Le Moal Reviewed-by: Jan Kara Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20250807032413.1469456-3-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 4adf4b364fcd..ffb9907c7070 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -209,23 +209,6 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth); */ int sbitmap_get(struct sbitmap *sb); -/** - * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, - * limiting the depth used from each word. - * @sb: Bitmap to allocate from. - * @shallow_depth: The maximum number of bits to allocate from the bitmap. - * - * This rather specific operation allows for having multiple users with - * different allocation limits. E.g., there can be a high-priority class that - * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() - * with a @shallow_depth of (sb->depth >> 1). Then, the low-priority - * class can only allocate half of the total bits in the bitmap, preventing it - * from starving out the high-priority class. - * - * Return: Non-negative allocated bit number if successful, -1 otherwise. - */ -int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth); - /** * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap. * @sb: Bitmap to check. -- cgit v1.2.3 From 5378bdf6a611a32500fccf13d14156f219bb0c85 Mon Sep 17 00:00:00 2001 From: Adam Young Date: Mon, 14 Jul 2025 20:10:07 -0400 Subject: mailbox/pcc: support mailbox management of the shared buffer Define a new, optional, callback that allows the driver to specify how the return data buffer is allocated. If that callback is set, mailbox/pcc.c is now responsible for reading from and writing to the PCC shared buffer. This also allows for proper checks of the Commnand complete flag between the PCC sender and receiver. For Type 4 channels, initialize the command complete flag prior to accepting messages. Since the mailbox does not know what memory allocation scheme to use for response messages, the client now has an optional callback that allows it to allocate the buffer for a response message. When an outbound message is written to the buffer, the mailbox checks for the flag indicating the client wants an tx complete notification via IRQ. Upon receipt of the interrupt It will pair it with the outgoing message. The expected use is to free the kernel memory buffer for the previous outgoing message. Signed-off-by: Adam Young Signed-off-by: Jassi Brar --- include/acpi/pcc.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/acpi/pcc.h b/include/acpi/pcc.h index 840bfc95bae3..9af3b502f839 100644 --- a/include/acpi/pcc.h +++ b/include/acpi/pcc.h @@ -17,6 +17,35 @@ struct pcc_mbox_chan { u32 latency; u32 max_access_rate; u16 min_turnaround_time; + + /* Set to true to indicate that the mailbox should manage + * writing the dat to the shared buffer. This differs from + * the case where the drivesr are writing to the buffer and + * using send_data only to ring the doorbell. If this flag + * is set, then the void * data parameter of send_data must + * point to a kernel-memory buffer formatted in accordance with + * the PCC specification. + * + * The active buffer management will include reading the + * notify_on_completion flag, and will then + * call mbox_chan_txdone when the acknowledgment interrupt is + * received. + */ + bool manage_writes; + + /* Optional callback that allows the driver + * to allocate the memory used for receiving + * messages. The return value is the location + * inside the buffer where the mailbox should write the data. + */ + void *(*rx_alloc)(struct mbox_client *cl, int size); +}; + +struct pcc_header { + u32 signature; + u32 flags; + u32 length; + u32 command; }; /* Generic Communications Channel Shared Memory Region */ -- cgit v1.2.3